"""
Agent Leaderboard v1 - Main leaderboard interface
Updated implementation with LLM Type support and optimized radar charts
"""

import base64
import math
import re
from datetime import datetime
from pathlib import Path

import gradio as gr
import pandas as pd
import plotly.graph_objects as go

# Import components and styles from modular files
from components.leaderboard_components import (
    get_chart_colors, get_rank_badge, get_type_badge, 
    get_metric_tooltip, get_responsive_styles, get_faq_section
)
from styles.leaderboard_styles import get_leaderboard_css

ASSET_ICON_PATH = Path("krew_icon.png")
KREW_ICON_BASE64 = ""
if ASSET_ICON_PATH.exists():
    KREW_ICON_BASE64 = base64.b64encode(ASSET_ICON_PATH.read_bytes()).decode("utf-8")

CSV_PATH = Path("combined_evaluation_summary.csv")
if CSV_PATH.exists():
    EVALUATION_DATE = datetime.fromtimestamp(CSV_PATH.stat().st_mtime).strftime("%Y-%m-%d")
else:
    EVALUATION_DATE = datetime.today().strftime("%Y-%m-%d")


def create_leaderboard_v2_tab():
    """Create the main leaderboard v1 tab with interactive table"""
    token_to_cost_factor = 2e-6  # Rough cost per token ($2 per 1M tokens)
    tokens_per_turn = 1000  # Approximate tokens exchanged per turn for scaling
    level_ids = [f"L{i}" for i in range(1, 8)]
    level_tsq_sources = {
        "L1": "L1_ArgAcc",
        "L2": "L2_SelectAcc",
        "L3": "L3_PSM",
        "L4": "L4_Coverage",
        "L5": "L5_AdaptiveRoutingScore",
        "L6": "L6_EffScore",
        "L7": "L7_ContextRetention",
    }
    
    def load_leaderboard_data():
        """Load and prepare the leaderboard data"""
        df = pd.read_csv('combined_evaluation_summary.csv')
        
        # Clean and prepare data
        df = df.copy()
        numeric_candidate_cols = [col for col in df.columns if col not in ('Model', 'Vendor')]
        for col in numeric_candidate_cols:
            df[col] = pd.to_numeric(df[col], errors='coerce')
        
        # Derive per-level helper columns for cost and turns
        sr_columns = []
        tsq_columns = []
        duration_columns = []
        cost_columns = []
        turns_columns = []
        
        for level in level_ids:
            sr_col = f"{level}_SR"
            if sr_col in df.columns:
                sr_columns.append(sr_col)
                df[sr_col] = df[sr_col].round(3)
            
            tsq_source = level_tsq_sources.get(level)
            if tsq_source and tsq_source in df.columns:
                tsq_columns.append(tsq_source)
            
            duration_col = f"{level}_Avg_Exec_Time"
            if duration_col in df.columns:
                duration_columns.append(duration_col)
            
            token_col = f"{level}_Avg_Tokens"
            if token_col in df.columns:
                cost_col = f"{level}_Avg_Cost"
                turns_col = f"{level}_Avg_Turns"
                df[cost_col] = df[token_col] * token_to_cost_factor
                df[turns_col] = df[token_col] / tokens_per_turn
                cost_columns.append(cost_col)
                turns_columns.append(turns_col)
        
        if sr_columns:
            df['Avg AC'] = df[sr_columns].mean(axis=1)
        if tsq_columns:
            df['Avg TSQ'] = df[tsq_columns].mean(axis=1)
        if cost_columns:
            df['Avg Total Cost'] = df[cost_columns].mean(axis=1)
        if duration_columns:
            df['Avg Session Duration'] = df[duration_columns].mean(axis=1)
        if turns_columns:
            df['Avg Turns'] = df[turns_columns].mean(axis=1)
        
        # Derive core capability metrics for radar visualization
        if sr_columns:
            df['Overall Success'] = df[sr_columns].mean(axis=1)
        execution_cols = [col for col in ['L1_CallEM', 'L1_ArgAcc', 'L2_SelectAcc'] if col in df.columns]
        if execution_cols:
            df['Execution Accuracy'] = df[execution_cols].mean(axis=1)
        reasoning_cols = [col for col in ['L3_ProvAcc', 'L3_PSM', 'L4_Coverage'] if col in df.columns]
        if reasoning_cols:
            df['Complex Reasoning'] = df[reasoning_cols].mean(axis=1)
        robustness_cols = [col for col in ['L5_AdaptiveRoutingScore', 'L5_FallbackSR'] if col in df.columns]
        if robustness_cols:
            df['Robustness'] = df[robustness_cols].mean(axis=1)
        context_cols = [col for col in ['L6_ReuseRage', 'L6_EffScore', 'L7_ContextRetention'] if col in df.columns]
        if context_cols:
            df['Context & Efficiency'] = df[context_cols].mean(axis=1)
        epr_cols = [f"L{i}_EPR_CVR" for i in range(1, 8) if f"L{i}_EPR_CVR" in df.columns]
        if epr_cols:
            df['Call Validity'] = df[epr_cols].mean(axis=1)
        
        # Use LLM Type from CSV directly, with mapping to display names
        if 'LLM Type' in df.columns:
            # Clean the LLM Type column to remove any whitespace
            df['LLM Type'] = df['LLM Type'].astype(str).str.strip()
            
            # Map LLM Type to Model Type
            def map_llm_type(llm_type):
                if llm_type.upper() == "OSS":
                    return "Open source"
                else:
                    return "Proprietary"
            
            df['Model Type'] = df['LLM Type'].apply(map_llm_type)
        else:
            # Fallback to vendor mapping if LLM Type column doesn't exist
            vendor_model_type_map = {
                "OpenAI": "Proprietary",
                "Anthropic": "Proprietary", 
                "Google": "Proprietary",
                "Microsoft": "Proprietary",
                "Mistral": "Proprietary",
                "Databricks": "Open source",
                "Meta": "Open source",
                "Alibaba": "Open source",
                "알리바바": "Open source",  # Korean name for Alibaba
                "Kakao": "Open source",
                "SKT": "Open source", 
                "KT": "Open source",
                "xAI": "Proprietary",
            }
            df['Model Type'] = df['Vendor'].map(vendor_model_type_map).fillna('Proprietary')
        
        # Round numeric columns for better display
        round_three_cols = ['Avg AC', 'Avg TSQ', 'Avg Total Cost', 'Overall Success', 'Execution Accuracy',
                            'Complex Reasoning', 'Robustness', 'Context & Efficiency', 'Call Validity']
        round_one_cols = ['Avg Session Duration', 'Avg Turns']
        for col in round_three_cols:
            if col in df.columns:
                df[col] = pd.to_numeric(df[col], errors='coerce').round(3)
        for col in round_one_cols:
            if col in df.columns:
                df[col] = pd.to_numeric(df[col], errors='coerce').round(1)
        if cost_columns:
            df[cost_columns] = df[cost_columns].apply(pd.to_numeric, errors='coerce').round(3)
        if turns_columns:
            df[turns_columns] = df[turns_columns].apply(pd.to_numeric, errors='coerce').round(2)
        if duration_columns:
            df[duration_columns] = df[duration_columns].apply(pd.to_numeric, errors='coerce').round(2)
        
        # Fill NaN values appropriately
        df = df.fillna('')
        
        return df

    def build_static_radar_chart(values, labels):
        """Render a small static radar chart as inline SVG"""
        if not values or all(v == 0 for v in values):
            return """
                <div class="radar-placeholder">
                    <span>레이더 차트</span>
                    <small>기초 수행력 · 복합 추론력 · 견고성 · 맥락 효율성 · 전반적 성공률 · 기본적 유효성</small>
                </div>
            """
        size = 220
        center = size / 2
        radius = size * 0.38
        n = len(values)
        def point(v, idx, scale=1.0):
            angle = (2 * math.pi * idx / n) - math.pi / 2
            r = radius * v * scale
            x = center + r * math.cos(angle)
            y = center + r * math.sin(angle)
            return x, y
        polygon_points = " ".join(f"{x:.2f},{y:.2f}" for x, y in (point(v, i) for i, v in enumerate(values)))
        ring_polygons = []
        for step in (0.33, 0.66, 1.0):
            ring_points = " ".join(f"{x:.2f},{y:.2f}" for x, y in (point(step, i) for i in range(n)))
            opacity = 0.04 if step < 1.0 else 0.08
            ring_polygons.append(f'<polygon points="{ring_points}" fill="rgba(245,246,247,{opacity})" stroke="rgba(148,163,184,0.35)" stroke-width="1" />')
        axis_lines = "\n".join(
            f'<line x1="{center:.2f}" y1="{center:.2f}" x2="{point(1, idx)[0]:.2f}" y2="{point(1, idx)[1]:.2f}" stroke="rgba(148,163,184,0.35)" stroke-width="1" />'
            for idx in range(n)
        )
        label_spans = "\n".join(
            f'<text x="{point(1.1, idx)[0]:.2f}" y="{point(1.1, idx)[1]:.2f}" text-anchor="middle" dominant-baseline="middle" font-size="9" fill="white">{label}</text>'
            for idx, label in enumerate(labels)
        )
        svg = f"""
        <svg width="{size}" height="{size}" viewBox="0 0 {size} {size}" xmlns="http://www.w3.org/2000/svg" role="img">
            <defs>
                <radialGradient id="radarGlow" cx="50%" cy="50%" r="50%">
                    <stop offset="0%" stop-color="rgba(255,210,30,0.25)" />
                    <stop offset="100%" stop-color="rgba(255,210,30,0.0)" />
                </radialGradient>
            </defs>
            <rect width="{size}" height="{size}" fill="url(#radarGlow)" opacity="0.2" />
            {''.join(ring_polygons)}
            {axis_lines}
            <polygon points="{polygon_points}" fill="rgba(255,210,30,0.35)" stroke="#ffd21e" stroke-width="2" />
            {label_spans}
        </svg>
        """
        return svg

    # Level metadata for the 7-stage task framework
    level_details = {
        "ALL": {
            "title": "ALL · 전체 태스크",
            "description": "7개의 태스크 전반의 평균 성능을 한눈에 살펴보고 각 레벨 비교를 위한 기준점을 제공합니다."
        },
        "L1": {
            "title": "<span style='color: white;'>L1 · 단일 도구 실행</span>",
            "description": "<span style='color: white;'>단일 도구 실행 능력과 기본적인 명령 수행 정확도를 평가합니다.</span>"
        },
        "L2": {
            "title": "<span style='color: white;'>L2 · 도구 선택 능력</span>",
            "description": "<span style='color: white;'>요구 사항에 맞는 도구를 고르고 적절한 파라미터로 호출하는 능력을 측정합니다.</span>"
        },
        "L3": {
            "title": "<span style='color: white;'>L3 · 순차적 추론 (Chaining)</span>",
            "description": "<span style='color: white;'>복수 단계의 순차적 reasoning을 통해 문제를 해결하는 과정을 검증합니다.</span>"
        },
        "L4": {
            "title": "<span style='color: white;'>L4 · 병렬적 추론 (Aggregation)</span>",
            "description": "<span style='color: white;'>여러 소스의 정보를 병렬적으로 통합하고 요약하는 능력을 평가합니다.</span>"
        },
        "L5": {
            "title": "<span style='color: white;'>L5 · 강건성 (Robustness / Fallback)</span>",
            "description": "<span style='color: white;'>예상치 못한 오류나 실패 상황에 대한 인지와 대응 전략을 확인합니다.</span>"
        },
        "L6": {
            "title": "<span style='color: white;'>L6 · 효율성 (Efficiency)</span>",
            "description": "<span style='color: white;'>최소한의 호출과 비용으로 목표를 달성하는 운영 효율을 살펴봅니다.</span>"
        },
        "L7": {
            "title": "<span style='color: white;'>L7 · 장기 컨텍스트 기억 (Contextual Memory)</span>",
            "description": "<span style='color: white;'>장기 대화 맥락을 유지하고 적절히 활용하는 능력을 집중적으로 분석합니다.</span>"
        }
    }
    default_level = "ALL"
    
    sr_column_map = {level: f"{level}_SR" for level in level_ids}
    overall_sort_column = "Overall Success"
    
    def resolve_level(level_value):
        """Normalize the incoming level filter value"""
        if not level_value:
            return default_level
        return level_value if level_value in level_details else default_level
    
    def generate_html_table(filtered_df, highlight_column):
        """Generate styled HTML table with per-level success rates"""
        valid_highlights = list(sr_column_map.values()) + ["Overall Success"]
        highlight_column = highlight_column if highlight_column in valid_highlights else None
        overall_column = "Overall Success"
        overall_highlight = (highlight_column == overall_column)
        highlight_map = {level: (sr_column_map[level] == highlight_column) for level in level_ids}
        
        table_html = """
        <style>
            /* Dark theme table styling */
            .v2-table-container {
                background: var(--bg-card);
                border-radius: 16px;
                overflow: hidden;
                border: 1px solid var(--border-subtle);
                margin-top: 20px;
            }
            
            .v2-styled-table {
                width: 100%;
                border-collapse: collapse;
                font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif;
                background: var(--bg-card);
                color: white;
            }
            
            .v2-styled-table thead {
                position: sticky;
                top: 0;
                background: rgba(255, 210, 30, 0.1);
                z-index: 1;
            }
            
            .v2-styled-table th {
                padding: 14px 12px;
                text-align: left;
                font-weight: 600;
                color: white;
                border-bottom: 2px solid var(--accent-primary);
                font-size: 13px;
                text-transform: uppercase;
                letter-spacing: 0.05em;
            }
            
            .v2-styled-table th.numeric-cell {
                text-align: center;
            }
            
            .v2-styled-table td {
                padding: 12px;
                border-bottom: 1px solid var(--border-subtle);
                color: white;
                transition: all 0.2s ease;
            }
            
            .v2-styled-table tbody tr {
                transition: all 0.3s ease;
            }
            
            .v2-styled-table tbody tr:hover {
                background: rgba(255, 210, 30, 0.15) !important;
                box-shadow: 0 0 20px rgba(255, 210, 30, 0.3), inset 0 0 20px rgba(255, 210, 30, 0.1);
                transform: scale(1.01);
            }
            
            .v2-styled-table tbody tr:nth-child(even) {
                background: var(--bg-secondary);
            }
            
            .model-name {
                font-weight: 500;
                color: white;
                transition: color 0.2s ease;
            }
            
            /* Keep model name color consistent on hover to emphasize row highlight */
            .v2-styled-table tr:hover .model-name {
                color: white;
            }
            
            .numeric-cell {
                font-family: 'Geist Mono', monospace;
                font-size: 13px;
                text-align: center;
            }
            
            .highlight-header {
                background: rgba(255, 210, 30, 0.14);
                color: white;
            }
            
            .highlight-cell {
                background: rgba(255, 210, 30, 0.08);
                color: white;
                font-weight: 600;
            }
        </style>
        
        <div class="v2-table-container">
            <table class="v2-styled-table">
                <thead>
                    <tr>
                        <th style="width: 80px;">Rank</th>
                        <th>Model</th>
                        <th>Vendor</th>
                        <th style="width: 120px;">LLM Type</th>
        """
        overall_header_classes = ["numeric-cell"]
        if overall_highlight:
            overall_header_classes.append("highlight-header")
        table_html += f"""
                        <th class="{' '.join(overall_header_classes)}" title="Average success rate across all levels">
                            <span class="metric-header">Overall <span class="info-icon">ⓘ</span></span>
                        </th>
        """
        for level in level_ids:
            header_classes = ["numeric-cell"]
            if highlight_map.get(level):
                header_classes.append("highlight-header")
            table_html += f"""
                        <th class="{' '.join(header_classes)}" title="Average success rate for {level}">
                            <span class="metric-header">{level} <span class="info-icon">ⓘ</span></span>
                        </th>
            """
        table_html += """
                    </tr>
                </thead>
                <tbody>
        """
        def safe_float(value):
            if value is None:
                return ''
            if isinstance(value, str) and value.strip() == '':
                return ''
            if pd.isna(value):
                return ''
            try:
                return float(value)
            except (TypeError, ValueError):
                return ''
        
        # Generate table rows
        for idx, (_, row) in enumerate(filtered_df.iterrows()):
            rank = idx + 1
            table_html += f"""
                <tr>
                    <td>{get_rank_badge(rank)}</td>
                    <td class="model-name">{row['Model']}</td>
                    <td>{row['Vendor']}</td>
                    <td>{get_type_badge(row['Model Type'])}</td>
            """
            overall_value = safe_float(row.get(overall_column, ''))
            if overall_value != '':
                overall_display = f'{overall_value:.3f}'
            else:
                overall_display = '-'
            overall_classes = ["numeric-cell"]
            if overall_highlight:
                overall_classes.append("highlight-cell")
            table_html += f'<td class="{" ".join(overall_classes)}">{overall_display}</td>'
            for level in level_ids:
                sr_col = sr_column_map[level]
                value = safe_float(row.get(sr_col, ''))
                if value != '':
                    value_display = f'{value:.3f}'
                else:
                    value_display = '-'
                cell_classes = ["numeric-cell"]
                if highlight_map.get(level):
                    cell_classes.append("highlight-cell")
                table_html += f'<td class="{" ".join(cell_classes)}">{value_display}</td>'
            table_html += "</tr>"
        
        table_html += """
                </tbody>
            </table>
        </div>
        """
        
        return table_html
    
    def update_leaderboard_title(level_filter):
        """Update the leaderboard title based on selected level"""
        level_key = resolve_level(level_filter)
        level_info = level_details.get(level_key, level_details[default_level])
        level_title = level_info["title"]
        level_description = level_info["description"]
        
        return f"""
        <div class="domain-selector-container leaderboard-intro">
            <div class="domain-header">
                <h2 class="domain-title" style="color: white;">Agent Leaderboard · {level_title}</h2>
                <p class="domain-subtitle" style="color: white;">{level_description}</p>
            </div>
            <div class="dataframe-container">
        """
    
    model_type_lookup = {
        "OSS": "Open source",
        "API": "Proprietary"
    }
    
    def apply_filters(df, level_filter, model_type_filter, sort_order, sort_by="Overall Success"):
        """Apply shared filters and sorting to the leaderboard dataframe."""
        filtered_df = df.copy()
        level_key = resolve_level(level_filter)
        highlight_column = None
        
        if model_type_filter != "All":
            mapped_type = model_type_lookup.get(model_type_filter, model_type_filter)
            filtered_df = filtered_df[filtered_df['Model Type'] == mapped_type]
        
        actual_sort_column = sort_by if sort_by in filtered_df.columns else None
        if not actual_sort_column:
            if level_key == "ALL":
                actual_sort_column = overall_sort_column if overall_sort_column in filtered_df.columns else None
            else:
                actual_sort_column = sr_column_map.get(level_key)
        
        if level_key in sr_column_map:
            highlight_column = sr_column_map[level_key]
        elif level_key == "ALL" and overall_sort_column in filtered_df.columns:
            highlight_column = overall_sort_column
        
        if actual_sort_column and actual_sort_column in filtered_df.columns:
            ascending = (sort_order == "Ascending")
            filtered_df = filtered_df.sort_values(by=actual_sort_column, ascending=ascending, na_position='last')
        
        return filtered_df, level_key, highlight_column

    def filter_and_sort_data(level_filter, model_type_filter, sort_by, sort_order):
        """Filter and sort the leaderboard data"""
        df = load_leaderboard_data()
        
        filtered_df, level_key, highlight_column = apply_filters(df, level_filter, model_type_filter, sort_order, sort_by)
        
        # Generate HTML table
        return generate_html_table(filtered_df, highlight_column)
    
    # Load initial data
    initial_table = filter_and_sort_data(default_level, "All", "Overall Success", "Descending")
    initial_df = load_leaderboard_data()  # Load raw data for model selector
    initial_selected_models = initial_df['Model'].tolist()[:5] if len(initial_df) > 0 else []
    initial_heatmap_models = initial_df['Model'].tolist()[:12] if len(initial_df) > 0 else []
    initial_heatmap = create_performance_heatmap(initial_df, initial_heatmap_models)
    initial_level_metric_level = level_ids[0] if level_ids else None
    initial_level_model_choices = initial_df['Model'].tolist() if len(initial_df) > 0 else []
    initial_level_model_values = initial_level_model_choices[:5]
    initial_level_metric_chart = create_level_metric_chart(
        initial_df, 
        initial_level_metric_level,
        initial_level_model_values
    ) if initial_level_metric_level else create_empty_level_metric_chart("No level metrics available")
    
    # Load custom CSS and responsive styles
    custom_css = get_leaderboard_css() + get_responsive_styles() + """
    <style>
    /* Page-specific styles for leaderboard v2 */
    
    /* Metric header styles with info icons */
    .metric-header {
        cursor: help;
        display: inline-flex;
        align-items: center;
        gap: 6px;
    }
    
    .info-icon {
        color: var(--accent-secondary);
        font-size: 1em;
        opacity: 0.8;
        transition: opacity 0.2s ease;
        font-weight: normal;
    }
    
    .metric-header:hover .info-icon {
        opacity: 1;
    }
    
    /* Native tooltip styling */
    .v2-styled-table th[title] {
        cursor: help;
    }
    
    /* Custom tooltip using CSS only */
    [data-tooltip] {
        position: relative;
        cursor: help;
    }
    
    [data-tooltip]::before {
        content: attr(data-tooltip);
        position: absolute;
        bottom: 100%;
        left: 50%;
        transform: translateX(-50%);
        background: rgba(26, 26, 46, 0.95);
        color: #f5f6f7;
        padding: 8px 12px;
        border-radius: 6px;
        font-size: 12px;
        white-space: nowrap;
        max-width: 300px;
        z-index: 10000;
        opacity: 0;
        pointer-events: none;
        transition: opacity 0.3s;
        margin-bottom: 5px;
        border: 1px solid rgba(16, 152, 247, 0.3);
        box-shadow: 0 4px 12px rgba(0, 0, 0, 0.8);
    }
    
    [data-tooltip]:hover::before {
        opacity: 0.8;
    }
    
    /* Dark theme table styling */
    .v2-table-container {
        background: var(--bg-card);
        border-radius: 16px;
        overflow: visible;  /* Changed from hidden to visible for tooltips */
        border: 1px solid var(--border-subtle);
        margin-top: 20px;
        position: relative;
    }
    
    
    .v2-styled-table {
        width: 100%;
        border-collapse: collapse;
        font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif;
        background: var(--bg-card);
        color: var(--text-primary);
    }
    
    .v2-styled-table thead {
        position: sticky;
        top: 0;
        background: rgba(255, 210, 30, 0.1);
        z-index: 1;
    }
    
    .v2-styled-table th {
        padding: 14px 12px;
        text-align: left;
        font-weight: 600;
        color: var(--text-primary);
        border-bottom: 2px solid var(--accent-primary);
        font-size: 14px;
        text-transform: uppercase;
        letter-spacing: 0.05em;
        position: relative;  /* Added for tooltip positioning */
    }
    
    .v2-styled-table td {
        padding: 12px;
        border-bottom: 1px solid var(--border-subtle);
        color: var(--text-primary);
        font-size: 14px;
        transition: all 0.2s ease;
    }
    
    .v2-styled-table tbody tr {
        transition: all 0.3s ease;
    }
    
    .v2-styled-table tbody tr:hover {
        background: rgba(255, 210, 30, 0.15) !important;
        box-shadow: 0 0 20px rgba(255, 210, 30, 0.3), inset 0 0 20px rgba(255, 210, 30, 0.1);
        transform: scale(1.01);
    }
    
    .v2-styled-table tbody tr:nth-child(even) {
        background: var(--bg-secondary);
    }
    
    .model-name {
        font-weight: 500;
        color: var(--accent-primary);
        font-size: 14px;
        transition: color 0.2s ease;
    }
    
    .v2-styled-table tr:hover .model-name {
        color: var(--accent-secondary);
    }
    
    .numeric-cell {
        font-family: 'Geist Mono', monospace;
        font-size: 14px;
        text-align: center;
    }
    
    </style>
    
    <script>
    // Function to update radio button styling
    function updateRadioStyling() {
        // Remove selected class from all labels first
        document.querySelectorAll('.selected').forEach(function(label) {
            label.classList.remove('selected');
        });
        document.querySelectorAll('.domain-radio label').forEach(function(label) {
            label.style.background = '';
            label.style.borderColor = '';
            label.style.transform = '';
            label.style.fontWeight = '';
        });
        
        // Apply selected class to checked radio buttons
        document.querySelectorAll('input[type="radio"]:checked').forEach(function(input) {
            var label = input.closest('label');
            if (label) {
                label.classList.add('selected');
                
                // For domain radio buttons, apply special styling
                if (label.closest('.domain-radio')) {
                    label.style.background = '#ffd21e33';
                    label.style.borderColor = 'var(--accent-primary)';
                    label.style.transform = 'scale(1.05)';
                    label.style.fontWeight = '600';
                }
            }
        });
    }
    
    // Wait for Gradio to initialize
    function initializeRadioStyles() {
        updateRadioStyling();
        
        // Create observer to watch for changes
        var observer = new MutationObserver(function(mutations) {
            mutations.forEach(function(mutation) {
                if (mutation.type === 'attributes' && mutation.attributeName === 'checked') {
                    updateRadioStyling();
                }
            });
        });
        
        // Observe all radio inputs
        document.querySelectorAll('input[type="radio"]').forEach(function(radio) {
            observer.observe(radio, { attributes: true });
        });
    }
    
    // Try multiple initialization strategies
    document.addEventListener('DOMContentLoaded', function() {
        setTimeout(initializeRadioStyles, 100);
        setTimeout(initializeRadioStyles, 500);
        setTimeout(initializeRadioStyles, 1000);
    });
    
    // Also check when window loads
    window.addEventListener('load', function() {
        setTimeout(initializeRadioStyles, 100);
    });
    
    // Listen for Gradio's custom events
    document.addEventListener('gradio:loaded', initializeRadioStyles);
    </script>
    """
    
    gr.HTML(custom_css)
    
    # Header styles and navigation
    gr.HTML("""
    <style>
    /* Enhanced button styling with better gradio compatibility */
    .header-action-button {
        display: inline-block !important;
        padding: 14px 28px !important;
        background: #ffd21e !important;
        color: #FFFFFF !important;
        text-decoration: none !important;
        border-radius: 16px !important;
        font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif !important;
        font-weight: 700 !important;
        font-size: 1.1rem !important;
        transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1) !important;
        border: none !important;
        cursor: pointer !important;
        box-shadow: 0 8px 24px rgba(255, 210, 30, 0.4), 0 4px 12px rgba(0, 0, 0, 0.3) !important;
        position: relative !important;
        overflow: hidden !important;
        text-shadow: 0 1px 2px rgba(0, 0, 0, 0.35) !important;
    }
    
    .header-action-button::before {
        content: '';
        position: absolute;
        top: 0;
        left: -100%;
        width: 100%;
        height: 100%;
        background: linear-gradient(90deg, transparent, rgba(255, 255, 255, 0.2), transparent);
        transition: left 0.6s;
    }
    
    .header-action-button:hover::before {
        left: 100%;
    }
    
    .header-action-button:hover {
        transform: translateY(-3px) !important;
        box-shadow: 0 12px 32px rgba(255, 210, 30, 0.5), 0 8px 16px rgba(0, 0, 0, 0.4) !important;
        background: #ffd21e !important;
        color: #FFFFFF !important;
        text-decoration: none !important;
        text-shadow: 0 2px 6px rgba(0, 0, 0, 0.45) !important;
    }
    
    .header-action-button:active {
        transform: translateY(-1px) !important;
    }
    
    .action-button-icon {
        font-size: 1.2rem !important;
        margin-right: 8px !important;
        filter: drop-shadow(0 0 8px rgba(255, 255, 255, 0.3));
    }
    
    #hero-banner {
        max-width: 960px;
        margin: 0 auto 20px auto;
        border-radius: 16px;
        overflow: hidden;
        box-shadow: 0 12px 32px rgba(0, 0, 0, 0.25);
    }
    
    #hero-banner img {
        width: 100%;
        height: auto;
        display: block;
    }
    
    .hero-title {
        font-size: 5rem;
        font-weight: 800;
        line-height: 1.1;
        background: linear-gradient(135deg, #FFE082 0%, #FFC107 50%, #FFB300 100%);
        -webkit-background-clip: text;
        -webkit-text-fill-color: transparent;
        margin-bottom: 1rem;
    }
    
    .hero-subtitle {
        color: var(--text-secondary);
        font-size: 1.25rem;
        font-family: 'Geist', sans-serif;
        margin-top: 0;
    }
    
    .hero-actions {
        display: flex;
        justify-content: center;
        align-items: center;
        gap: 16px;
        flex-wrap: wrap;
        margin: 32px 0;
        padding: 0 20px;
    }
    
    .hero-action-button {
        display: inline-flex !important;
        align-items: center !important;
        gap: 12px !important;
        padding: 10px 16px !important;
        background: rgba(245, 246, 247, 0.06) !important;
        border: 1px solid var(--border-subtle) !important;
        border-radius: 999px !important;
        color: var(--text-primary) !important;
        text-decoration: none !important;
        font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif !important;
        font-weight: 600 !important;
        font-size: 0.95rem !important;
        transition: all 0.3s ease !important;
        backdrop-filter: blur(10px) !important;
        -webkit-backdrop-filter: blur(10px) !important;
    }
    
    .hero-action-button:hover {
        transform: translateY(-2px) !important;
        border-color: var(--accent-primary) !important;
        background: rgba(255, 210, 30, 0.12) !important;
        text-decoration: none !important;
    }
    
    .hero-action-button svg {
        width: 20px;
        height: 20px;
    }
    
    .hero-action-button span {
        font-weight: 600;
        letter-spacing: 0.01em;
    }
    
    .dashboard-section {
        margin: 48px auto 0 auto;
        max-width: 1100px;
        padding: 40px;
        background: rgba(245, 246, 247, 0.06);
        border: 1px solid var(--border-subtle);
        border-radius: 24px;
        box-shadow: 0 12px 30px rgba(0, 0, 0, 0.25);
        backdrop-filter: blur(12px);
        -webkit-backdrop-filter: blur(12px);
    }
    
    .dashboard-section.emphasized {
        background: #ffd21e26;
        border-color: rgba(255, 210, 30, 0.6);
        box-shadow: 0 24px 50px rgba(255, 210, 30, 0.25);
    }
    
    .dashboard-section .section-header {
        display: flex;
        flex-direction: column;
        align-items: center;
        justify-content: center;
        text-align: center;
        gap: 12px;
        margin-bottom: 32px;
    }
    
    .section-title {
        font-size: 2.2rem;
        font-weight: 700;
        color: var(--text-primary);
        margin-bottom: 12px;
        text-align: center !important;
    }
    
    .section-lead, .section-subtitle {
        font-size: 1.1rem;
        color: var(--text-secondary);
        max-width: 720px;
        margin: 0 auto 24px auto;
        line-height: 1.7;
        text-align: center !important;
        word-break: keep-all;
        white-space: normal;
        display: block;
    }
    
    .phase-grid {
        display: grid;
        grid-template-columns: repeat(2, minmax(0, 1fr));
        gap: 24px;
    }
    
    .phase-card {
        padding: 28px;
        border-radius: 20px;
        border: 1px solid var(--border-subtle);
        background: rgba(1, 9, 26, 0.65);
        box-shadow: inset 0 1px 0 rgba(255, 255, 255, 0.03);
    }
    
    .phase-card h3 {
        font-size: 1.5rem;
        color: var(--text-primary);
        margin-bottom: 20px;
        font-weight: 700;
    }
    
    .phase-chart {
        width: 120px;
        height: 120px;
        border-radius: 50%;
        background: conic-gradient(var(--accent-primary) var(--progress), rgba(255, 210, 30, 0.15) var(--progress));
        display: flex;
        align-items: center;
        justify-content: center;
        margin-bottom: 24px;
        margin-left: auto;
        margin-right: auto;
        position: relative;
    }
    
    .phase-chart::after {
        content: '';
        position: absolute;
        width: 80px;
        height: 80px;
        border-radius: 50%;
        background: rgba(1, 9, 26, 0.95);
    }
    
    .phase-chart span {
        position: relative;
        font-size: 1.5rem;
        font-weight: 700;
        color: var(--text-primary);
    }
    
    .phase-list {
        list-style: none;
        padding: 0;
        margin: 0;
        display: grid;
        gap: 12px;
    }
    
    .phase-list li {
        padding: 12px 16px;
        border-radius: 12px;
        background: rgba(245, 246, 247, 0.05);
        border: 1px solid rgba(245, 246, 247, 0.08);
        color: var(--text-secondary);
        font-size: 0.95rem;
    }
    
    .scenario-body {
        max-width: 760px;
        margin: 0 auto;
        text-align: center;
    }
    
    .scenario-body p {
        font-size: 1.05rem;
        line-height: 1.7;
        color: var(--text-secondary);
        margin-bottom: 32px;
    }
    
    .section-flow {
        display: flex;
        justify-content: center;
        align-items: center;
        margin-top: 24px;
        color: var(--accent-primary);
        font-size: 2rem;
    }
    
    .criteria-grid {
        display: grid;
        grid-template-columns: repeat(3, minmax(0, 1fr));
        gap: 24px;
    }
    
    .criteria-card {
        padding: 24px;
        border-radius: 20px;
        border: 1px solid var(--border-subtle);
        background: rgba(1, 9, 26, 0.7);
        display: flex;
        flex-direction: column;
        gap: 16px;
        box-shadow: inset 0 1px 0 rgba(255, 255, 255, 0.03);
    }
    
    .criteria-card h3 {
        font-size: 1.25rem;
        font-weight: 700;
        color: var(--text-primary);
        margin: 0;
    }
    
    .criteria-card ul {
        list-style: disc;
        margin: 0;
        padding-left: 20px;
        color: var(--text-secondary);
        display: grid;
        gap: 10px;
        font-size: 0.95rem;
        line-height: 1.5;
    }
    
    /* Responsive design */
    @media (max-width: 768px) {
        .hero-title {
            font-size: 3rem;
        }
        .hero-action-button {
            width: 100% !important;
            justify-content: center !important;
        }
        .dashboard-section {
            padding: 28px;
            margin: 32px 16px 0 16px;
        }
        .phase-grid {
            grid-template-columns: 1fr;
        }
        .criteria-grid {
            grid-template-columns: 1fr;
        }
    }
    
    @media (max-width: 480px) {
        .hero-actions {
            flex-direction: column;
            gap: 8px;
        }
        .section-title {
            font-size: 1.8rem;
        }
        .phase-chart {
            width: 100px;
            height: 100px;
        }
        .phase-chart::after {
            width: 68px;
            height: 68px;
        }
    }
    </style>
    """)

    gr.Image(
        value="banner.png",
        show_label=False,
        interactive=False,
        type="filepath",
        elem_id="hero-banner"
    )
    
    gr.HTML("""
    <div style="text-align: center; padding: 20px 0;">
        <h1 class="hero-title">Hugging Face KREW Ko-AgentBench</h1>
        <p class="hero-subtitle">한국 실사용 환경 특화 에이전트 벤치마크</p>
    </div>
    """)
    
    # Links section below title
    gr.HTML("""
    <div class="hero-actions">
        <a href="https://hugging-face-krew.github.io/" target="_blank" rel="noopener noreferrer" class="hero-action-button">
            <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
                <path d="M15 7h3a5 5 0 0 1 5 5 5 5 0 0 1-5 5h-3m-6 0H6a5 5 0 0 1-5-5 5 5 0 0 1 5-5h3"/>
                <line x1="8" y1="12" x2="16" y2="12"/>
            </svg>
            <span>Blog</span>
        </a>
        <a href="https://github.com/Hugging-Face-KREW/Ko-AgentBench" target="_blank" rel="noopener noreferrer" class="hero-action-button">
            <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
                <path d="M9 19c-5 1.5-5-2.5-7-3"/>
                <path d="M20 21v-3.87a3.37 3.37 0 0 0-.94-2.61c3.14-.35 6.44-1.54 6.44-7A5.44 5.44 0 0 0 20 4.77 5.07 5.07 0 0 0 19.91 1S18.73.65 16 2.48a13.38 13.38 0 0 0-7 0C6.27.65 5.09 1 5.09 1A5.07 5.07 0 0 0 5 4.77a5.44 5.44 0 0 0-1.5 3.78c0 5.42 3.3 6.61 6.44 7A3.37 3.37 0 0 0 9 18.13V22"/>
            </svg>
            <span>GitHub</span>
        </a>
        <a href="https://huggingface.co/datasets/huggingface-KREW/Ko-AgentBench" target="_blank" rel="noopener noreferrer" class="hero-action-button">
            <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
                <path d="M21 15v4a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2v-4"/>
                <polyline points="7 10 12 15 17 10"/>
                <line x1="12" y1="15" x2="12" y2="3"/>
            </svg>
            <span>Dataset</span>
        </a>
        <a href="https://huggingface.co/spaces/galileo-ai/agent-leaderboard/discussions/new" target="_blank" rel="noopener noreferrer" class="hero-action-button">
            <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
                <path d="M3 3v18h18"/>
                <path d="M7 17v-6"/>
                <path d="M12 17V7"/>
                <path d="M17 17v-3"/>
            </svg>
            <span>Metrics</span>
        </a>
    </div>
    """)

    # Section 1: 단계별 태스크 설계
    gr.HTML("""
    <div class="dashboard-section">
        <div class="section-header">
            <h2 class="section-title">단계별 태스크 설계</h2>
        </div>
        <p class="section-lead" style="text-align: center; margin: 0 auto 24px auto; max-width: 720px; line-height: 1.7; word-break: keep-all;">단순 도구 호출부터 장기적 맥락 능력, 강건성 처리 능력까지 에이전트의 능력을 7단계로 입체적으로 분석하였습니다.</p>
        <div class="phase-grid">
            <div class="phase-card">
                <h3>Single-Turn</h3>
                <div class="phase-chart" style="--progress:80%;">
                    <span>80%</span>
                </div>
                <ul class="phase-list">
                    <li style="color: white;">L1: 단일 도구 실행</li>
                    <li style="color: white;">L2: 도구 선택 능력</li>
                    <li style="color: white;">L3: 순차적 reasoning (Chaining)</li>
                    <li style="color: white;">L4: 병렬적 reasoning (Aggregation)</li>
                    <li style="color: white;">L5: 강건성 (Robustness / Fallback)</li>
                </ul>
            </div>
            <div class="phase-card">
                <h3>Multi-Turn</h3>
                <div class="phase-chart" style="--progress:20%;">
                    <span>20%</span>
                </div>
                <ul class="phase-list">
                    <li style="color: white;">L6: 효율성 (Efficiency)</li>
                    <li style="color: white;">L7: 장기 컨텍스트 기억 (Contextual Memory)</li>
                </ul>
            </div>
        </div>
    </div>
    """)

    # Section 2: 핵심 시나리오 구성
    gr.HTML("""
    <div class="dashboard-section emphasized">
        <div class="section-header">
            <h2 class="section-title">18가지 한국형 API 사용 및 실생활 환경에 특화된 고품질 시나리오 구성</h2>
        </div>
        <div class="scenario-body">
            <p>네이버, 지도, 카카오, 웹사이트 등 한국 실사용 환경 기반의 API를 기반으로 국내 사용자의 일상과 밀접한 '약속 예약', '블로그 후기 검색'과 같은 현실적인 문제 해결 시나리오를 구현했습니다.</p>
        </div>
        <div class="section-flow">⌄</div>
    </div>
    """)

    # Section 3: 핵심 평가 기준
    gr.HTML("""
    <div class="dashboard-section">
        <div class="section-header">
            <h2 class="section-title">핵심 평가 기준</h2>
        </div>
        <div class="criteria-grid">
            <div class="criteria-card">
                <h3>핵심 기반 반복 평가</h3>
                <ul>
                    <li>실패 API 응답 개선</li>
                    <li>'정보 속성 불일치성 변경' 등 기존 벤치마크의 고질적 문제 해결</li>
                    <li>벤치마크의 일관성과 신뢰도 보장</li>
                </ul>
            </div>
            <div class="criteria-card">
                <h3>강건성 테스트</h3>
                <ul>
                    <li>의도된 오류 상황(상품 단종)의 오류 인식/대응 능력(전략)까지 평가</li>
                    <li>현실 환경에서도 안정적으로 작동하는 모델 선별</li>
                </ul>
            </div>
            <div class="criteria-card">
                <h3>단계별 고유 정밀 지표</h3>
                <ul>
                    <li>도구 선택, 파라미터 구성, 데이터 흐름 등 문제 해결의 불필요/소요 단계별 평가</li>
                    <li>모델의 강/약점 정량적으로 식별</li>
                </ul>
            </div>
        </div>
    </div>
    """)
    
    # Metrics overview cards removed per updated design
    
    # Domain filter section with enhanced styling
    gr.HTML("""
    <style>
    /* Enhanced domain selector styling */
    .domain-selector-container {
        background: #ffd21e0d;
        border-radius: 20px;
        padding: 32px;
        margin-bottom: 32px;
        border: 1px solid var(--border-subtle);
        position: relative;
        overflow: visible;
        box-shadow: 
            0 8px 32px rgba(0, 0, 0, 0.3),
            inset 0 1px 0 rgba(255, 255, 255, 0.05);
    }
    
    .domain-selector-container.leaderboard-intro {
        padding-bottom: 0;
        margin-bottom: 32px;
    }
    
    .leaderboard-intro .domain-header {
        margin-bottom: 20px;
    }
    
    .leaderboard-intro .domain-subtitle {
        font-size: 1.1rem;
        max-width: 720px;
        margin: 0 auto;
    }
    
    .leaderboard-intro .dataframe-container {
        margin: 16px -32px -32px;
        padding: 0 32px 32px;
        background: transparent;
        border-radius: 0 0 20px 20px;
    }
    
    .domain-performance-container {
        margin-bottom: 32px;
    }
    
    .domain-performance-container .domain-header {
        margin-bottom: 24px;
    }
    
    .domain-performance-container .domain-subtitle {
        font-size: 1.05rem;
        max-width: 720px;
        margin: 0 auto;
    }
    
    .leaderboard-intro .domain-title,
    .domain-performance-container > .domain-header .domain-title,
    .performance-card-container > .domain-header .domain-title {
        font-size: 2.6rem !important;
    }
    
    .performance-card-content {
        display: flex;
        flex-direction: column;
        gap: 24px;
        align-items: stretch;
    }
    
    .performance-card-display {
        flex: 1;
        min-width: 0;
    }
    
    .performance-card-container {
        margin-top: 32px;
    }
    
    .performance-card-container .domain-header {
        margin-bottom: 24px;
    }
    
    .performance-card-container .domain-subtitle {
        font-size: 1.05rem;
        max-width: 720px;
        margin: 0 auto;
    }
    
    .domain-header {
        text-align: center;
        margin-bottom: 28px;
        position: relative;
        z-index: 1;
    }
    
    .domain-title {
        font-size: 2rem;
        font-weight: 800;
        position: relative;
        display: inline-block;
        margin-bottom: 8px;
        padding: 4px 0;
        color: transparent !important;
        background: linear-gradient(120deg, rgba(255, 255, 255, 0.75) 0%, rgba(255, 210, 30, 0.95) 25%, rgba(255, 139, 0, 0.85) 50%, rgba(255, 210, 30, 0.95) 75%, rgba(255, 255, 255, 0.8) 100%);
        background-size: 220% 100%;
        -webkit-background-clip: text;
        background-clip: text;
        -webkit-text-fill-color: transparent;
        text-shadow: 0 0 22px rgba(255, 210, 30, 0.65), 0 0 45px rgba(255, 210, 30, 0.4);
        filter: drop-shadow(0 0 16px rgba(255, 210, 30, 0.35));
        letter-spacing: 0.02em;
        animation: title-shimmer 5s ease-in-out infinite;
    }

    @keyframes title-shimmer {
        0% {
            background-position: 0% 50%;
        }
        50% {
            background-position: 100% 50%;
        }
        100% {
            background-position: 0% 50%;
        }
    }
    
    .domain-subtitle {
        color: var(--text-secondary);
        font-size: 1.2rem;
        font-family: 'Geist', sans-serif;
    }
    
    /* Custom radio button styling */
    .domain-radio {
        display: flex !important;
        gap: 12px !important;
        flex-wrap: wrap !important;
        justify-content: center !important;
        position: relative;
        z-index: 1;
        background: transparent !important;
        border: none !important;
        box-shadow: none !important;
        padding: 6px 0 !important;
    }
    
    /* Gradio radio button wrapper */
    .domain-radio .wrap {
        display: flex !important;
        gap: 12px !important;
        flex-wrap: wrap !important;
        justify-content: center !important;
        width: 100% !important;
        background: transparent !important;
        border: none !important;
        box-shadow: none !important;
        padding: 6px 0 !important;
    }
    
    .domain-selector-container [role="radiogroup"],
    .domain-selector-container fieldset,
    .domain-selector-container .gradio-radio,
    .domain-selector-container .gradio-radio-group,
    .domain-selector-container .gr-form,
    .domain-selector-container .gradio-radio-group > div {
        background: transparent !important;
        border: none !important;
        box-shadow: none !important;
        padding: 0 !important;
    }
    
    #filters-sorting-container {
        padding: 28px !important;
    }
    
    #filters-sorting-container .gr-box,
    #filters-sorting-container .gradio-column,
    #filters-sorting-container .gradio-row,
    #filters-sorting-container .gradio-group,
    #filters-sorting-container .gradio-radio,
    #filters-sorting-container [role="radiogroup"],
    #filters-sorting-container fieldset {
        background: transparent !important;
        border: none !important;
        box-shadow: none !important;
    }
    
    .filters-sorting-row {
        gap: 18px !important;
        justify-content: center !important;
        flex-wrap: nowrap !important;
    }
    
    .filter-group {
        flex: 1 1 260px !important;
        display: flex !important;
        flex-direction: column !important;
        gap: 12px !important;
        align-items: flex-start !important;
        width: 100% !important;
    }
    
    .filter-group-row {
        display: flex !important;
        align-items: center !important;
        gap: 4px !important;
        justify-content: flex-start !important;
        flex-wrap: nowrap !important;
        width: 100% !important;
    }
    
    .filter-group-row > * {
        margin: 0 !important;
        padding: 0 !important;
        flex: 0 0 auto !important;
        width: auto !important;
        min-width: auto !important;
    }
    
    .filter-group-row > .gr-column,
    .filter-group-row > .gr-box {
        flex: 0 0 auto !important;
        width: auto !important;
        min-width: auto !important;
    }
    
    .filter-group-row .gradio-html,
    .filter-group-row .gradio-html > * {
        display: inline-flex !important;
        align-items: center !important;
        margin: 0 !important;
        padding: 0 !important;
        flex: 0 0 auto !important;
        width: auto !important;
        min-width: auto !important;
    }
    
    .filter-group-row .domain-radio {
        flex: 1 1 auto !important;
        width: 100% !important;
        margin: 0 !important;
        display: inline-flex !important;
        align-items: center !important;
        min-width: 220px !important;
        justify-content: flex-start !important;
        padding-right: 8px !important;
    }
    
    .filter-group .gr-input-label {
        font-size: 1rem !important;
        font-weight: 600 !important;
        color: var(--text-primary) !important;
        text-align: center !important;
        margin-bottom: 12px !important;
    }
    
    .filter-group-label {
        font-size: 1rem !important;
        font-weight: 600 !important;
        color: var(--text-primary) !important;
        text-align: left !important;
        margin: 0 !important;
        font-family: 'Geist', sans-serif !important;
        white-space: nowrap !important;
    }
    
    #filters-sorting-container .domain-radio,
    #filters-sorting-container .domain-radio .wrap {
        flex-wrap: nowrap !important;
        justify-content: flex-start !important;
        align-items: center !important;
        gap: 12px !important;
        width: 100% !important;
    }
    
    .domain-radio label,
    .domain-radio .wrap > label {
        display: inline-flex !important;
        align-items: center !important;
        justify-content: center !important;
        gap: 0 !important;
        padding: 12px 28px !important;
        background: rgba(245, 246, 247, 0.06) !important;
        border: 1px solid var(--border-subtle) !important;
        border-radius: 999px !important;
        cursor: pointer !important;
        transition: all 0.3s ease !important;
        text-align: center !important;
        position: relative !important;
        overflow: hidden !important;
        color: var(--text-primary) !important;
        font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif !important;
        font-weight: 600 !important;
        font-size: 0.95rem !important;
        letter-spacing: 0.01em !important;
        text-align: center !important;
        line-height: 1 !important;
        backdrop-filter: blur(10px) !important;
        -webkit-backdrop-filter: blur(10px) !important;
        min-width: 0 !important;
        flex: 0 0 auto !important;
        box-shadow: 0 0 18px -6px rgba(0, 0, 0, 0.45) !important;
        z-index: 0 !important;
    }
    
    .domain-radio label::before {
        content: '';
        position: absolute;
        inset: 0;
        background: #ffd21e14;
        opacity: 0;
        transition: opacity 0.3s ease;
        pointer-events: none;
        z-index: -1;
    }
    
    .domain-radio label:hover {
        border-color: var(--accent-primary) !important;
        background: rgba(255, 210, 30, 0.12) !important;
        box-shadow: 0 0 18px 0 rgba(255, 210, 30, 0.45) !important;
        border-color: rgba(255, 210, 30, 0.45) !important;
    }
    
    .domain-radio label:hover::before {
        opacity: 1;
    }
    
    .domain-radio input[type="radio"] {
        display: none !important;
    }
    
    .domain-radio input[type="radio"]:checked + label,
    .domain-radio .wrap > label:has(input[type="radio"]:checked),
    .domain-radio label.selected,
    .domain-radio label[aria-checked="true"] {
        background: #ffd21e33 !important;
        border-color: var(--accent-primary) !important;
        color: var(--accent-tertiary) !important;
        box-shadow: 0 0 24px 0 rgba(255, 210, 30, 0.55) !important;
    }
    
    .domain-radio input[type="radio"]:checked + label::before,
    .domain-radio label[aria-checked="true"]::before {
        opacity: 1;
    }
    
    /* Model selector styling */
    .model-selector-container {
        padding: 28px;
    }
    
    .model-selector-container .domain-header {
        margin-bottom: 18px;
    }
    
    .model-selector-container .domain-title {
        font-size: 1.8rem;
    }
    
    .model-selector-container .domain-subtitle {
        font-size: 1rem;
    }
    
    .model-selector-container .model-dropdown,
    .model-selector-container .pill-button {
        display: flex !important;
        justify-content: center !important;
        align-items: center !important;
        width: 100% !important;
    }
    
    .model-selector-container .model-dropdown {
        margin-bottom: 12px !important;
    }
    
    .model-dropdown {
        background: transparent !important;
        border: none !important;
        box-shadow: none !important;
        padding: 0 !important;
        margin: 0 !important;
    }
    
    .model-dropdown .gradio-dropdown,
    .model-dropdown .gradio-dropdown > div,
    .model-dropdown .gr-form,
    .model-dropdown .gradio-input,
    .model-dropdown .gradio-button-group,
    .model-dropdown .wrap {
        background: transparent !important;
        border: none !important;
        box-shadow: none !important;
        padding: 0 !important;
        margin: 0 !important;
        width: 100% !important;
    }
    
    .model-dropdown select,
    .model-dropdown [role="combobox"] {
        background: rgba(245, 246, 247, 0.06) !important;
        border: 1px solid var(--border-subtle) !important;
        border-radius: 999px !important;
        padding: 12px 24px !important;
        color: var(--text-primary) !important;
        font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif !important;
        font-weight: 600 !important;
        font-size: 1rem !important;
        letter-spacing: 0.01em !important;
        min-height: 46px !important;
        min-width: 240px !important;
        width: 100% !important;
        cursor: pointer !important;
        transition: all 0.3s ease !important;
        text-align: center !important;
        text-align-last: center !important;
    }
    
    .model-dropdown select:focus,
    .model-dropdown [role="combobox"]:focus-visible {
        outline: none !important;
        border-color: var(--accent-primary) !important;
        box-shadow: 0 0 0 3px rgba(255, 210, 30, 0.25) !important;
    }
    
    .model-dropdown button {
        display: inline-flex !important;
        align-items: center !important;
        justify-content: center !important;
        gap: 8px !important;
        width: 100% !important;
        padding: 12px 24px !important;
        background: rgba(245, 246, 247, 0.06) !important;
        border: 1px solid var(--border-subtle) !important;
        border-radius: 999px !important;
        color: var(--text-primary) !important;
        font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif !important;
        font-weight: 600 !important;
        font-size: 0.95rem !important;
        letter-spacing: 0.01em !important;
        cursor: pointer !important;
        transition: all 0.3s ease !important;
    }
    
    .model-dropdown button:hover:not(:disabled),
    .model-dropdown [role="combobox"]:hover {
        transform: translateY(-1px) !important;
        border-color: var(--accent-primary) !important;
        box-shadow: 0 12px 28px rgba(255, 210, 30, 0.25) !important;
        background: rgba(255, 210, 30, 0.12) !important;
    }
    
    .model-dropdown .tags {
        display: flex !important;
        flex-wrap: wrap !important;
        gap: 6px !important;
        justify-content: center !important;
    }
    
    .model-dropdown .tag {
        background: rgba(255, 210, 30, 0.18) !important;
        border: 1px solid rgba(255, 210, 30, 0.35) !important;
        color: var(--text-primary) !important;
        border-radius: 999px !important;
        padding: 4px 10px !important;
        font-size: 0.85rem !important;
        font-weight: 500 !important;
    }
    
    .model-dropdown label {
        display: flex !important;
        flex-direction: column !important;
        gap: 8px !important;
        width: 100% !important;
        align-items: center !important;
    }

    .model-dropdown label > span {
        display: none !important;
    }
    
    .pill-button button {
        display: inline-flex !important;
        align-items: center !important;
        justify-content: center !important;
        gap: 8px !important;
        padding: 12px 28px !important;
        background: #ffd21e !important;
        border: 1px solid rgba(255, 210, 30, 0.6) !important;
        border-radius: 999px !important;
        color: #FFFFFF !important;
        font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif !important;
        font-weight: 600 !important;
        font-size: 0.95rem !important;
        letter-spacing: 0.01em !important;
        text-decoration: none !important;
        cursor: pointer !important;
        transition: all 0.3s ease !important;
        box-shadow: 0 12px 28px rgba(255, 210, 30, 0.25) !important;
        border-bottom: none !important;
        text-shadow: 0 1px 2px rgba(0, 0, 0, 0.35) !important;
    }
    
    .pill-button button:hover:not(:disabled) {
        transform: translateY(-2px) !important;
        background: #ffd21e !important;
        box-shadow: 0 16px 36px rgba(255, 210, 30, 0.35) !important;
        text-shadow: 0 2px 6px rgba(0, 0, 0, 0.45) !important;
    }
    
    .pill-button button:disabled {
        opacity: 0.6 !important;
        cursor: not-allowed !important;
        box-shadow: none !important;
    }
    
    .level-selector-container {
        padding: 28px;
    }
    
    .level-selector-container .domain-header {
        margin-bottom: 18px;
    }
    
    .level-selector-container .domain-title {
        font-size: 1.8rem;
    }
    
    .level-selector-container .domain-subtitle {
        font-size: 1rem;
    }
    
    /* Domain icons */
    .domain-icon {
        font-size: 1.5rem;
        margin-bottom: 4px;
        display: block;
        filter: drop-shadow(0 0 10px currentColor);
    }
    
    .domain-name {
        font-size: 0.95rem;
        font-weight: 500;
        margin-top: 4px;
    }
    
    /* Badge for domain counts */
    .domain-count {
        position: absolute;
        top: 8px;
        right: 8px;
        background: var(--accent-primary);
        color: white;
        font-size: 0.75rem;
        padding: 2px 8px;
        border-radius: 12px;
        font-weight: 600;
        opacity: 0.8;
    }
    
    /* Filter radio buttons styling - smaller for better fit */
    .filter-radio {
        max-width: 100% !important;
    }
    
    .filter-radio .gr-row {
        gap: 8px !important;
    }
    
    .filter-radio .gr-column {
        min-width: 0 !important;
        flex: 1 !important;
    }
    
    .filter-radio .gr-form {
        min-width: 0 !important;
    }
    
    .filter-radio .gr-radio-group {
        gap: 4px !important;
    }
    
    .filter-radio .domain-radio {
        display: flex !important;
        gap: 4px !important;
        flex-wrap: nowrap !important;
        justify-content: center !important;
    }
    
    .filter-radio .domain-radio label {
        min-width: auto !important;
        max-width: 120px !important;
        padding: 8px 12px !important;
        font-size: 0.8rem !important;
        white-space: nowrap !important;
        overflow: hidden !important;
        text-overflow: ellipsis !important;
    }
    
    /* Additional targeting for the specific filter components */
    .filter-radio .gr-box {
        padding: 8px !important;
    }
    
    .filter-radio .gr-radio {
        gap: 4px !important;
    }
    
    .filter-radio .gr-input-label {
        font-size: 0.85rem !important;
        margin-bottom: 4px !important;
    }
    
    /* Force compact layout for the filters */
    @media (max-width: 1400px) {
        .filter-radio .domain-radio label {
            padding: 6px 10px !important;
            font-size: 0.75rem !important;
        }
    }
    
    /* Compact filter row styling */
    .compact-filter-row {
        margin-bottom: 20px !important;
    }
    
    .compact-filter-row .gr-column {
        padding: 0 8px !important;
    }
    
    .compact-filter-row .gr-box {
        padding: 0 !important;
    }
    
    /* Compact radio button styling */
    .compact-radio {
        width: 100% !important;
    }
    
    .compact-radio > label {
        font-size: 0.85rem !important;
        margin-bottom: 8px !important;
        font-weight: 600 !important;
        color: var(--text-primary) !important;
        display: block !important;
    }
    
    .compact-radio .wrap {
        display: flex !important;
        flex-wrap: nowrap !important;
        gap: 4px !important;
        justify-content: center !important;
    }
    
    .compact-radio .wrap > label {
        display: inline-flex !important;
        align-items: center !important;
        justify-content: center !important;
        padding: 6px 10px !important;
        margin: 0 !important;
        background: var(--bg-card) !important;
        border: 1px solid var(--border-default) !important;
        border-radius: 8px !important;
        cursor: pointer !important;
        transition: all 0.2s ease !important;
        font-size: 0.75rem !important;
        white-space: nowrap !important;
        flex: 1 !important;
        min-width: 0 !important;
        overflow: hidden !important;
        text-overflow: ellipsis !important;
    }
    
    .compact-radio .wrap > label:has(input[type="radio"]:checked) {
        background: transparent !important;
        border-color: var(--accent-primary) !important;
        color: var(--text-primary) !important;
        font-weight: 600 !important;
    }
    
    .compact-radio .wrap > label:hover {
        background: rgba(255, 210, 30, 0.1) !important;
        border-color: var(--accent-primary) !important;
        transform: scale(1.02) !important;
    }
    
    .compact-radio input[type="radio"] {
        display: none !important;
    }
    
    /* Target Gradio's data attributes for selected state */
    .compact-radio label[data-selected="true"],
    .compact-radio label[aria-checked="true"],
    .domain-radio label[data-selected="true"],
    .domain-radio label[aria-checked="true"] {
        background: transparent !important;
        border-color: var(--accent-primary) !important;
        color: var(--text-primary) !important;
        font-weight: 600 !important;
    }
    
    /* Sort by radio buttons */
    .sort-by-radio .domain-radio {
        display: flex !important;
        gap: 10px !important;
        flex-wrap: wrap !important;
        justify-content: flex-start !important;
    }
    
    .sort-by-radio .domain-radio .wrap {
        display: flex !important;
        gap: 10px !important;
        flex-wrap: wrap !important;
        justify-content: flex-start !important;
        width: 100% !important;
    }
    
    .sort-by-radio .domain-radio label,
    .sort-by-radio .domain-radio .wrap > label {
        min-width: 180px !important;
        max-width: 220px !important;
        padding: 12px 20px !important;
        font-size: 0.95rem !important;
    }
    </style>
    
    """)
    
    level_options = list(level_details.keys())
    
    with gr.Column(elem_classes=["domain-selector-container"], elem_id="task-level-selector"):
        gr.HTML("""
        <div class="domain-header">
            <h2 class="domain-title" style="color: white;">🧠 Select Task Level</h2>
            <p class="domain-subtitle" style="color: white;">Ko-AgentBench의 ALL · L1~L7 단계별 에이전트 성능을 손쉽게 비교하세요.</p>
        </div>
        """)
        domain_filter = gr.Radio(
            choices=level_options,
            value=default_level,
            label="",
            interactive=True,
            container=False,
            elem_classes=["domain-radio"]
        )
    
    # Filter controls with domain styling
    with gr.Column(elem_classes=["domain-selector-container", "filters-sorting-container"], elem_id="filters-sorting-container"):
        gr.HTML("""
        <div class="domain-header">
            <h2 class="domain-title" style="color: white;">🔍 Filters & Sorting</h2>
            <p class="domain-subtitle" style="color: white;">모델 접근 방식과 정렬 순서를 선택해 맞춤 뷰를 구성하세요.</p>
        </div>
        """)
        with gr.Row(elem_classes=["filters-sorting-row"]):
            with gr.Column(scale=1, elem_classes=["filter-group"]):
                with gr.Row(elem_classes=["filter-group-row"]):
                    gr.HTML("<span class='filter-group-label' style='color: white;'>Model Access</span>")
                    model_type_filter = gr.Radio(
                        choices=["All", "OSS", "API"],
                        value="All",
                        label="",
                        elem_classes=["domain-radio"],
                        container=False
                    )
            with gr.Column(scale=1, elem_classes=["filter-group"]):
                with gr.Row(elem_classes=["filter-group-row"]):
                    gr.HTML("<span class='filter-group-label' style='color: white;'>Sort Order</span>")
                    sort_order = gr.Radio(
                        choices=["Descending", "Ascending"],
                        value="Descending",
                        label="",
                        elem_classes=["domain-radio"],
                        container=False
                    )
    
    # Main leaderboard table with dynamic title
    leaderboard_title = gr.HTML(update_leaderboard_title(default_level))
    
    leaderboard_table = gr.HTML(initial_table)
    
    gr.HTML("""
        </div>
    </div>""")
    
    # Radar Chart Section
    gr.HTML("""
    <div class="domain-selector-container domain-performance-container">
        <div class="domain-header">
            <h2 class="domain-title" style="color: white;">Core Capability Radar</h2>
            <p class="domain-subtitle" style="color: white;">Track six essential pillars: Success, Execution, Reasoning, Robustness, Efficiency, and Call Validity.</p>
        </div>
    """)
    
    with gr.Column(elem_classes=["domain-selector-container", "model-selector-container"], elem_id="radar-model-selector"):
        gr.HTML("""
        <div class="domain-header">
            <h2 class="domain-title" style="color: white;">🎯 Select Models for Comparison</h2>
            <p class="domain-subtitle" style="color: white;">Choose up to 5 models to map on the capability radar.</p>
        </div>
        """)
        model_selector = gr.Dropdown(
            choices=initial_df['Model'].tolist()[:10],
            value=initial_df['Model'].tolist()[:5],
            multiselect=True,
            label="",
            info=None,
            container=False,
            elem_classes=["model-dropdown"]
        )
    
    # Radar chart plot - wrapped in centered container
    gr.HTML('<div class="chart-container radar-chart-container">')
    radar_chart = gr.Plot(
        label="",
        value=create_domain_radar_chart(
            load_leaderboard_data(), 
            initial_df['Model'].tolist()[:5]
        ),
        elem_classes=["radar-chart", "plot-container"]
    )
    gr.HTML('</div>')
    
    gr.HTML("</div>")
    
    # Level metric breakdown section
    gr.HTML("""
    <div class="domain-selector-container domain-performance-container level-metrics-wrapper">
        <div class="domain-header">
            <h2 class="domain-title" style="color: white;">Level-Specific Metric Spotlight</h2>
            <p class="domain-subtitle" style="color: white;">Dive deeper into each Ko-AgentBench stage and compare model scores across its unique evaluation metrics.</p>
        </div>
    """)
    
    with gr.Column(elem_classes=["domain-selector-container", "level-selector-container"], elem_id="level-selector-box"):
        gr.HTML("""
        <div class="domain-header">
            <h2 class="domain-title" style="color: white;">🧭 Select Task Level and Models</h2>
            <p class="domain-subtitle" style="color: white;">Choose a level and up to 5 models to explore their detailed SR-driven metrics.</p>
        </div>
        """)
        level_metric_selector = gr.Dropdown(
            choices=level_ids,
            value=level_ids[0] if level_ids else None,
            multiselect=False,
            label="",
            info=None,
            container=False,
            elem_classes=["level-dropdown"]
        )
        level_model_selector = gr.Dropdown(
            choices=initial_level_model_choices,
            value=initial_level_model_values,
            multiselect=True,
            label="",
            info=None,
            container=False,
            elem_classes=["model-dropdown", "level-model-dropdown"]
        )
    
    gr.HTML('<div class="chart-container level-metric-chart-container">')
    level_metric_chart = gr.Plot(
        label="",
        value=initial_level_metric_chart,
        elem_classes=["level-metric-plot", "plot-container"]
    )
    gr.HTML("""
        </div>
    </div>
    """)
    
    # Heatmap section
    gr.HTML("""
    <div class="domain-selector-container domain-performance-container heatmap-wrapper">
        <div class="domain-header">
            <h2 class="domain-title" style="color: white;">Comprehensive Performance Heatmap</h2>
            <p class="domain-subtitle" style="color: white;">View Ko-AgentBench SR scores across L1~L7 for each model in a single glance.</p>
        </div>
        <div class="chart-container heatmap-chart-container">
    """)
    heatmap_chart = gr.Plot(
        label="",
        value=initial_heatmap,
        elem_classes=["heatmap-plot", "plot-container"]
    )
    gr.HTML("""
        </div>
    </div>
    """)
    
    # Update functions
    def get_optimal_sort_order(sort_by_value):
        """Return the optimal sort order for a given metric"""
        # Metrics where higher is better (descending)
        descending_metrics = ["Overall Success"] + [sr_column_map[level] for level in level_ids]
        
        # Metrics where lower is better (ascending)  
        ascending_metrics = []
        
        if sort_by_value in descending_metrics:
            return "Descending"
        elif sort_by_value in ascending_metrics:
            return "Ascending"
        else:
            return "Descending"  # Default fallback
    
    def update_table(level_filter, model_type_filter, sort_order):
        title_html = update_leaderboard_title(level_filter)
        sort_metric = "Overall Success" if level_filter == "ALL" else sr_column_map.get(resolve_level(level_filter), "Overall Success")
        table_html = filter_and_sort_data(level_filter, model_type_filter, sort_metric, sort_order)
        return title_html, table_html
    
    def update_radar_chart(domain_filter, model_type_filter, sort_order, selected_models, selected_level, level_selected_models):
        # Get filtered dataframe
        df = load_leaderboard_data()
        sort_metric = "Overall Success" if domain_filter == "ALL" else sr_column_map.get(resolve_level(domain_filter), "Overall Success")
        filtered_df, _, _ = apply_filters(df, domain_filter, model_type_filter, sort_order, sort_metric)
        
        # Update model selector choices based on filtered data
        available_models_all = filtered_df['Model'].tolist()
        available_models = available_models_all[:15]  # Top 15 from filtered results
        
        # If selected models are not in available models, reset to top 5
        if selected_models:
            valid_selected = [m for m in selected_models if m in available_models]
            if not valid_selected:
                valid_selected = available_models[:5]
        else:
            valid_selected = available_models[:5]
        
        # Create radar chart
        chart = create_domain_radar_chart(filtered_df, valid_selected)
        
        # Prepare heatmap order prioritizing selected models
        heatmap_order = []
        for model in valid_selected:
            if model not in heatmap_order:
                heatmap_order.append(model)
        for model in available_models_all:
            if model not in heatmap_order:
                heatmap_order.append(model)
        heatmap_order = heatmap_order[:12]
        heatmap_fig = create_performance_heatmap(filtered_df, heatmap_order)
        
        # Level metric chart
        effective_level = selected_level if selected_level in level_ids else (level_ids[0] if level_ids else None)
        available_level_models = available_models_all
        if level_selected_models:
            valid_level_models = [m for m in level_selected_models if m in available_level_models][:5]
            if not valid_level_models:
                valid_level_models = available_level_models[:5]
        else:
            valid_level_models = available_level_models[:5]
        level_metric_fig = create_level_metric_chart(filtered_df, effective_level, valid_level_models) if effective_level else create_empty_level_metric_chart("Select a level to view its metrics")
        
        return (
            gr.Dropdown(
                choices=available_models,
                value=valid_selected,
                multiselect=True,
                label="",
                info=None,
                container=False,
                elem_classes=["model-dropdown"]
            ),
            chart,
            heatmap_fig,
            gr.Dropdown(
                choices=available_level_models,
                value=valid_level_models,
                multiselect=True,
                label="",
                info=None,
                container=False,
                elem_classes=["model-dropdown", "level-model-dropdown"]
            ),
            level_metric_fig,
        )
    
    def update_radar_only(domain_filter, model_type_filter, sort_order, selected_models, selected_level, level_selected_models):
        # Get filtered dataframe
        df = load_leaderboard_data()
        sort_metric = "Overall Success" if domain_filter == "ALL" else sr_column_map.get(resolve_level(domain_filter), "Overall Success")
        filtered_df, _, _ = apply_filters(df, domain_filter, model_type_filter, sort_order, sort_metric)
        
        available_models_all = filtered_df['Model'].tolist()
        if selected_models:
            valid_selected = [m for m in selected_models if m in available_models_all]
            if not valid_selected:
                valid_selected = available_models_all[:5]
        else:
            valid_selected = available_models_all[:5]
        
        heatmap_order = []
        for model in valid_selected:
            if model not in heatmap_order:
                heatmap_order.append(model)
        for model in available_models_all:
            if model not in heatmap_order:
                heatmap_order.append(model)
        heatmap_order = heatmap_order[:12]
        
        effective_level = selected_level if selected_level in level_ids else (level_ids[0] if level_ids else None)
        available_level_models = available_models_all
        if level_selected_models:
            valid_level_models = [m for m in level_selected_models if m in available_level_models][:5]
            if not valid_level_models:
                valid_level_models = available_level_models[:5]
        else:
            valid_level_models = available_level_models[:5]
        level_metric_fig = create_level_metric_chart(filtered_df, effective_level, valid_level_models) if effective_level else create_empty_level_metric_chart("Select a level to view its metrics")
        
        return (
            create_domain_radar_chart(filtered_df, valid_selected),
            create_performance_heatmap(filtered_df, heatmap_order),
            gr.Dropdown(
                choices=available_level_models,
                value=valid_level_models,
                multiselect=True,
                label="",
                info=None,
                container=False,
                elem_classes=["model-dropdown", "level-model-dropdown"]
            ),
            level_metric_fig,
        )
    
    def update_level_metric_only(domain_filter, model_type_filter, sort_order, selected_models, selected_level, level_selected_models):
        df = load_leaderboard_data()
        sort_metric = "Overall Success" if domain_filter == "ALL" else sr_column_map.get(resolve_level(domain_filter), "Overall Success")
        filtered_df, _, _ = apply_filters(df, domain_filter, model_type_filter, sort_order, sort_metric)
        available_models = filtered_df['Model'].tolist()
        if level_selected_models:
            valid_level_models = [m for m in level_selected_models if m in available_models][:5]
            if not valid_level_models:
                valid_level_models = available_models[:5]
        else:
            valid_level_models = available_models[:5]
        effective_level = selected_level if selected_level in level_ids else (level_ids[0] if level_ids else None)
        level_chart = create_level_metric_chart(filtered_df, effective_level, valid_level_models) if effective_level else create_empty_level_metric_chart("Select a level to view its metrics")
        return (
            gr.Dropdown(
                choices=available_models,
                value=valid_level_models,
                multiselect=True,
                label="",
                info=None,
                container=False,
                elem_classes=["model-dropdown", "level-model-dropdown"]
            ),
            level_chart,
        )

    # Update table when filters change
    filter_inputs = [domain_filter, model_type_filter, sort_order]

    for input_component in filter_inputs:
        input_component.change(
            fn=update_table,
            inputs=filter_inputs,
            outputs=[leaderboard_title, leaderboard_table]
        )
        
        # Also update radar chart when filters change
        input_component.change(
            fn=update_radar_chart,
            inputs=filter_inputs + [model_selector, level_metric_selector, level_model_selector],
            outputs=[model_selector, radar_chart, heatmap_chart, level_model_selector, level_metric_chart]
        )

    # Update radar chart when model selection changes
    model_selector.change(
        fn=update_radar_only,
        inputs=filter_inputs + [model_selector, level_metric_selector, level_model_selector],
        outputs=[radar_chart, heatmap_chart, level_model_selector, level_metric_chart]
    )

    level_metric_selector.change(
        fn=update_level_metric_only,
        inputs=filter_inputs + [model_selector, level_metric_selector, level_model_selector],
        outputs=[level_model_selector, level_metric_chart]
    )

    level_model_selector.change(
        fn=update_level_metric_only,
        inputs=filter_inputs + [model_selector, level_metric_selector, level_model_selector],
        outputs=[level_model_selector, level_metric_chart]
    )
    
    # Define generate_performance_card function before using it
    def generate_performance_card(model_name):
        """Generate HTML for the model performance card"""
        if not model_name:
            return """<div style="text-align: center; color: var(--text-secondary); padding: 40px;">
                Please select a model to generate its performance card
            </div>"""
        
        # Get model data
        df = load_leaderboard_data()
        model_data = df[df['Model'] == model_name]
        
        if model_data.empty:
            return """<div style="text-align: center; color: var(--text-secondary); padding: 40px;">
                Model not found in the database
            </div>"""
        
        row = model_data.iloc[0]
        
        # Get overall rank based on overall success
        df_with_success = df.copy()
        df_with_success['Overall Success'] = pd.to_numeric(df_with_success.get('Overall Success', pd.Series()), errors='coerce')
        df_with_success = df_with_success[df_with_success['Overall Success'].notna()]
        df_sorted = df_with_success.sort_values('Overall Success', ascending=False).reset_index(drop=True)
        try:
            rank = df_sorted[df_sorted['Model'] == model_name].index[0] + 1
        except:
            rank = 'N/A'
        
        # Format values
        def format_value(val, decimals=3, prefix='', suffix=''):
            if pd.isna(val) or val == '':
                return 'N/A'
            return f"{prefix}{float(val):.{decimals}f}{suffix}"
        
        def format_score(value):
            if pd.isna(value) or value == '':
                return 'N/A'
            return f"{float(value):.3f}"
        
        radar_metrics = [
            ("기초 수행력", row.get('Execution Accuracy')),
            ("복합 추론력", row.get('Complex Reasoning')),
            ("견고성", row.get('Robustness')),
            ("맥락 효율성", row.get('Context & Efficiency')),
            ("전반적 성공률", row.get('Overall Success')),
            ("기본적 유효성", row.get('Call Validity')),
        ]
        radar_values = []
        radar_labels = []
        for label, value in radar_metrics:
            if pd.isna(value) or value == '':
                radar_values.append(0.0)
            else:
                try:
                    radar_values.append(max(0.0, min(1.0, float(value))))
                except (TypeError, ValueError):
                    radar_values.append(0.0)
            radar_labels.append(label)
        
        mini_radar_html = build_static_radar_chart(radar_values, radar_labels)
        
        level_blocks = []
        for level in level_ids:
            sr_col = sr_column_map.get(level)
            level_blocks.append((level, row.get(sr_col, '')))
        
        evaluation_date = EVALUATION_DATE
        icon_html = ""
        if KREW_ICON_BASE64:
            icon_html = f'<img src="data:image/png;base64,{KREW_ICON_BASE64}" alt="Krew icon" />'
        else:
            icon_html = '<div class="icon-fallback">🤖</div>'
        
        card_html = f"""
        <div class="performance-card">
            <div class="card-top-row">
                <div class="model-identity">
                    <div class="model-icon">{icon_html}</div>
                    <div class="model-meta">
                        <div class="card-model-name">{model_name}</div>
                        <div class="meta-line">Vendor · <span>{row['Vendor']}</span></div>
                        <div class="meta-line evaluation">
                            <span class="date-pill">Evaluation Date</span>
                            <span>{evaluation_date}</span>
                        </div>
                    </div>
                </div>
                <div class="rank-panel">
                    <div class="rank-label">RANK</div>
                    <div class="rank-value">#{rank}</div>
                </div>
            </div>
            <div class="card-main">
                <div class="card-body">
                    <div class="radar-slot">
                        {mini_radar_html}
                    </div>
                    <div class="core-section">
                        <div class="core-metric-grid">
        """
        ordered_labels = ["기초 수행력", "복합 추론력", "견고성", "맥락 효율성", "전반적 성공률", "기본적 유효성"]
        ordered_metrics = sorted(radar_metrics, key=lambda x: ordered_labels.index(x[0]) if x[0] in ordered_labels else len(ordered_labels))
        top_metrics = ordered_metrics[:3]
        bottom_metrics = ordered_metrics[3:]
        card_html += """
                        <div class="core-metric-row">
        """
        for label, value in top_metrics:
            card_html += f"""
                            <div class="core-metric-card">
                                <div class="metric-label">{label}</div>
                                <div class="metric-value">{format_score(value)}</div>
                            </div>
            """
        card_html += """
                        </div>
                        <div class="core-metric-row">
        """
        for label, value in bottom_metrics:
            card_html += f"""
                            <div class="core-metric-card">
                                <div class="metric-label">{label}</div>
                                <div class="metric-value">{format_score(value)}</div>
                            </div>
            """
        card_html += """
                        </div>
                    </div>
                </div>
            </div>
                <div class="level-strip">
        """
        for level, value in level_blocks:
            card_html += f"""
                        <div class="level-tile">
                            <div class="level-tile-label">{level}</div>
                            <div class="level-tile-score">{format_score(value)}</div>
                        </div>
            """
        card_html += """
                </div>
            </div>
        </div>
        """
        
        return card_html
    
    # MODEL PERFORMANCE CARD SECTION
    gr.HTML("""
    <div class="domain-selector-container performance-card-container">
        <div class="domain-header">
            <h2 class="domain-title" style="color: white;">Model Performance Card</h2>
            <p class="domain-subtitle" style="color: white;">Comprehensive performance card for any model - perfect for presentations and reports</p>
        </div>
        <div class="performance-card-content">
    """)
    
    with gr.Column(elem_classes=["domain-selector-container", "model-selector-container"], elem_id="model-selector-box"):
        gr.HTML("""
        <div class="domain-header">
            <h2 class="domain-title" style="color: white;">🤖 Select Model</h2>
            <p class="domain-subtitle" style="color: white;">비교할 모델을 선택하세요.</p>
        </div>
        """)
        card_model_selector = gr.Dropdown(
            choices=initial_df['Model'].tolist(),
            value=initial_df['Model'].tolist()[0] if len(initial_df) > 0 else None,
            label="",
            info=None,
            container=False,
            elem_classes=["model-dropdown"]
        )
        download_card_btn = gr.Button(
            "Download Card as PNG",
            elem_id="download-card-btn",
            elem_classes=["pill-button"]
        )
    
    gr.HTML("""
            <div class="performance-card-display" id="card-display-container">
    """)
    
    # Card display area - generate initial card
    initial_model = initial_df['Model'].tolist()[0] if len(initial_df) > 0 else None
    initial_card_html = generate_performance_card(initial_model) if initial_model else ""
    card_display = gr.HTML(value=initial_card_html, elem_id="performance-card-html")
    
    gr.HTML("""
            </div>
        </div>
    </div>
    """)
    
    # Add custom CSS for the performance card
    gr.HTML("""
    <style>
    /* Performance Card Styles */
    .performance-card {
        background: linear-gradient(140deg, rgba(1, 9, 26, 0.95) 0%, rgba(20, 34, 58, 0.65) 100%);
        border: 1px solid rgba(255, 210, 30, 0.35);
        border-radius: 24px;
        padding: 28px;
        max-width: 820px;
        margin: 0 auto;
        box-shadow: 0 18px 36px rgba(0, 0, 0, 0.35);
        display: flex;
        flex-direction: column;
        gap: 28px;
    }
    
    .card-top-row {
        display: flex;
        justify-content: space-between;
        gap: 24px;
        align-items: stretch;
    }

    .card-main {
        display: flex;
        flex-direction: column;
        gap: 20px;
    }
    
    .model-identity {
        display: flex;
        gap: 20px;
        align-items: center;
    }
    
    .model-icon {
        width: 88px;
        height: 88px;
        background: rgba(245, 246, 247, 0.08);
        border-radius: 20px;
        display: flex;
        align-items: center;
        justify-content: center;
        padding: 12px;
        border: 1px solid rgba(245, 246, 247, 0.12);
    }
    
    .model-icon img {
        width: 100%;
        height: 100%;
        object-fit: contain;
    }
    
    .icon-fallback {
        font-size: 2.8rem;
    }
    
    .model-meta {
        display: flex;
        flex-direction: column;
        gap: 6px;
    }
    
    .card-model-name {
        font-size: 1.9rem;
        font-weight: 800;
        letter-spacing: 0.01em;
        color: var(--text-primary);
    }
    
    .meta-line {
        font-size: 0.95rem;
        color: var(--text-secondary);
        display: flex;
        gap: 6px;
        align-items: center;
    }
    
    .meta-line span {
        color: var(--text-primary);
        font-weight: 600;
    }
    
    .date-pill {
        display: inline-flex;
        align-items: center;
        gap: 6px;
        background: rgba(255, 210, 30, 0.18);
        color: var(--accent-primary);
        padding: 2px 10px;
        border-radius: 999px;
        font-size: 0.75rem;
        font-weight: 600;
        letter-spacing: 0.05em;
        text-transform: uppercase;
    }
    
    .rank-panel {
        min-width: 140px;
        background: rgba(255, 210, 30, 0.12);
        border: 1px solid rgba(255, 210, 30, 0.35);
        border-radius: 20px;
        display: flex;
        flex-direction: column;
        align-items: center;
        justify-content: center;
        padding: 18px;
        gap: 6px;
    }
    
    .rank-label {
        font-size: 0.9rem;
        letter-spacing: 0.1em;
        color: var(--text-secondary);
    }
    
    .rank-value {
        font-size: 2.4rem;
        font-weight: 800;
        color: var(--text-primary);
        letter-spacing: 0.04em;
    }
    
    .radar-legend {
        display: grid;
        grid-template-columns: repeat(auto-fit, minmax(130px, 1fr));
        gap: 10px;
        font-size: 0.8rem;
        color: var(--text-tertiary);
        letter-spacing: 0.04em;
        text-transform: uppercase;
    }
    
    .card-body {
        display: grid;
        grid-template-columns: 240px 1fr;
        gap: 28px;
        align-items: start;
    }
    
    .radar-slot {
        width: 240px;
        height: 240px;
        display: flex;
        align-items: center;
        justify-content: center;
        border-radius: 20px;
        background: rgba(245, 246, 247, 0.04);
        border: 1px dashed rgba(245, 246, 247, 0.12);
        padding: 10px;
        box-sizing: border-box;
        justify-self: center;
    }
    
    .radar-slot svg {
        width: 100%;
        height: 100%;
    }
    
    .heatmap-wrapper {
        margin-top: 40px;
    }
    
    .heatmap-chart-container {
        padding: 0 20px 32px;
    }
            
    .radar-chart-container {
        padding: 0 20px 32px;
    }            
    
    .heatmap-plot {
        width: 100%;
        max-width: 1400px;
        margin: 0 auto;
        border-radius: 22px;
        overflow: hidden;
        background: rgba(245, 246, 247, 0.02);
        border: 1px solid var(--border-subtle);
        box-shadow: 0 14px 40px rgba(0, 0, 0, 0.35);
    }
    
    .heatmap-plot .modebar {
        display: none;
    }

    .level-metrics-wrapper {
        margin-top: 40px;
    }
    
    .level-metric-controls {
        justify-content: center;
        align-items: center;
        margin-bottom: 20px;
        gap: 12px;
    }
    
    .level-metric-chart-container {
        padding: 0 20px 32px;
    }
    
    .level-metric-plot {
        width: 100%;
        max-width: 1400px;
        margin: 0 auto;
        border-radius: 22px;
        overflow: hidden;
        background: rgba(245, 246, 247, 0.02);
        border: 1px solid var(--border-subtle);
        box-shadow: 0 14px 40px rgba(0, 0, 0, 0.35);
    }
    
    .level-dropdown {
        max-width: 260px !important;
        margin: 0 auto !important;
    }
    
    .level-dropdown select,
    .level-dropdown [role="combobox"],
    .level-dropdown button {
        background: rgba(245, 246, 247, 0.06) !important;
        border: 1px solid var(--border-subtle) !important;
        border-radius: 999px !important;
        padding: 12px 20px !important;
        color: var(--text-primary) !important;
        font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif !important;
        font-weight: 600 !important;
        font-size: 0.95rem !important;
        text-align: center !important;
        min-height: 46px !important;
        transition: all 0.3s ease !important;
        box-shadow: 0 10px 24px rgba(255, 210, 30, 0.15) !important;
    }
    
    .level-dropdown select:hover,
    .level-dropdown [role="combobox"]:hover,
    .level-dropdown button:hover:not(:disabled) {
        transform: translateY(-1px) !important;
        border-color: var(--accent-primary) !important;
        box-shadow: 0 12px 28px rgba(255, 210, 30, 0.25) !important;
        background: rgba(255, 210, 30, 0.12) !important;
    }
    
    .level-model-dropdown {
        width: 100% !important;
        margin: 12px auto 0 !important;
    }
    
    .radar-placeholder {
        display: flex;
        flex-direction: column;
        gap: 10px;
        color: var(--text-secondary);
        font-size: 1rem;
        letter-spacing: 0.03em;
    }
    
    .radar-placeholder small {
        font-size: 0.68rem;
        line-height: 1.6;
        color: var(--text-tertiary);
    }
    
    .core-section {
        display: flex;
        flex-direction: column;
        gap: 20px;
    }
    
    .core-metric-grid {
        display: flex;
        flex-direction: column;
        gap: 12px;
    }
    
    .core-metric-row {
        display: grid;
        grid-template-columns: repeat(3, minmax(150px, 1fr));
        gap: 12px;
    }
    
    .core-metric-card {
        background: rgba(245, 246, 247, 0.06);
        border: 1px solid rgba(245, 246, 247, 0.12);
        border-radius: 16px;
        padding: 18px;
        display: flex;
        flex-direction: column;
        gap: 8px;
        transition: transform 0.25s ease, border-color 0.25s ease;
    }
    
    .core-metric-card:hover {
        transform: translateY(-4px);
        border-color: rgba(255, 210, 30, 0.45);
    }
    
    .core-metric-card .metric-label {
        font-size: 0.95rem;
        color: var(--text-secondary);
        font-weight: 600;
    }
    
    .core-metric-card .metric-value {
        font-size: 1.8rem;
        font-weight: 700;
        color: var(--text-primary);
        font-family: 'Geist Mono', monospace;
    }
    
    .level-strip {
        display: grid;
        grid-template-columns: repeat(7, 1fr);
        gap: 12px;
        margin-top: 0;
        width: 100%;
    }
    
    .level-tile {
        background: rgba(245, 246, 247, 0.05);
        border: 1px solid rgba(245, 246, 247, 0.1);
        border-radius: 12px;
        padding: 12px 10px;
        display: flex;
        flex-direction: column;
        gap: 4px;
        text-align: center;
        min-width: 0;
    }
    
    .level-tile-label {
        font-size: 0.82rem;
        font-weight: 600;
        color: var(--text-secondary);
    }
    
    .level-tile-score {
        font-size: 1.25rem;
        font-weight: 700;
        color: var(--text-primary);
        font-family: 'Geist Mono', monospace;
    }
    @media (max-width: 980px) {
        .performance-card {
            padding: 20px;
        }
        .card-top-row {
            flex-direction: column;
        }
        .rank-panel {
            align-self: flex-start;
        }
        .card-body {
            grid-template-columns: 1fr;
        }
        .radar-slot {
            width: 100%;
            max-width: 280px;
            margin: 0 auto;
        }
        .radar-chart-container {
            overflow-x: auto;
            padding: 0 12px 28px;
            -webkit-overflow-scrolling: touch;
        }
        .radar-metric-plot {
            min-width: 720px;
        }
        .level-strip {
            grid-template-columns: repeat(4, 1fr);
        }
        .heatmap-chart-container {
            overflow-x: auto;
            padding: 0 12px 28px;
            -webkit-overflow-scrolling: touch;
        }
        .heatmap-plot {
            min-width: 720px;
        }
        .level-metric-chart-container {
            overflow-x: auto;
            padding: 0 12px 28px;
            -webkit-overflow-scrolling: touch;
        }
        .level-metric-plot {
            min-width: 720px;
        }
    }
    @media (max-width: 640px) {
        .level-strip {
            grid-template-columns: repeat(3, 1fr);
        }
        .heatmap-plot {
            min-width: 640px;
        }
        .level-metric-plot {
            min-width: 640px;
        }
    }
    
    </style>
    
    """)
    
    
    # Wire up the card generator to selection change
    card_model_selector.change(
        fn=generate_performance_card,
        inputs=[card_model_selector],
        outputs=[card_display]
    )
    
    # Wire up download button with html2canvas capture
    download_card_btn.click(
        fn=None,
        js="""
        async () => {
            const ensureHtml2Canvas = () => new Promise((resolve, reject) => {
                if (window.html2canvas) {
                    resolve(window.html2canvas);
                    return;
                }
                const existing = document.querySelector('script[data-html2canvas]');
                if (existing) {
                    existing.addEventListener('load', () => resolve(window.html2canvas));
                    existing.addEventListener('error', reject);
                    return;
                }
                const script = document.createElement('script');
                script.src = 'https://cdn.jsdelivr.net/npm/html2canvas@1.4.1/dist/html2canvas.min.js';
                script.async = true;
                script.dataset.html2canvas = 'true';
                script.onload = () => resolve(window.html2canvas);
                script.onerror = () => reject(new Error('Failed to load html2canvas'));
                document.head.appendChild(script);
            });
            
            const pause = (ms) => new Promise(resolve => setTimeout(resolve, ms));
            await pause(60);
            
            const card = document.querySelector('.performance-card');
            if (!card) {
                alert('Performance card not found. Please select a model first.');
                return;
            }
            
            const btn = document.getElementById('download-card-btn');
            const originalText = btn?.textContent || '';
            if (btn) {
                btn.textContent = 'Generating...';
                btn.disabled = true;
            }
            
            try {
                const html2canvasLib = await ensureHtml2Canvas();
                if (!html2canvasLib) {
                    throw new Error('html2canvas unavailable');
                }
                
                const canvas = await html2canvasLib(card, {
                    backgroundColor: '#01091A',
                    scale: 2,
                    logging: false,
                    useCORS: true
                });
                
                const link = document.createElement('a');
                const modelName = card.querySelector('.card-model-name')?.textContent || 'model';
                const timestamp = new Date().toISOString().slice(0, 10);
                const fileName = `${modelName.replace(/[^a-z0-9]/gi, '-').toLowerCase()}-performance-${timestamp}.png`;
                link.download = fileName;
                link.href = canvas.toDataURL('image/png');
                document.body.appendChild(link);
                link.click();
                document.body.removeChild(link);
            } catch (error) {
                console.error('Error capturing card:', error);
                alert('Failed to capture performance card. Please try again.');
            } finally {
                if (btn) {
                    btn.textContent = originalText;
                    btn.disabled = false;
                }
            }
        }
        """
    )
    
    # Also update card when filters change to keep model selector in sync
    for input_component in filter_inputs:
        def update_dropdown_and_card(*args):
            filtered_df, _, _ = apply_filters(
                load_leaderboard_data(),
                args[0],
                args[1],
                args[2],
                "Overall Success" if args[0] == "ALL" else sr_column_map.get(resolve_level(args[0]), "Overall Success")
            )
            choices = filtered_df['Model'].tolist()
            # Select first model from filtered list
            value = choices[0] if choices else None
            return gr.Dropdown(
                choices=choices,
                value=value,
                label="",
                info=None,
                container=False,
                elem_classes=["model-dropdown"]
            )
        
        input_component.change(
            fn=update_dropdown_and_card,
            inputs=filter_inputs,
            outputs=[card_model_selector]
        )
    
    return leaderboard_table


def create_leaderboard_v2_interface():
    """Create the complete leaderboard v1 interface"""
    return create_leaderboard_v2_tab()


def create_domain_radar_chart(df, selected_models=None, max_models=5):
    """Visualize six core capability metrics on a radar chart."""
    df = df.copy()
    metrics_info = [
        {"column": "Overall Success", "label": "Overall Success", "description": "Average SR across L1-L7"},
        {"column": "Execution Accuracy", "label": "Execution Accuracy", "description": "CallEM · ArgAcc · SelectAcc"},
        {"column": "Complex Reasoning", "label": "Complex Reasoning", "description": "ProvAcc · PSM · Coverage"},
        {"column": "Robustness", "label": "Robustness", "description": "AdaptiveRouting · FallbackSR"},
        {"column": "Context & Efficiency", "label": "Context & Efficiency", "description": "ReuseRate · EffScore · ContextRetention"},
        {"column": "Call Validity", "label": "Call Validity", "description": "Average EPR_CVR across levels"},
    ]
    
    required_columns = [m["column"] for m in metrics_info]
    if df.empty or not any(col in df.columns for col in required_columns):
        return create_empty_radar_chart("Not enough data to build the capability radar")
    
    # Default model selection
    if not selected_models:
        if "Overall Success" in df.columns:
            top_models = df.sort_values("Overall Success", ascending=False)
        else:
            top_models = df
        selected_models = top_models['Model'].head(max_models).tolist()
    
    selected_models = selected_models[:max_models]
    
    # Ensure metric columns are numeric
    for metric in metrics_info:
        col = metric["column"]
        if col in df.columns:
            df[col] = pd.to_numeric(df[col], errors='coerce')
    
    fig = go.Figure()
    angle_labels = [m["label"] for m in metrics_info]
    
    palette = [
        {'fill': 'rgba(255, 210, 30, 0.25)', 'line': '#ffd21e'},
        {'fill': 'rgba(255, 138, 60, 0.22)', 'line': '#FF8A3C'},
        {'fill': 'rgba(249, 112, 185, 0.22)', 'line': '#F970B9'},
        {'fill': 'rgba(139, 92, 246, 0.20)', 'line': '#8B5CF6'},
        {'fill': 'rgba(248, 250, 252, 0.20)', 'line': '#F8FAFC'},
    ]
    
    for idx, model_name in enumerate(selected_models):
        model_data = df[df['Model'] == model_name]
        if model_data.empty:
            continue
        
        row = model_data.iloc[0]
        values = []
        tooltips = []
        for metric in metrics_info:
            col = metric["column"]
            value = row[col] if col in row else float('nan')
            if pd.isna(value) or value == '':
                value = 0
            values.append(float(value))
            tooltips.append(metric["description"])
        
        if not values:
            continue
        
        values_loop = values + [values[0]]
        angles_loop = angle_labels + [angle_labels[0]]
        tooltips_loop = tooltips + [tooltips[0]]
        colors = palette[idx % len(palette)]
        
        fig.add_trace(
            go.Scatterpolar(
                r=values_loop,
                theta=angles_loop,
                fill='toself',
                fillcolor=colors['fill'],
                line=dict(color=colors['line'], width=3),
                marker=dict(
                    size=10,
                    color=colors['line'],
                    symbol='circle',
                    line=dict(width=2, color='#01091A')
                ),
                name=model_name,
                customdata=tooltips_loop,
                mode="lines+markers",
                hovertemplate="<b>%{fullData.name}</b><br>" +
                              "<span style='color: #94A3B8'>%{theta}</span><br>" +
                              "<span style='color: #F5E7CB; font-size: 12px;'>%{customdata}</span><br>" +
                              "<b style='font-size: 14px; color: #F5F6F7'>%{r:.3f}</b><br>" +
                              "<extra></extra>",
                hoverlabel=dict(
                    bgcolor="rgba(1, 9, 26, 0.95)",
                    bordercolor=colors['line'],
                    font=dict(color="white", size=12, family="'Geist', sans-serif")
                )
            )
        )
    
    tick_vals = [i / 5 for i in range(6)]
    tick_text = [f"{val:.2f}" for val in tick_vals]
    
    fig.update_layout(
        polar=dict(
            bgcolor='rgba(245, 246, 247, 0.03)',
            radialaxis=dict(
                visible=True,
                range=[0, 1],
                showline=True,
                linewidth=2,
                linecolor='rgba(245, 246, 247, 0.2)',
                gridcolor='rgba(245, 246, 247, 0.1)',
                gridwidth=1,
                tickvals=tick_vals,
                ticktext=tick_text,
                tickfont=dict(
                    size=11,
                    color='white',
                    family="'Geist Mono', monospace"
                )
            ),
            angularaxis=dict(
                showline=True,
                linewidth=2,
                linecolor='rgba(245, 246, 247, 0.2)',
                gridcolor='rgba(245, 246, 247, 0.08)',
                tickfont=dict(
                    size=13,
                    family="'Geist', sans-serif",
                    color='white',
                    weight=600
                ),
                rotation=90,
                direction="clockwise",
            ),
        ),
        showlegend=True,
        legend=dict(
            orientation="h",
            yanchor="bottom",
            y=-0.15,
            xanchor="center",
            x=0.5,
            font=dict(size=12, family="'Geist', sans-serif", color='white'),
            bgcolor='rgba(1, 9, 26, 0.8)',
            bordercolor='rgba(245, 246, 247, 0.2)',
            borderwidth=1,
            itemsizing='constant',
            itemwidth=30
        ),
        title=dict(
            text="<b>Core Capability Radar</b>",
            x=0.5,
            y=0.97,
            font=dict(
                size=22,
                family="'Geist', sans-serif",
                color="white",
                weight=700
            ),
        ),
        paper_bgcolor="#01091A",
        plot_bgcolor="rgba(245, 246, 247, 0.02)",
        height=800,
        width=900,
        margin=dict(t=30, b=50, l=10, r=10),
        autosize=True,
        annotations=[
            dict(
                text="Galileo Agent Leaderboard",
                xref="paper", yref="paper",
                x=0.98, y=0.02,
                xanchor='right', yanchor='bottom',
                font=dict(size=10, color='#64748B'),
                showarrow=False
            )
        ]
    )
    
    return fig


def create_performance_heatmap(df, ordered_models=None, max_models=12):
    """Render a heatmap of SR scores across task levels for selected models."""
    df = df.copy()
    level_sequence = [f"L{i}" for i in range(1, 8)]
    sr_columns = []
    for level in level_sequence:
        col = f"{level}_SR"
        if col in df.columns:
            df[col] = pd.to_numeric(df[col], errors="coerce")
            sr_columns.append((level, col))
    if df.empty or not sr_columns:
        return create_empty_heatmap("Not enough SR data to render the heatmap")
    
    df = df.drop_duplicates(subset=["Model"])
    if df.empty:
        return create_empty_heatmap("No models available to render the heatmap")
    
    sort_column = "Overall Success" if "Overall Success" in df.columns else sr_columns[0]
    df = df.sort_values(sort_column, ascending=False)
    
    if ordered_models:
        ordered_models = [m for m in ordered_models if m in df["Model"].tolist()]
    else:
        ordered_models = df["Model"].tolist()
    
    if not ordered_models:
        return create_empty_heatmap("No models available to render the heatmap")
    
    ordered_models = ordered_models[:max_models]
    heatmap_df = df.set_index("Model").reindex(ordered_models)
    
    level_labels = []
    z_matrix = []
    has_values = False
    
    for level, col in sr_columns:
        if col not in heatmap_df.columns:
            continue
        label = f"{level} · SR"
        level_labels.append(label)
        row_values = []
        for model in ordered_models:
            value = heatmap_df.at[model, col] if model in heatmap_df.index else None
            if pd.isna(value):
                row_values.append(None)
            else:
                val = float(value)
                row_values.append(val)
                has_values = True
        z_matrix.append(row_values)
    
    if not level_labels or not has_values:
        return create_empty_heatmap("Not enough SR data to render the heatmap")
    
    colorscale = [
        [0.0, "#0A0A0A"],
        [0.25, "#1A1411"],
        [0.5, "#332818"],
        [0.75, "#B8660A"],
        [1.0, "#FFD21E"],
    ]
    
    fig = go.Figure()
    fig.add_trace(
        go.Heatmap(
            z=z_matrix,
            x=ordered_models,
            y=level_labels,
            colorscale=colorscale,
            zmin=0,
            zmax=1,
            hovertemplate="<b>%{y}</b><br><span style='color:#FFD21E'>%{x}</span><br>SR · %{z:.3f}<extra></extra>",
            colorbar=dict(
                title="Success Rate",
                titlefont=dict(color="white", family="'Geist', sans-serif", size=12),
                tickfont=dict(color="white", family="'Geist', sans-serif", size=10),
                thickness=12,
                len=0.7,
                outlinecolor="rgba(255, 255, 255, 0.1)",
                bgcolor="rgba(1, 9, 26, 0.75)"
            ),
            showscale=True
        )
    )
    
    annotations = []
    for y_idx, level in enumerate(level_labels):
        for x_idx, model in enumerate(ordered_models):
            value = z_matrix[y_idx][x_idx]
            if value is None:
                continue
            font_color = "#0B1120" if value >= 0.6 else "#F8FAFC"
            annotations.append(
                dict(
                    x=model,
                    y=level,
                    text=f"{value:.3f}",
                    showarrow=False,
                    font=dict(
                        family="'Geist Mono', monospace",
                        size=11,
                        color=font_color
                    )
                )
            )
    
    fig.update_layout(
        paper_bgcolor="#01091A",
        plot_bgcolor="rgba(245, 246, 247, 0.02)",
        margin=dict(t=80, b=90, l=110, r=160),
        height=520,
        width=1450,
        font=dict(family="'Geist', sans-serif", color="white"),
        xaxis=dict(
            tickangle=-25,
            showgrid=False,
            ticks="",
            tickfont=dict(size=11, family="'Geist', sans-serif", color="white")
        ),
        yaxis=dict(
            showgrid=False,
            ticks="",
            tickfont=dict(size=12, family="'Geist', sans-serif", color="white")
        ),
        annotations=annotations,
        title=dict(
            text="<b>Comprehensive Performance Heatmap</b>",
            x=0.5,
            y=0.98,
            font=dict(
                size=20,
                family="'Geist', sans-serif",
                color="white",
                weight=700
            ),
        )
    )
    fig.update_xaxes(side="bottom")
    
    return fig


def create_empty_heatmap(message):
    """Render an empty state for the heatmap with a centered message."""
    fig = go.Figure()
    fig.add_annotation(
        text=f"🗺️ {message}",
        xref="paper", yref="paper",
        x=0.5, y=0.5,
        xanchor='center', yanchor='middle',
        font=dict(
            size=18,
            color="white",
            family="'Geist', sans-serif"
        ),
        showarrow=False,
        bgcolor="rgba(245, 246, 247, 0.05)",
        bordercolor="rgba(245, 246, 247, 0.2)",
        borderwidth=1,
        borderpad=20
    )
    
    fig.update_layout(
        paper_bgcolor="#01091A",
        plot_bgcolor="rgba(245, 246, 247, 0.02)",
        height=520,
        width=1450,
        margin=dict(t=80, b=80, l=80, r=160),
        title=dict(
            text="<b>Comprehensive Performance Heatmap</b>",
            x=0.5,
            y=0.98,
            font=dict(
                size=20,
                family="'Geist', sans-serif",
                color="white",
                weight=700
            ),
        )
    )
    fig.update_xaxes(visible=False)
    fig.update_yaxes(visible=False)
    return fig


def create_level_metric_chart(df, level, selected_models=None, max_models=5):
    """Render a grouped horizontal bar chart showing per-model scores for a level's metrics."""
    if not level:
        return create_empty_level_metric_chart("Select a level to view its metrics")
    df = df.copy()
    level_prefix = f"{level}_"
    level_columns = [col for col in df.columns if col.startswith(level_prefix)]
    metric_columns = []
    for col in level_columns:
        metric_suffix = col[len(level_prefix):]
        metric_key_lower = metric_suffix.lower()
        if "cost" in metric_key_lower:
            continue
        numeric_series = pd.to_numeric(df[col], errors='coerce')
        valid_values = numeric_series.dropna()
        if valid_values.empty:
            continue
        if (valid_values < 0).any() or (valid_values > 1.05).any():
            continue
        df[col] = numeric_series
        metric_columns.append(col)
    if not metric_columns:
        return create_empty_level_metric_chart("This level has no 0-1 metrics to visualize")
    df = df.drop_duplicates(subset=['Model'])
    if df.empty:
        return create_empty_level_metric_chart("No models available to render level metrics")
    if selected_models:
        model_order = [m for m in selected_models if m in df['Model'].tolist()]
    else:
        sort_col = 'Overall Success' if 'Overall Success' in df.columns else metric_columns[0]
        model_order = df.sort_values(sort_col, ascending=False)['Model'].tolist()
    if not model_order:
        model_order = df['Model'].tolist()
    model_order = model_order[:max_models]
    df_models = df[df['Model'].isin(model_order)].set_index('Model')
    if df_models.empty:
        return create_empty_level_metric_chart("No matching models for selected filters")
    def prettify_metric_name(metric_key):
        raw = metric_key[len(level_prefix):]
        text = raw.replace('_', ' ')
        text = re.sub(r'(?<=.)([A-Z])', r' \1', text)
        text = text.replace('Avg', 'Average')
        replacements = {
            'Sr': 'SR',
            'Ac': 'AC',
            'Tsq': 'TSQ',
            'Cvr': 'CVR',
            'Psm': 'PSM',
            'Prov': 'Prov',
            'Call Em': 'CallEM',
            'Reuse Rate': 'Reuse Rate',
            'Eff Score': 'Eff Score'
        }
        words = text.title().split()
        words = [replacements.get(word, word) for word in words]
        return ' '.join(words)
    metric_labels = []
    for col in metric_columns:
        label = prettify_metric_name(col)
        if label in metric_labels:
            suffix = 2
            while f"{label} ({suffix})" in metric_labels:
                suffix += 1
            label = f"{label} ({suffix})"
        metric_labels.append(label)
    model_palette = [
        '#ffd21e',
        '#FF8A3C',
        '#F970B9',
        '#8B5CF6',
        '#F8FAFC',
        '#38BDF8',
    ]
    fig = go.Figure()
    max_value = 0
    for idx, model in enumerate(model_order):
        values = []
        for col in metric_columns:
            value = df_models.at[model, col] if (model in df_models.index and col in df_models.columns) else float('nan')
            if pd.notna(value):
                values.append(float(value))
                max_value = max(max_value, float(value))
            else:
                values.append(None)
        color = model_palette[idx % len(model_palette)]
        fig.add_trace(
            go.Bar(
                name=model,
                y=metric_labels,
                x=values,
                orientation='h',
                marker=dict(color=color, line=dict(color='rgba(1,9,26,0.8)', width=1)),
                hovertemplate="<b>%{y}</b><br>Model · <span style='color:#FFD21E'>%{fullData.name}</span><br>Score · %{x:.3f}<extra></extra>",
            )
        )
    plot_height = max(360, 140 + 48 * len(metric_labels))
    if max_value <= 0:
        x_range = [0, 1]
    else:
        x_range = [0, max_value * 1.05]
    fig.update_layout(
        barmode='group',
        bargap=0.25,
        bargroupgap=0.18,
        paper_bgcolor="#01091A",
        plot_bgcolor="rgba(245, 246, 247, 0.02)",
        height=plot_height,
        width=1450,
        margin=dict(t=90, b=80, l=220, r=160),
        legend=dict(
            orientation="h",
            yanchor="bottom",
            y=1.02,
            xanchor="right",
            x=1,
            bgcolor='rgba(1, 9, 26, 0.75)',
            bordercolor='rgba(245, 246, 247, 0.2)',
            borderwidth=1,
            font=dict(size=11, family="'Geist', sans-serif", color='white')
        ),
        xaxis=dict(
            title=dict(text=f"<b>{level} Metric Score</b>", font=dict(size=14, color="white")),
            tickfont=dict(size=11, color="white"),
            gridcolor='rgba(245, 246, 247, 0.08)',
            zerolinecolor='rgba(245, 246, 247, 0.18)',
            range=x_range
        ),
        yaxis=dict(
            tickfont=dict(size=13, color="white"),
            automargin=True
        ),
        title=dict(
            text=f"<b>{level} Metric Breakdown</b>",
            x=0.5,
            y=0.98,
            font=dict(size=20, family="'Geist', sans-serif", color="white", weight=700)
        )
    )
    return fig


def create_empty_level_metric_chart(message):
    fig = go.Figure()
    fig.add_annotation(
        text=f"🧭 {message}",
        xref="paper", yref="paper",
        x=0.5, y=0.5,
        xanchor='center', yanchor='middle',
        font=dict(size=18, color="white", family="'Geist', sans-serif"),
        showarrow=False,
        bgcolor="rgba(245, 246, 247, 0.05)",
        bordercolor="rgba(245, 246, 247, 0.2)",
        borderwidth=1,
        borderpad=20
    )
    fig.update_layout(
        paper_bgcolor="#01091A",
        plot_bgcolor="rgba(245, 246, 247, 0.02)",
        height=420,
        width=1450,
        margin=dict(t=80, b=60, l=80, r=120),
        title=dict(
            text="<b>Level Metric Breakdown</b>",
            x=0.5,
            y=0.98,
            font=dict(size=20, family="'Geist', sans-serif", color="white", weight=700)
        )
    )
    fig.update_xaxes(visible=False)
    fig.update_yaxes(visible=False)
    return fig


def create_empty_radar_chart(message):
    """Create an empty radar chart with a message"""
    fig = go.Figure()
    
    fig.add_annotation(
        text=f"📊 {message}",
        xref="paper", yref="paper",
        x=0.5, y=0.5,
        xanchor='center', yanchor='middle',
        font=dict(
            size=18, 
            color="white",
            family="'Geist', sans-serif"
        ),
        showarrow=False,
        bgcolor="rgba(245, 246, 247, 0.05)",
        bordercolor="rgba(245, 246, 247, 0.2)",
        borderwidth=1,
        borderpad=20
    )
    
    fig.update_layout(
        paper_bgcolor="#01091A",
        plot_bgcolor="rgba(245, 246, 247, 0.02)", 
        height=1450,
        width=1450,
        margin=dict(t=100, b=80, l=80, r=200),
        title=dict(
            text="<b>Core Capability Radar</b>",
            x=0.5,
            y=0.97,
            font=dict(
                size=22, 
                family="'Geist', sans-serif", 
                color="white",
                weight=700
            ),
        ),
        annotations=[
            dict(
                xref="paper", yref="paper",
                x=0.98, y=0.02,
                xanchor='right', yanchor='bottom',
                font=dict(size=10, color='#64748B'),
                showarrow=False
            )
        ]
    )
    
    return fig


# NEW VISUALIZATION FUNCTIONS

def create_cost_performance_scatter(df, metric="Avg AC"):
    """Create scatter plot showing cost vs performance efficiency"""
    # Filter out models without cost or performance data
    df_filtered = df[(df['Avg Total Cost'] != '') & (df[metric] != '')].copy()
    label_map = {
        'Proprietary': 'API',
        'Open source': 'OSS'
    }
    
    if df_filtered.empty:
        return create_empty_chart("No data available for cost-performance analysis")
    
    # Convert to numeric
    df_filtered['Avg Total Cost'] = pd.to_numeric(df_filtered['Avg Total Cost'], errors='coerce')
    df_filtered[metric] = pd.to_numeric(df_filtered[metric], errors='coerce')
    df_filtered['Avg Turns'] = pd.to_numeric(df_filtered['Avg Turns'], errors='coerce')
    
    # Create color mapping for model type
    color_map = {
        'Proprietary': '#1098F7',  # Airglow Blue for Proprietary
        'Open source': '#58BC82'   # Green for Open source
    }
    df_filtered['Color'] = df_filtered['Model Type'].map(color_map).fillna('#F5F6F7')
    
    fig = go.Figure()
    
    # Add scatter points
    for model_type in df_filtered['Model Type'].unique():
        df_type = df_filtered[df_filtered['Model Type'] == model_type]
        legend_name = label_map.get(model_type, model_type)
        
        fig.add_trace(go.Scatter(
            x=df_type[metric],
            y=df_type['Avg Total Cost'],
            mode='markers+text',
            name=legend_name,
            text=df_type['Model'],
            textposition="top center",
            textfont=dict(size=10, color='white'),
            marker=dict(
                size=df_type['Avg Turns'] * 3,  # Size based on number of turns
                color=color_map.get(model_type, '#F5F6F7'),
                opacity=0.8,
                line=dict(width=2, color='#01091A')
            ),
            hovertemplate="<b>%{text}</b><br>" +
                         f"{metric}: %{{x:.3f}}<br>" +
                         "Cost: $%{y:.3f}<br>" +
                         "Turns: %{marker.size:.1f}<br>" +
                         "<extra></extra>"
        ))
    
    # Add quadrant lines
    median_x = df_filtered[metric].median()
    median_y = df_filtered['Avg Total Cost'].median()
    
    fig.add_hline(y=median_y, line_dash="dash", line_color="#64748B", opacity=0.5)
    fig.add_vline(x=median_x, line_dash="dash", line_color="#64748B", opacity=0.5)
    
    # Add quadrant labels
    fig.add_annotation(x=0.95, y=0.05, text="💎 High Performance<br>Low Cost", 
                      showarrow=False, xref="paper", yref="paper",
                      font=dict(size=12, color="white"), bgcolor="rgba(245, 246, 247, 0.1)")
    fig.add_annotation(x=0.05, y=0.95, text="⚠️ Low Performance<br>High Cost", 
                      showarrow=False, xref="paper", yref="paper",
                      font=dict(size=12, color="#ffd21e"), bgcolor="rgba(255, 210, 30, 0.1)")
    
    metric_display = "Action Completion" if metric == "Avg AC" else "Tool Selection Quality"
    
    fig.update_layout(
        title=dict(
            text=f"<b>Cost-Performance Efficiency: {metric_display}</b>",
            x=0.5,
            y=0.97,
            font=dict(size=22, family="'Geist', sans-serif", color="white", weight=700)
        ),
        xaxis=dict(
            title=dict(
                text=f"<b>{metric_display}</b>",
                font=dict(size=16, color="white")
            ),
            tickfont=dict(size=12, color="white"),
            gridcolor="rgba(245, 246, 247, 0.1)",
            zerolinecolor="rgba(245, 246, 247, 0.2)"
        ),
        yaxis=dict(
            title=dict(
                text="<b>Average Session Cost ($)</b>",
                font=dict(size=16, color="white")
            ),
            tickfont=dict(size=12, color="white"),
            gridcolor="rgba(245, 246, 247, 0.1)",
            zerolinecolor="rgba(245, 246, 247, 0.2)"
        ),
        paper_bgcolor="#01091A",
        plot_bgcolor="rgba(245, 246, 247, 0.02)",
        height=900,
        width=1450,
        showlegend=True,
        legend=dict(
            orientation="h",
            yanchor="bottom",
            y=1.02,
            xanchor="right",
            x=1,
            font=dict(size=12, family="'Geist', sans-serif", color='white'),
            bgcolor='rgba(1, 9, 26, 0.8)',
            bordercolor='rgba(245, 246, 247, 0.2)',
            borderwidth=1
        ),
        margin=dict(t=100, b=80, l=80, r=80)
    )
    
    return fig


def create_speed_accuracy_plot(df, metric="Avg AC"):
    """Create scatter plot showing speed vs accuracy trade-off"""
    # Filter out models without duration or performance data
    df_filtered = df[(df['Avg Session Duration'] != '') & (df[metric] != '')].copy()
    
    if df_filtered.empty:
        return create_empty_chart("No data available for speed-accuracy analysis")
    
    # Convert to numeric
    df_filtered['Avg Session Duration'] = pd.to_numeric(df_filtered['Avg Session Duration'], errors='coerce')
    df_filtered[metric] = pd.to_numeric(df_filtered[metric], errors='coerce')
    
    # Create color scale based on cost
    df_filtered['Avg Total Cost'] = pd.to_numeric(df_filtered['Avg Total Cost'], errors='coerce')
    
    fig = go.Figure()
    
    # Add scatter trace
    fig.add_trace(go.Scatter(
        x=df_filtered[metric],
        y=df_filtered['Avg Session Duration'],
        mode='markers+text',
        text=df_filtered['Model'],
        textposition="top center",
        textfont=dict(size=9, color='white'),
        marker=dict(
            size=12,
            color=df_filtered['Avg Total Cost'],
            colorscale=[[0, '#0A0A0A'], [0.5, '#B8660A'], [1, '#ffd21e']],
            showscale=True,
            colorbar=dict(
                title=dict(
                    text="Cost ($)",
                    font=dict(color="white")
                ),
                tickfont=dict(color="white"),
                bgcolor="rgba(1, 9, 26, 0.8)",
                bordercolor="rgba(245, 246, 247, 0.2)",
                borderwidth=1,
                x=1.02
            ),
            line=dict(width=2, color='#01091A')
        ),
        hovertemplate="<b>%{text}</b><br>" +
                     f"{metric}: %{{x:.3f}}<br>" +
                     "Duration: %{y:.1f}s<br>" +
                     "Cost: $%{marker.color:.3f}<br>" +
                     "<extra></extra>"
    ))
    
    # Add quadrant lines
    median_x = df_filtered[metric].median()
    median_y = df_filtered['Avg Session Duration'].median()
    
    fig.add_hline(y=median_y, line_dash="dash", line_color="#64748B", opacity=0.5)
    fig.add_vline(x=median_x, line_dash="dash", line_color="#64748B", opacity=0.5)
    
    # Add quadrant labels
    fig.add_annotation(x=0.95, y=0.05, text="⚡ Fast & Accurate", 
                      showarrow=False, xref="paper", yref="paper",
                      font=dict(size=12, color="white", weight=600))
    fig.add_annotation(x=0.05, y=0.95, text="🐌 Slow & Inaccurate", 
                      showarrow=False, xref="paper", yref="paper",
                      font=dict(size=12, color="#ffd21e", weight=600))
    
    metric_display = "Action Completion" if metric == "Avg AC" else "Tool Selection Quality"
    
    fig.update_layout(
        title=dict(
            text=f"<b>Speed vs Accuracy Trade-off: {metric_display}</b>",
            x=0.5,
            y=0.97,
            font=dict(size=22, family="'Geist', sans-serif", color="white", weight=700)
        ),
        xaxis=dict(
            title=dict(
                text=f"<b>{metric_display}</b>",
                font=dict(size=16, color="white")
            ),
            tickfont=dict(size=12, color="white"),
            gridcolor="rgba(245, 246, 247, 0.1)",
            zerolinecolor="rgba(245, 246, 247, 0.2)"
        ),
        yaxis=dict(
            title=dict(
                text="<b>Average Session Duration (seconds)</b>",
                font=dict(size=16, color="white")
            ),
            tickfont=dict(size=12, color="white"),
            gridcolor="rgba(245, 246, 247, 0.1)",
            zerolinecolor="rgba(245, 246, 247, 0.2)"
        ),
        paper_bgcolor="#01091A",
        plot_bgcolor="rgba(245, 246, 247, 0.02)",
        height=900,
        width=1450,
        margin=dict(t=100, b=80, l=80, r=120)
    )
    
    return fig


def create_domain_specialization_matrix(df, metric_type="AC"):
    """Create bubble chart showing domain specialization"""
    domains = ['Banking', 'Healthcare', 'Insurance', 'Investment', 'Telecom']
    
    # Prepare data
    data = []
    for _, model in df.iterrows():
        if model['Model'] == '':
            continue
            
        model_avg = pd.to_numeric(model[f'Avg {metric_type}'], errors='coerce')
        if pd.isna(model_avg):
            continue
            
        for domain in domains:
            domain_col = f'{domain} {metric_type}'
            if domain_col in model and model[domain_col] != '':
                domain_val = pd.to_numeric(model[domain_col], errors='coerce')
                if not pd.isna(domain_val):
                    # Calculate specialization strength (deviation from model average)
                    specialization = domain_val - model_avg
                    data.append({
                        'Model': model['Model'],
                        'Domain': domain,
                        'Performance': domain_val,
                        'Specialization': specialization,
                        'Model Type': model['Model Type']
                    })
    
    if not data:
        return create_empty_chart("No domain specialization data available")
    
    df_plot = pd.DataFrame(data)
    
    # Create bubble chart
    fig = go.Figure()
    
    # Color based on specialization strength
    fig.add_trace(go.Scatter(
        x=df_plot['Domain'],
        y=df_plot['Model'],
        mode='markers',
        marker=dict(
            size=df_plot['Performance'] * 30,  # Size based on absolute performance
            color=df_plot['Specialization'],
            colorscale=[[0, '#B8660A'], [0.5, '#E6B800'], [1, '#ffd21e']],
            showscale=True,
            colorbar=dict(
                title=dict(
                    text="Specialization<br>Strength",
                    font=dict(color="white")
                ),
                tickfont=dict(color="white"),
                bgcolor="rgba(1, 9, 26, 0.8)",
                bordercolor="rgba(245, 246, 247, 0.2)",
                borderwidth=1
            ),
            line=dict(width=2, color='#01091A'),
            opacity=0.8
        ),
        text=[f"Performance: {p:.3f}<br>Specialization: {s:+.3f}" 
              for p, s in zip(df_plot['Performance'], df_plot['Specialization'])],
        hovertemplate="<b>%{y}</b><br>" +
                     "Domain: %{x}<br>" +
                     "%{text}<br>" +
                     "<extra></extra>"
    ))
    
    metric_display = "Action Completion" if metric_type == "AC" else "Tool Selection Quality"
    
    fig.update_layout(
        title=dict(
            text=f"<b>Domain Specialization Matrix: {metric_display}</b>",
            x=0.5,
            y=0.97,
            font=dict(size=22, family="'Geist', sans-serif", color="white", weight=700)
        ),
        xaxis=dict(
            title=dict(
                text="<b>Business Domains</b>",
                font=dict(size=16, color="white")
            ),
            tickfont=dict(size=13, color="white"),
            gridcolor="rgba(245, 246, 247, 0.1)"
        ),
        yaxis=dict(
            title=dict(
                text="<b>Models</b>",
                font=dict(size=16, color="white")
            ),
            tickfont=dict(size=11, color="white"),
            gridcolor="rgba(245, 246, 247, 0.1)"
        ),
        paper_bgcolor="#01091A",
        plot_bgcolor="rgba(245, 246, 247, 0.02)",
        height=1100,
        width=1450,
        margin=dict(t=100, b=80, l=220, r=120)
    )
    
    return fig


def create_performance_gap_analysis(df, metric_type="AC"):
    """Create range plot showing performance gaps by domain"""
    domains = ['Banking', 'Healthcare', 'Insurance', 'Investment', 'Telecom']
    
    # Calculate min, max, median for each domain
    gap_data = []
    for domain in domains:
        domain_col = f'{domain} {metric_type}'
        if domain_col in df.columns:
            domain_values = pd.to_numeric(df[domain_col], errors='coerce').dropna()
            if len(domain_values) > 0:
                gap_data.append({
                    'Domain': domain,
                    'Min': domain_values.min(),
                    'Max': domain_values.max(),
                    'Median': domain_values.median(),
                    'Q1': domain_values.quantile(0.25),
                    'Q3': domain_values.quantile(0.75),
                    'Gap': domain_values.max() - domain_values.min()
                })
    
    if not gap_data:
        return create_empty_chart("No data available for gap analysis")
    
    df_gap = pd.DataFrame(gap_data)
    df_gap = df_gap.sort_values('Gap', ascending=True)
    
    fig = go.Figure()
    
    # Add range bars
    for idx, row in df_gap.iterrows():
        # Add full range line
        fig.add_trace(go.Scatter(
            x=[row['Min'], row['Max']],
            y=[row['Domain'], row['Domain']],
            mode='lines',
            line=dict(color='#64748B', width=2),
            showlegend=False,
            hoverinfo='skip'
        ))
        
        # Add IQR box
        fig.add_trace(go.Scatter(
            x=[row['Q1'], row['Q3'], row['Q3'], row['Q1'], row['Q1']],
            y=[row['Domain'], row['Domain'], row['Domain'], row['Domain'], row['Domain']],
            fill='toself',
            fillcolor='rgba(255, 210, 30, 0.3)',
            line=dict(color='#ffd21e', width=2),
            showlegend=False,
            hoverinfo='skip',
            mode='lines'
        ))
        
        # Add median marker
        fig.add_trace(go.Scatter(
            x=[row['Median']],
            y=[row['Domain']],
            mode='markers',
            marker=dict(
                size=12,
                color='#ffd21e',
                symbol='diamond',
                line=dict(width=2, color='#01091A')
            ),
            showlegend=False,
            hovertemplate=f"<b>{row['Domain']}</b><br>" +
                         f"Min: {row['Min']:.3f}<br>" +
                         f"Q1: {row['Q1']:.3f}<br>" +
                         f"Median: {row['Median']:.3f}<br>" +
                         f"Q3: {row['Q3']:.3f}<br>" +
                         f"Max: {row['Max']:.3f}<br>" +
                         f"Gap: {row['Gap']:.3f}<br>" +
                         "<extra></extra>"
        ))
    
    # Add min/max points
    for idx, row in df_gap.iterrows():
        fig.add_trace(go.Scatter(
            x=[row['Min'], row['Max']],
            y=[row['Domain'], row['Domain']],
            mode='markers',
            marker=dict(size=8, color='white', line=dict(width=2, color='#01091A')),
            showlegend=False,
            hoverinfo='skip'
        ))
    
    metric_display = "Action Completion" if metric_type == "AC" else "Tool Selection Quality"
    
    fig.update_layout(
        title=dict(
            text=f"<b>Performance Gap Analysis by Domain: {metric_display}</b>",
            x=0.5,
            y=0.97,
            font=dict(size=22, family="'Geist', sans-serif", color="white", weight=700)
        ),
        xaxis=dict(
            title=dict(
                text=f"<b>{metric_display} Score</b>",
                font=dict(size=16, color="white")
            ),
            tickfont=dict(size=12, color="white"),
            gridcolor="rgba(245, 246, 247, 0.1)",
            range=[0, 1] if metric_type in ['AC', 'TSQ'] else None
        ),
        yaxis=dict(
            title=dict(
                text="<b>Business Domain</b>",
                font=dict(size=16, color="white")
            ),
            tickfont=dict(size=13, color="white"),
            gridcolor="rgba(245, 246, 247, 0.1)"
        ),
        paper_bgcolor="#01091A",
        plot_bgcolor="rgba(245, 246, 247, 0.02)",
        height=800,
        width=1450,
        margin=dict(t=100, b=80, l=140, r=80),
        showlegend=False
    )
    
    # Add legend manually
    fig.add_annotation(
        text="◆ Median   ━ IQR   ─ Full Range",
        xref="paper", yref="paper",
        x=0.98, y=0.02,
        xanchor='right', yanchor='bottom',
        font=dict(size=12, color='white'),
        showarrow=False
    )
    
    return fig


def create_empty_chart(message):
    """Create an empty chart with a message"""
    fig = go.Figure()
    
    fig.add_annotation(
        text=f"📊 {message}",
        xref="paper", yref="paper",
        x=0.5, y=0.5,
        xanchor='center', yanchor='middle',
        font=dict(
            size=18, 
            color="white",
            family="'Geist', sans-serif"
        ),
        showarrow=False,
        bgcolor="rgba(245, 246, 247, 0.05)",
        bordercolor="rgba(245, 246, 247, 0.2)",
        borderwidth=1,
        borderpad=20
    )
    
    fig.update_layout(
        paper_bgcolor="#01091A",
        plot_bgcolor="rgba(245, 246, 247, 0.02)", 
        height=700,
        width=1450,
        margin=dict(t=80, b=80, l=80, r=80)
    )