firobeid commited on
Commit
5019076
Β·
verified Β·
1 Parent(s): a0f7a5c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +402 -136
app.py CHANGED
@@ -1,10 +1,8 @@
1
  import gradio as gr
 
2
  import plotly.graph_objects as go
3
  import plotly.express as px
4
- import pandas as pd
5
  from plotly.subplots import make_subplots
6
- import numpy as np
7
- import io
8
 
9
  # Default sample data (will be replaced when CSV is uploaded)
10
  default_data = pd.DataFrame({
@@ -59,7 +57,7 @@ default_data = pd.DataFrame({
59
  def load_csv_data(file):
60
  """Load and validate CSV data"""
61
  if file is None:
62
- return default_data, "Using default sample data"
63
 
64
  try:
65
  df = pd.read_csv(file.name)
@@ -70,18 +68,21 @@ def load_csv_data(file):
70
  missing_cols = [col for col in required_cols if col not in df.columns]
71
 
72
  if missing_cols:
73
- return default_data, f"❌ Missing columns: {missing_cols}. Using default data."
74
 
75
  # Clean data
76
  df = df.dropna()
 
 
 
77
 
78
- return df, f"βœ… Successfully loaded {len(df)} records with {df['model'].nunique()} models"
79
 
80
  except Exception as e:
81
  return default_data, f"❌ Error loading CSV: {str(e)}. Using default data."
82
 
83
  def create_model_leaderboard(df, partition_filter='all', topic_filter='OVERALL'):
84
- """Create leaderboard comparing all models"""
85
  filtered_df = df.copy()
86
 
87
  if partition_filter != 'all':
@@ -96,47 +97,65 @@ def create_model_leaderboard(df, partition_filter='all', topic_filter='OVERALL')
96
 
97
  # Calculate overall score (average of key metrics)
98
  leaderboard['Overall_Score'] = leaderboard[['Precision', 'Recall_Power', 'Accuracy']].mean(axis=1)
99
- leaderboard = leaderboard.sort_values('Overall_Score', ascending=False)
100
-
101
- # Create subplot for each metric
102
  fig = make_subplots(
103
- rows=1, cols=len(metrics) + 1,
104
- subplot_titles=metrics + ['Overall Score']
 
105
  )
106
 
107
- colors = px.colors.qualitative.Set3[:len(leaderboard)]
 
 
 
108
 
109
  for i, metric in enumerate(metrics + ['Overall_Score']):
110
- for j, (_, row) in enumerate(leaderboard.iterrows()):
111
- fig.add_trace(
112
- go.Bar(
113
- x=[row['model']],
114
- y=[row[metric]],
115
- name=row['model'] if i == 0 else "",
116
- marker_color=colors[j],
117
- showlegend=True if i == 0 else False,
118
- text=f"{row[metric]:.3f}",
119
- textposition="outside"
 
 
120
  ),
121
- row=1, col=i+1
122
- )
 
 
 
 
 
123
 
124
  fig.update_layout(
125
- title=f"Model Leaderboard - {partition_filter.title()} | {topic_filter}",
126
- height=500,
127
- showlegend=True
 
 
 
 
 
 
128
  )
129
 
130
- # Update y-axes
131
  for i in range(1, len(metrics) + 2):
132
- fig.update_yaxes(range=[0, 1], row=1, col=i)
 
 
133
 
134
  return fig
135
 
136
  def create_topic_comparison(df, models_selected=None, metric='Accuracy', partition_filter='all'):
137
- """Compare selected models across topics"""
138
  if models_selected is None or len(models_selected) == 0:
139
- models_selected = df['model'].unique()[:3] # Default to first 3 models
140
 
141
  # Filter data
142
  filtered_df = df[df['model'].isin(models_selected)].copy()
@@ -150,34 +169,77 @@ def create_topic_comparison(df, models_selected=None, metric='Accuracy', partiti
150
  # Create grouped bar chart
151
  fig = go.Figure()
152
 
153
- colors = px.colors.qualitative.Set3[:len(models_selected)]
 
 
 
 
154
  topics = sorted(topic_performance['topic'].unique())
155
 
156
  for i, model in enumerate(models_selected):
157
  model_data = topic_performance[topic_performance['model'] == model]
 
 
 
 
 
 
158
  fig.add_trace(go.Bar(
159
- name=model,
160
  x=topics,
161
  y=model_data[metric],
162
- marker_color=colors[i],
163
- text=[f"{val:.3f}" for val in model_data[metric]],
164
- textposition='outside'
 
 
 
 
 
 
165
  ))
166
 
167
  fig.update_layout(
168
- title=f"Model Comparison Across Topics ({metric}) - {partition_filter.title()}",
169
- xaxis_title="Topics",
170
- yaxis_title=metric,
 
 
 
171
  barmode='group',
172
- height=500,
173
- xaxis_tickangle=-45,
174
- yaxis=dict(range=[0, 1])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
175
  )
176
 
177
  return fig
178
 
179
  def create_partition_analysis(df, models_selected=None):
180
- """Analyze model performance across partitions"""
181
  if models_selected is None or len(models_selected) == 0:
182
  models_selected = df['model'].unique()[:3]
183
 
@@ -187,97 +249,187 @@ def create_partition_analysis(df, models_selected=None):
187
  metrics = ['FPR', 'Confidence', 'FDR', 'Precision', 'Recall_Power', 'Accuracy', 'G_mean']
188
  partition_performance = filtered_df.groupby(['model', 'partition'])[metrics].mean().reset_index()
189
 
190
- # Create subplots for each metric
191
  fig = make_subplots(
192
- rows=2, cols=4,
193
- subplot_titles=metrics + [''], # Extra empty title for 8th subplot
194
- specs=[[{"colspan": 1}, {"colspan": 1}, {"colspan": 1}, {"colspan": 1}],
195
- [{"colspan": 1}, {"colspan": 1}, {"colspan": 1}, None]] # 7 subplots total
 
 
 
196
  )
197
 
198
- colors = px.colors.qualitative.Set3[:len(models_selected)]
 
 
 
 
199
  partitions = ['train', 'test', 'inference']
200
 
201
  # Plot each metric
202
- for i, metric in enumerate(metrics):
203
- row = 1 if i < 4 else 2
204
- col = (i % 4) + 1
 
205
 
206
  for j, model in enumerate(models_selected):
207
  model_data = partition_performance[partition_performance['model'] == model]
208
- model_data = model_data.sort_values('partition') # Ensure consistent ordering
 
 
 
209
 
210
  fig.add_trace(
211
  go.Bar(
212
- name=model if i == 0 else "",
213
  x=model_data['partition'],
214
  y=model_data[metric],
215
- marker_color=colors[j],
216
- showlegend=True if i == 0 else False,
217
- text=[f"{val:.3f}" for val in model_data[metric]],
218
- textposition='outside'
 
 
 
 
 
 
219
  ),
220
  row=row, col=col
221
  )
222
 
223
  fig.update_layout(
224
- title="Model Performance Across Partitions - All Metrics",
225
- height=800,
226
- barmode='group'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
227
  )
228
 
229
- # Update y-axes for all subplots
230
- for i in range(1, 8): # 7 subplots
231
- row = 1 if i <= 4 else 2
232
- col = i if i <= 4 else i - 4
233
- if i <= 7: # Only update existing subplots
234
- fig.update_yaxes(range=[0, 1], row=row, col=col)
 
 
 
 
235
 
236
  return fig
237
 
238
  def create_performance_summary_table(df):
239
- """Create summary table with key statistics"""
240
- # Calculate summary statistics
241
  summary_stats = []
242
 
243
  for model in df['model'].unique():
244
  model_data = df[df['model'] == model]
245
 
246
  stats = {
247
- 'Model': model,
248
- 'Avg_Accuracy': model_data['Accuracy'].mean(),
249
- 'Avg_Precision': model_data['Precision'].mean(),
250
- 'Avg_Recall': model_data['Recall_Power'].mean(),
251
- 'Avg_G_mean': model_data['G_mean'].mean(),
252
- 'Best_Topic_Accuracy': model_data.loc[model_data['Accuracy'].idxmax(), 'topic'],
253
- 'Best_Topic_Score': model_data['Accuracy'].max(),
254
- 'Worst_Topic_Accuracy': model_data.loc[model_data['Accuracy'].idxmin(), 'topic'],
255
- 'Worst_Topic_Score': model_data['Accuracy'].min(),
256
- 'Performance_Variance': model_data['Accuracy'].var()
257
  }
258
  summary_stats.append(stats)
259
 
260
  summary_df = pd.DataFrame(summary_stats)
261
- summary_df = summary_df.round(4)
262
- summary_df = summary_df.sort_values('Avg_Accuracy', ascending=False)
 
 
 
263
 
264
  return summary_df
265
 
266
- # Create the Gradio interface
267
- with gr.Blocks(title="Course Backbone Project Leaderboard \nMulti-Model Classifier Dashboard", theme=gr.themes.Soft()) as demo:
268
- gr.HTML("<h1 style='text-align: center; color: #2E86AB;'>πŸ† Multi-Model Classifier Dashboard</h1>")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
269
 
270
  # Data loading section
271
  with gr.Row():
272
- with gr.Column():
273
  csv_file = gr.File(
274
  label="πŸ“ Upload CSV File",
275
- file_types=['.csv']
 
276
  )
 
277
  data_status = gr.Textbox(
278
- label="Data Status",
279
- value="Using default sample data with 2 models",
280
- interactive=False
 
281
  )
282
 
283
  # Store current data
@@ -285,66 +437,111 @@ with gr.Blocks(title="Course Backbone Project Leaderboard \nMulti-Model Classifi
285
 
286
  with gr.Tabs():
287
  with gr.TabItem("πŸ† Model Leaderboard"):
 
288
  with gr.Row():
289
  with gr.Column(scale=1):
290
  partition_filter = gr.Dropdown(
291
  choices=['all', 'inference', 'test', 'train'],
292
  value='all',
293
- label="Filter by Partition"
294
  )
295
  topic_filter = gr.Dropdown(
296
  choices=['all', 'OVERALL'],
297
  value='OVERALL',
298
- label="Filter by Topic"
299
  )
 
 
 
 
 
 
300
 
301
  with gr.Column(scale=3):
302
  leaderboard_chart = gr.Plot()
303
 
304
  with gr.TabItem("πŸ“Š Topic Comparison"):
 
305
  with gr.Row():
306
  with gr.Column(scale=1):
307
  models_selector = gr.CheckboxGroup(
308
  choices=[],
309
- label="Select Models to Compare",
310
  value=[]
311
  )
312
  metric_selector = gr.Dropdown(
313
  choices=['FPR', 'Confidence', 'FDR', 'Precision', 'Recall_Power', 'Accuracy', 'G_mean'],
314
  value='Accuracy',
315
- label="Select Metric"
316
  )
317
  partition_filter_topic = gr.Dropdown(
318
  choices=['all', 'inference', 'test', 'train'],
319
  value='all',
320
- label="Filter by Partition"
321
  )
 
 
 
 
 
 
322
 
323
  with gr.Column(scale=3):
324
  topic_comparison_chart = gr.Plot()
325
 
326
  with gr.TabItem("πŸ”„ Partition Analysis"):
 
327
  with gr.Row():
328
  with gr.Column(scale=1):
329
  models_selector_partition = gr.CheckboxGroup(
330
  choices=[],
331
- label="Select Models to Analyze",
332
  value=[]
333
  )
 
 
 
 
 
 
 
334
 
335
  with gr.Column(scale=3):
336
  partition_analysis_chart = gr.Plot()
337
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
338
  with gr.TabItem("πŸ“ˆ Performance Summary"):
 
339
  summary_table = gr.DataFrame(
340
- label="Model Performance Summary",
341
- interactive=False
 
342
  )
343
 
344
  with gr.TabItem("πŸ“‹ Raw Data"):
 
345
  raw_data_table = gr.DataFrame(
346
- label="Complete Dataset",
347
- interactive=True
 
348
  )
349
 
350
  def update_dashboard(file):
@@ -355,18 +552,23 @@ with gr.Blocks(title="Course Backbone Project Leaderboard \nMulti-Model Classifi
355
  model_choices = sorted(df['model'].unique())
356
  topic_choices = ['all'] + sorted(df['topic'].unique())
357
 
 
 
 
358
  # Create initial plots
359
  leaderboard = create_model_leaderboard(df)
360
- topic_comp = create_topic_comparison(df, model_choices[:3])
361
- partition_analysis = create_partition_analysis(df, model_choices[:3])
 
362
  summary = create_performance_summary_table(df)
363
 
364
  return (
365
  df, status,
366
  gr.update(choices=topic_choices, value='OVERALL'),
367
- gr.update(choices=model_choices, value=model_choices[:3]),
368
- gr.update(choices=model_choices, value=model_choices[:3]),
369
- leaderboard, topic_comp, partition_analysis, summary, df
 
370
  )
371
 
372
  # Event handlers
@@ -375,9 +577,9 @@ with gr.Blocks(title="Course Backbone Project Leaderboard \nMulti-Model Classifi
375
  inputs=[csv_file],
376
  outputs=[
377
  current_data, data_status, topic_filter,
378
- models_selector, models_selector_partition,
379
  leaderboard_chart, topic_comparison_chart,
380
- partition_analysis_chart, summary_table, raw_data_table
381
  ]
382
  )
383
 
@@ -399,6 +601,8 @@ with gr.Blocks(title="Course Backbone Project Leaderboard \nMulti-Model Classifi
399
 
400
  # Update topic comparison when models, metric, or partition change
401
  def update_topic_comparison(data, selected_models, metric, partition):
 
 
402
  return create_topic_comparison(data, selected_models, metric, partition)
403
 
404
  models_selector.change(
@@ -421,6 +625,8 @@ with gr.Blocks(title="Course Backbone Project Leaderboard \nMulti-Model Classifi
421
 
422
  # Update partition analysis when models change
423
  def update_partition_analysis(data, selected_models):
 
 
424
  return create_partition_analysis(data, selected_models)
425
 
426
  models_selector_partition.change(
@@ -429,45 +635,105 @@ with gr.Blocks(title="Course Backbone Project Leaderboard \nMulti-Model Classifi
429
  outputs=partition_analysis_chart
430
  )
431
 
 
 
 
 
 
 
 
 
 
 
 
 
432
  # Initialize dashboard with default data
433
  demo.load(
434
  fn=lambda: update_dashboard(None),
435
  outputs=[
436
  current_data, data_status, topic_filter,
437
- models_selector, models_selector_partition,
438
  leaderboard_chart, topic_comparison_chart,
439
- partition_analysis_chart, summary_table, raw_data_table
440
  ]
441
  )
442
 
443
  gr.Markdown("""
444
- ### πŸ’‘ Dashboard Features
445
-
446
- **πŸ“ Data Loading**: Upload your CSV file with classifier results. The app automatically detects all models and creates comparisons.
447
-
448
- **πŸ† Model Leaderboard**:
449
- - Compare all models side-by-side across key metrics
450
- - Filter by partition and topic for specific comparisons
451
- - Overall score calculated from precision, recall, and accuracy
452
-
453
- **πŸ“Š Topic Comparison**:
454
- - Select specific models to compare across all topics
455
- - Choose any metric (FPR, Confidence, FDR, Precision, Recall_Power, Accuracy, G_mean)
456
- - Filter by partition to focus on specific evaluation splits
457
- - Visual comparison across business categories
458
-
459
- **πŸ”„ Partition Analysis**:
460
- - Analyze all metrics across train/test/inference partitions
461
- - Compare multiple models across different evaluation splits
462
- - Monitor generalization capabilities and detect overfitting
463
- - Comprehensive view of all 7 performance metrics
464
-
465
- **πŸ“ˆ Performance Summary**:
466
- - Statistical overview of each model's performance
467
- - Best and worst performing topics for each model
468
- - Performance variance analysis
469
-
470
- **CSV Format**: Your file should have columns: `model`, `partition`, `topic`, `FPR`, `Confidence`, `FDR`, `Precision`, `Recall_Power`, `Accuracy`, `G_mean`
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
471
  """)
472
 
473
  if __name__ == "__main__":
 
1
  import gradio as gr
2
+ import pandas as pd
3
  import plotly.graph_objects as go
4
  import plotly.express as px
 
5
  from plotly.subplots import make_subplots
 
 
6
 
7
  # Default sample data (will be replaced when CSV is uploaded)
8
  default_data = pd.DataFrame({
 
57
  def load_csv_data(file):
58
  """Load and validate CSV data"""
59
  if file is None:
60
+ return default_data, "πŸ“Š Using default sample data (2 models, 48 records)"
61
 
62
  try:
63
  df = pd.read_csv(file.name)
 
68
  missing_cols = [col for col in required_cols if col not in df.columns]
69
 
70
  if missing_cols:
71
+ return default_data, f"❌ Missing columns: {', '.join(missing_cols)}. Using default data."
72
 
73
  # Clean data
74
  df = df.dropna()
75
+
76
+ num_models = df['model'].nunique()
77
+ num_records = len(df)
78
 
79
+ return df, f"βœ… Successfully loaded: {num_records} records | {num_models} models | {df['topic'].nunique()} topics"
80
 
81
  except Exception as e:
82
  return default_data, f"❌ Error loading CSV: {str(e)}. Using default data."
83
 
84
  def create_model_leaderboard(df, partition_filter='all', topic_filter='OVERALL'):
85
+ """Create enhanced leaderboard comparing all models"""
86
  filtered_df = df.copy()
87
 
88
  if partition_filter != 'all':
 
97
 
98
  # Calculate overall score (average of key metrics)
99
  leaderboard['Overall_Score'] = leaderboard[['Precision', 'Recall_Power', 'Accuracy']].mean(axis=1)
100
+
101
+ # Create subplot for each metric with horizontal bars
 
102
  fig = make_subplots(
103
+ rows=len(metrics) + 1, cols=1,
104
+ subplot_titles=['<b>' + m.replace('_', ' ') + '</b>' for m in metrics] + ['<b>Overall Score</b>'],
105
+ vertical_spacing=0.08
106
  )
107
 
108
+ # Generate enough colors for all models
109
+ num_models = len(leaderboard)
110
+ color_palette = px.colors.qualitative.Plotly
111
+ colors = (color_palette * ((num_models // len(color_palette)) + 1))[:num_models]
112
 
113
  for i, metric in enumerate(metrics + ['Overall_Score']):
114
+ sorted_data = leaderboard.sort_values(metric, ascending=True)
115
+
116
+ fig.add_trace(
117
+ go.Bar(
118
+ y=sorted_data['model'],
119
+ x=sorted_data[metric],
120
+ orientation='h',
121
+ marker=dict(
122
+ color=sorted_data[metric],
123
+ colorscale='RdYlGn',
124
+ showscale=False,
125
+ line=dict(color='rgb(50,50,50)', width=1.5)
126
  ),
127
+ text=[f"<b>{val:.4f}</b>" for val in sorted_data[metric]],
128
+ textposition='auto',
129
+ textfont=dict(size=12, color='white', family='Arial Black'),
130
+ hovertemplate='<b>%{y}</b><br>' + metric.replace('_', ' ') + ': <b>%{x:.4f}</b><extra></extra>'
131
+ ),
132
+ row=i+1, col=1
133
+ )
134
 
135
  fig.update_layout(
136
+ title=dict(
137
+ text=f"<b>πŸ† Model Leaderboard</b><br><sub>Partition: {partition_filter.title()} | Topic: {topic_filter}</sub>",
138
+ font=dict(size=22, color='#2c3e50')
139
+ ),
140
+ height=300 * (len(metrics) + 1),
141
+ showlegend=False,
142
+ font=dict(size=12, family='Arial'),
143
+ plot_bgcolor='rgba(245,245,245,0.8)',
144
+ paper_bgcolor='white'
145
  )
146
 
147
+ # Update axes
148
  for i in range(1, len(metrics) + 2):
149
+ fig.update_xaxes(range=[0, 1.05], gridcolor='rgba(200,200,200,0.5)',
150
+ showgrid=True, fixedrange=False, row=i, col=1)
151
+ fig.update_yaxes(tickfont=dict(size=11), row=i, col=1)
152
 
153
  return fig
154
 
155
  def create_topic_comparison(df, models_selected=None, metric='Accuracy', partition_filter='all'):
156
+ """Create enhanced topic comparison chart"""
157
  if models_selected is None or len(models_selected) == 0:
158
+ models_selected = df['model'].unique()[:3]
159
 
160
  # Filter data
161
  filtered_df = df[df['model'].isin(models_selected)].copy()
 
169
  # Create grouped bar chart
170
  fig = go.Figure()
171
 
172
+ # Generate enough colors for all models
173
+ num_models = len(models_selected)
174
+ color_palette = px.colors.qualitative.Bold
175
+ colors = (color_palette * ((num_models // len(color_palette)) + 1))[:num_models]
176
+
177
  topics = sorted(topic_performance['topic'].unique())
178
 
179
  for i, model in enumerate(models_selected):
180
  model_data = topic_performance[topic_performance['model'] == model]
181
+ # Sort by topic order
182
+ model_data = model_data.set_index('topic').reindex(topics).reset_index()
183
+
184
+ # Shortened model name for legend
185
+ model_short = model if len(model) <= 30 else model[:27] + '...'
186
+
187
  fig.add_trace(go.Bar(
188
+ name=model_short,
189
  x=topics,
190
  y=model_data[metric],
191
+ marker=dict(
192
+ color=colors[i],
193
+ line=dict(color='rgb(40,40,40)', width=1.5),
194
+ opacity=0.85
195
+ ),
196
+ text=[f"<b>{val:.3f}</b>" if not pd.isna(val) else 'N/A' for val in model_data[metric]],
197
+ textposition='outside',
198
+ textfont=dict(size=11, color='black'),
199
+ hovertemplate='<b>Topic:</b> %{x}<br><b>Model:</b> ' + model + '<br><b>' + metric + ':</b> %{y:.4f}<extra></extra>'
200
  ))
201
 
202
  fig.update_layout(
203
+ title=dict(
204
+ text=f"<b>πŸ“Š Topic Performance Comparison</b><br><sub>Metric: {metric.replace('_', ' ')} | Partition: {partition_filter.title()}</sub>",
205
+ font=dict(size=20, color='#2c3e50')
206
+ ),
207
+ xaxis_title="<b>Topics</b>",
208
+ yaxis_title=f"<b>{metric.replace('_', ' ')}</b>",
209
  barmode='group',
210
+ height=600,
211
+ xaxis=dict(
212
+ tickangle=-45,
213
+ tickfont=dict(size=11),
214
+ fixedrange=False
215
+ ),
216
+ yaxis=dict(
217
+ range=[0, max(1.1, topic_performance[metric].max() * 1.15)],
218
+ gridcolor='rgba(200,200,200,0.4)',
219
+ fixedrange=False,
220
+ showgrid=True
221
+ ),
222
+ legend=dict(
223
+ orientation="h",
224
+ yanchor="bottom",
225
+ y=1.02,
226
+ xanchor="right",
227
+ x=1,
228
+ font=dict(size=12),
229
+ bgcolor='rgba(255,255,255,0.8)',
230
+ bordercolor='gray',
231
+ borderwidth=1
232
+ ),
233
+ plot_bgcolor='rgba(245,245,245,0.6)',
234
+ paper_bgcolor='white',
235
+ bargap=0.15,
236
+ bargroupgap=0.1
237
  )
238
 
239
  return fig
240
 
241
  def create_partition_analysis(df, models_selected=None):
242
+ """Create enhanced partition analysis with all metrics"""
243
  if models_selected is None or len(models_selected) == 0:
244
  models_selected = df['model'].unique()[:3]
245
 
 
249
  metrics = ['FPR', 'Confidence', 'FDR', 'Precision', 'Recall_Power', 'Accuracy', 'G_mean']
250
  partition_performance = filtered_df.groupby(['model', 'partition'])[metrics].mean().reset_index()
251
 
252
+ # Create subplots in a 3x3 grid
253
  fig = make_subplots(
254
+ rows=3, cols=3,
255
+ subplot_titles=['<b>' + m.replace('_', ' ') + '</b>' for m in metrics] + ['', ''],
256
+ specs=[[{"type": "bar"}, {"type": "bar"}, {"type": "bar"}],
257
+ [{"type": "bar"}, {"type": "bar"}, {"type": "bar"}],
258
+ [{"type": "bar"}, None, None]],
259
+ vertical_spacing=0.12,
260
+ horizontal_spacing=0.1
261
  )
262
 
263
+ # Generate enough colors for all models
264
+ num_models = len(models_selected)
265
+ color_palette = px.colors.qualitative.Bold
266
+ colors = (color_palette * ((num_models // len(color_palette)) + 1))[:num_models]
267
+
268
  partitions = ['train', 'test', 'inference']
269
 
270
  # Plot each metric
271
+ positions = [(1,1), (1,2), (1,3), (2,1), (2,2), (2,3), (3,1)]
272
+
273
+ for idx, metric in enumerate(metrics):
274
+ row, col = positions[idx]
275
 
276
  for j, model in enumerate(models_selected):
277
  model_data = partition_performance[partition_performance['model'] == model]
278
+ model_data = model_data.sort_values('partition')
279
+
280
+ # Shortened model name
281
+ model_short = model if len(model) <= 25 else model[:22] + '...'
282
 
283
  fig.add_trace(
284
  go.Bar(
285
+ name=model_short if idx == 0 else "",
286
  x=model_data['partition'],
287
  y=model_data[metric],
288
+ marker=dict(
289
+ color=colors[j],
290
+ line=dict(color='rgb(40,40,40)', width=1.5),
291
+ opacity=0.85
292
+ ),
293
+ showlegend=True if idx == 0 else False,
294
+ text=[f"<b>{val:.3f}</b>" for val in model_data[metric]],
295
+ textposition='outside',
296
+ textfont=dict(size=10, color='black'),
297
+ hovertemplate='<b>Partition:</b> %{x}<br><b>Model:</b> ' + model + '<br><b>' + metric + ':</b> %{y:.4f}<extra></extra>'
298
  ),
299
  row=row, col=col
300
  )
301
 
302
  fig.update_layout(
303
+ title=dict(
304
+ text="<b>πŸ”„ Model Performance Across Partitions</b><br><sub>All Metrics Overview</sub>",
305
+ font=dict(size=20, color='#2c3e50')
306
+ ),
307
+ height=950,
308
+ barmode='group',
309
+ bargap=0.15,
310
+ bargroupgap=0.1,
311
+ legend=dict(
312
+ orientation="h",
313
+ yanchor="bottom",
314
+ y=1.02,
315
+ xanchor="right",
316
+ x=1,
317
+ font=dict(size=11),
318
+ bgcolor='rgba(255,255,255,0.8)',
319
+ bordercolor='gray',
320
+ borderwidth=1
321
+ ),
322
+ plot_bgcolor='rgba(245,245,245,0.6)',
323
+ paper_bgcolor='white'
324
  )
325
 
326
+ # Update axes for all subplots
327
+ for idx in range(len(metrics)):
328
+ row, col = positions[idx]
329
+ fig.update_yaxes(
330
+ range=[0, 1.05],
331
+ gridcolor='rgba(200,200,200,0.4)',
332
+ showgrid=True,
333
+ row=row, col=col, fixedrange=False
334
+ )
335
+ fig.update_xaxes(tickfont=dict(size=10), row=row, col=col)
336
 
337
  return fig
338
 
339
  def create_performance_summary_table(df):
340
+ """Create enhanced summary table with key statistics"""
 
341
  summary_stats = []
342
 
343
  for model in df['model'].unique():
344
  model_data = df[df['model'] == model]
345
 
346
  stats = {
347
+ '🏷️ Model': model,
348
+ 'πŸ“Š Avg Accuracy': f"{model_data['Accuracy'].mean():.4f}",
349
+ '🎯 Avg Precision': f"{model_data['Precision'].mean():.4f}",
350
+ 'πŸ” Avg Recall': f"{model_data['Recall_Power'].mean():.4f}",
351
+ 'πŸ“ˆ Avg G-mean': f"{model_data['G_mean'].mean():.4f}",
352
+ 'βœ… Best Topic': model_data.loc[model_data['Accuracy'].idxmax(), 'topic'],
353
+ '⭐ Best Score': f"{model_data['Accuracy'].max():.4f}",
354
+ '⚠️ Worst Topic': model_data.loc[model_data['Accuracy'].idxmin(), 'topic'],
355
+ 'πŸ“‰ Worst Score': f"{model_data['Accuracy'].min():.4f}",
356
+ 'πŸ“Š Variance': f"{model_data['Accuracy'].var():.6f}"
357
  }
358
  summary_stats.append(stats)
359
 
360
  summary_df = pd.DataFrame(summary_stats)
361
+
362
+ # Sort by average accuracy
363
+ summary_df['_sort_key'] = summary_df['πŸ“Š Avg Accuracy'].astype(float)
364
+ summary_df = summary_df.sort_values('_sort_key', ascending=False)
365
+ summary_df = summary_df.drop('_sort_key', axis=1)
366
 
367
  return summary_df
368
 
369
+ def create_detailed_metrics_heatmap(df, models_selected=None):
370
+ """Create a heatmap showing all metrics for selected models"""
371
+ if models_selected is None or len(models_selected) == 0:
372
+ models_selected = df['model'].unique()[:3]
373
+
374
+ filtered_df = df[df['model'].isin(models_selected)].copy()
375
+
376
+ # Calculate average for each metric by model
377
+ metrics = ['FPR', 'Confidence', 'FDR', 'Precision', 'Recall_Power', 'Accuracy', 'G_mean']
378
+ heatmap_data = filtered_df.groupby('model')[metrics].mean()
379
+
380
+ # Create heatmap
381
+ fig = go.Figure(data=go.Heatmap(
382
+ z=heatmap_data.values,
383
+ x=[m.replace('_', ' ') for m in metrics],
384
+ y=heatmap_data.index,
385
+ colorscale='RdYlGn',
386
+ text=heatmap_data.values.round(4),
387
+ texttemplate='<b>%{text}</b>',
388
+ textfont={"size": 12},
389
+ hovertemplate='<b>Model:</b> %{y}<br><b>Metric:</b> %{x}<br><b>Value:</b> %{z:.4f}<extra></extra>',
390
+ colorbar=dict(title="Score")
391
+ ))
392
+
393
+ fig.update_layout(
394
+ title=dict(
395
+ text="<b>πŸ”₯ Metrics Heatmap</b><br><sub>Average Performance Across All Topics and Partitions</sub>",
396
+ font=dict(size=20, color='#2c3e50')
397
+ ),
398
+ xaxis_title="<b>Metrics</b>",
399
+ yaxis_title="<b>Models</b>",
400
+ height=200 + (len(models_selected) * 60),
401
+ font=dict(size=12),
402
+ plot_bgcolor='white',
403
+ paper_bgcolor='white',
404
+ xaxis=dict(fixedrange=False),
405
+ yaxis=dict(fixedrange=False)
406
+ )
407
+
408
+ return fig
409
+
410
+ # Create the Gradio interface with enhanced styling
411
+ with gr.Blocks(title="Multi-Model Classifier Dashboard", theme=gr.themes.Soft()) as demo:
412
+ gr.HTML("""
413
+ <div style='text-align: center; padding: 20px; background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); border-radius: 10px; margin-bottom: 20px;'>
414
+ <h1 style='color: white; margin: 0; font-size: 2.5em;'>πŸ† Multi-Model Classifier Dashboard</h1>
415
+ <p style='color: #f0f0f0; margin: 10px 0 0 0; font-size: 1.1em;'>Comprehensive Performance Analysis & Comparison Tool</p>
416
+ </div>
417
+ """)
418
 
419
  # Data loading section
420
  with gr.Row():
421
+ with gr.Column(scale=2):
422
  csv_file = gr.File(
423
  label="πŸ“ Upload CSV File",
424
+ file_types=['.csv'],
425
+ file_count="single"
426
  )
427
+ with gr.Column(scale=3):
428
  data_status = gr.Textbox(
429
+ label="πŸ“Š Data Status",
430
+ value="Using default sample data (2 models, 48 records)",
431
+ interactive=False,
432
+ lines=2
433
  )
434
 
435
  # Store current data
 
437
 
438
  with gr.Tabs():
439
  with gr.TabItem("πŸ† Model Leaderboard"):
440
+ gr.Markdown("### Compare all models side-by-side across key performance metrics")
441
  with gr.Row():
442
  with gr.Column(scale=1):
443
  partition_filter = gr.Dropdown(
444
  choices=['all', 'inference', 'test', 'train'],
445
  value='all',
446
+ label="πŸ” Filter by Partition"
447
  )
448
  topic_filter = gr.Dropdown(
449
  choices=['all', 'OVERALL'],
450
  value='OVERALL',
451
+ label="🏷️ Filter by Topic"
452
  )
453
+ gr.Markdown("""
454
+ **πŸ“– How to use:**
455
+ - Select partition to view performance on specific data splits
456
+ - Choose topic to focus on particular business domains
457
+ - Bars are color-coded: 🟒 Green = Better, πŸ”΄ Red = Worse
458
+ """)
459
 
460
  with gr.Column(scale=3):
461
  leaderboard_chart = gr.Plot()
462
 
463
  with gr.TabItem("πŸ“Š Topic Comparison"):
464
+ gr.Markdown("### Analyze how selected models perform across different topics")
465
  with gr.Row():
466
  with gr.Column(scale=1):
467
  models_selector = gr.CheckboxGroup(
468
  choices=[],
469
+ label="βœ… Select Models to Compare",
470
  value=[]
471
  )
472
  metric_selector = gr.Dropdown(
473
  choices=['FPR', 'Confidence', 'FDR', 'Precision', 'Recall_Power', 'Accuracy', 'G_mean'],
474
  value='Accuracy',
475
+ label="πŸ“ Select Metric"
476
  )
477
  partition_filter_topic = gr.Dropdown(
478
  choices=['all', 'inference', 'test', 'train'],
479
  value='all',
480
+ label="πŸ” Filter by Partition"
481
  )
482
+ gr.Markdown("""
483
+ **πŸ“– How to use:**
484
+ - Check models you want to compare
485
+ - Choose the metric to analyze
486
+ - Compare strengths/weaknesses across topics
487
+ """)
488
 
489
  with gr.Column(scale=3):
490
  topic_comparison_chart = gr.Plot()
491
 
492
  with gr.TabItem("πŸ”„ Partition Analysis"):
493
+ gr.Markdown("### Examine model performance across train/test/inference splits")
494
  with gr.Row():
495
  with gr.Column(scale=1):
496
  models_selector_partition = gr.CheckboxGroup(
497
  choices=[],
498
+ label="βœ… Select Models to Analyze",
499
  value=[]
500
  )
501
+ gr.Markdown("""
502
+ **πŸ“– How to use:**
503
+ - Select models to analyze
504
+ - View all 7 metrics simultaneously
505
+ - Identify overfitting (train >> test)
506
+ - Check generalization (test vs inference)
507
+ """)
508
 
509
  with gr.Column(scale=3):
510
  partition_analysis_chart = gr.Plot()
511
 
512
+ with gr.TabItem("πŸ”₯ Metrics Heatmap"):
513
+ gr.Markdown("### Visual overview of all metrics for quick comparison")
514
+ with gr.Row():
515
+ with gr.Column(scale=1):
516
+ models_selector_heatmap = gr.CheckboxGroup(
517
+ choices=[],
518
+ label="βœ… Select Models for Heatmap",
519
+ value=[]
520
+ )
521
+ gr.Markdown("""
522
+ **πŸ“– How to use:**
523
+ - Select models to include in heatmap
524
+ - Quickly spot strengths (green) and weaknesses (red)
525
+ - Average across all topics and partitions
526
+ """)
527
+
528
+ with gr.Column(scale=3):
529
+ heatmap_chart = gr.Plot()
530
+
531
  with gr.TabItem("πŸ“ˆ Performance Summary"):
532
+ gr.Markdown("### Statistical overview and key insights for each model")
533
  summary_table = gr.DataFrame(
534
+ label="πŸ“Š Model Performance Summary Table",
535
+ interactive=False,
536
+ wrap=True
537
  )
538
 
539
  with gr.TabItem("πŸ“‹ Raw Data"):
540
+ gr.Markdown("### Complete dataset view - explore all records")
541
  raw_data_table = gr.DataFrame(
542
+ label="πŸ—‚οΈ Complete Dataset",
543
+ interactive=True,
544
+ wrap=True
545
  )
546
 
547
  def update_dashboard(file):
 
552
  model_choices = sorted(df['model'].unique())
553
  topic_choices = ['all'] + sorted(df['topic'].unique())
554
 
555
+ # Select default models (up to 3)
556
+ default_models = model_choices[:min(3, len(model_choices))]
557
+
558
  # Create initial plots
559
  leaderboard = create_model_leaderboard(df)
560
+ topic_comp = create_topic_comparison(df, default_models)
561
+ partition_analysis = create_partition_analysis(df, default_models)
562
+ heatmap = create_detailed_metrics_heatmap(df, default_models)
563
  summary = create_performance_summary_table(df)
564
 
565
  return (
566
  df, status,
567
  gr.update(choices=topic_choices, value='OVERALL'),
568
+ gr.update(choices=model_choices, value=default_models),
569
+ gr.update(choices=model_choices, value=default_models),
570
+ gr.update(choices=model_choices, value=default_models),
571
+ leaderboard, topic_comp, partition_analysis, heatmap, summary, df
572
  )
573
 
574
  # Event handlers
 
577
  inputs=[csv_file],
578
  outputs=[
579
  current_data, data_status, topic_filter,
580
+ models_selector, models_selector_partition, models_selector_heatmap,
581
  leaderboard_chart, topic_comparison_chart,
582
+ partition_analysis_chart, heatmap_chart, summary_table, raw_data_table
583
  ]
584
  )
585
 
 
601
 
602
  # Update topic comparison when models, metric, or partition change
603
  def update_topic_comparison(data, selected_models, metric, partition):
604
+ if not selected_models:
605
+ selected_models = data['model'].unique()[:3]
606
  return create_topic_comparison(data, selected_models, metric, partition)
607
 
608
  models_selector.change(
 
625
 
626
  # Update partition analysis when models change
627
  def update_partition_analysis(data, selected_models):
628
+ if not selected_models:
629
+ selected_models = data['model'].unique()[:3]
630
  return create_partition_analysis(data, selected_models)
631
 
632
  models_selector_partition.change(
 
635
  outputs=partition_analysis_chart
636
  )
637
 
638
+ # Update heatmap when models change
639
+ def update_heatmap(data, selected_models):
640
+ if not selected_models:
641
+ selected_models = data['model'].unique()[:3]
642
+ return create_detailed_metrics_heatmap(data, selected_models)
643
+
644
    # Re-render the heatmap whenever the heatmap-tab model selection changes.
    # inputs: current data State + the checkbox group; output: the heatmap Plot.
    models_selector_heatmap.change(
        fn=update_heatmap,
        inputs=[current_data, models_selector_heatmap],
        outputs=heatmap_chart
    )
649
+
650
    # Initialize dashboard with default data
    # On page load, run the same refresh path as a CSV upload with file=None,
    # which makes update_dashboard fall back to the bundled sample data and
    # populate every component (filters, charts, tables) in one pass.
    demo.load(
        fn=lambda: update_dashboard(None),
        outputs=[
            current_data, data_status, topic_filter,
            models_selector, models_selector_partition, models_selector_heatmap,
            leaderboard_chart, topic_comparison_chart,
            partition_analysis_chart, heatmap_chart, summary_table, raw_data_table
        ]
    )
660
 
661
  gr.Markdown("""
662
+ ---
663
+ ### πŸ’‘ Dashboard Features & Usage Guide
664
+
665
+ #### πŸ“ **Data Loading**
666
+ Upload your CSV file with classifier performance results. The dashboard automatically:
667
+ - Detects all models in your dataset
668
+ - Validates data structure and quality
669
+ - Creates comprehensive comparisons across all dimensions
670
+
671
+ #### πŸ† **Model Leaderboard**
672
+ - **Horizontal bar charts** make it easy to compare models at a glance
673
+ - **Color-coded performance**: Green indicates better scores, red indicates lower scores
674
+ - Filter by **partition** (train/test/inference) and **topic** for targeted analysis
675
+ - **Overall Score** calculated from precision, recall, and accuracy averages
676
+
677
+ #### πŸ“Š **Topic Comparison**
678
+ - Select multiple models to compare side-by-side
679
+ - Choose any metric: FPR, Confidence, FDR, Precision, Recall, Accuracy, or G-mean
680
+ - Identify which topics each model excels at or struggles with
681
+ - Filter by partition to see performance on specific data splits
682
+
683
+ #### πŸ”„ **Partition Analysis**
684
+ - View all 7 metrics simultaneously in a compact grid layout
685
+ - Compare train/test/inference performance to detect overfitting
686
+ - Check model generalization capabilities
687
+ - Grouped bars show direct model-to-model comparisons
688
+
689
+ #### πŸ”₯ **Metrics Heatmap**
690
+ - **Visual overview** of all metrics for quick pattern recognition
691
+ - Color intensity shows performance levels at a glance
692
+ - Average performance across all topics and partitions
693
+ - Perfect for executive summaries and presentations
694
+
695
+ #### πŸ“ˆ **Performance Summary**
696
+ - Statistical overview with key performance indicators
697
+ - Best and worst performing topics identified for each model
698
+ - Performance variance shows consistency across topics
699
+ - Sortable table for custom analysis
700
+
701
+ #### πŸ“‹ **CSV Format Requirements**
702
+ Your CSV file must include these columns:
703
+ - `model`: Model name/identifier
704
+ - `partition`: Data split (train/test/inference)
705
+ - `topic`: Business domain or category
706
+ - `FPR`: False Positive Rate
707
+ - `Confidence`: Model confidence scores
708
+ - `FDR`: False Discovery Rate
709
+ - `Precision`: Positive predictive value
710
+ - `Recall_Power`: True positive rate / Sensitivity
711
+ - `Accuracy`: Overall correctness
712
+ - `G_mean`: Geometric mean of sensitivity and specificity
713
+
714
+ ---
715
+
716
+ ### 🎯 **Tips for Best Results**
717
+
718
+ 1. **Compare 2-4 models** at a time for clearest visualizations
719
+ 2. **Start with the Leaderboard** to identify top performers
720
+ 3. **Use Topic Comparison** to find domain-specific strengths
721
+ 4. **Check Partition Analysis** to ensure model generalization
722
+ 5. **Review the Heatmap** for quick executive summaries
723
+
724
+ ### πŸš€ **Advanced Analysis**
725
+
726
+ - **Overfitting Detection**: If train scores >> test scores in Partition Analysis
727
+ - **Generalization Check**: Compare test vs inference performance
728
+ - **Topic Specialization**: Use Topic Comparison to identify niche strengths
729
+ - **Consistency Analysis**: Check variance in Performance Summary
730
+
731
+ ---
732
+
733
+ <div style='text-align: center; padding: 15px; background-color: #f8f9fa; border-radius: 8px; margin-top: 20px;'>
734
+ <p style='margin: 0; color: #6c757d;'><b>Built with ❀️ using Gradio & Plotly</b></p>
735
+ <p style='margin: 5px 0 0 0; font-size: 0.9em; color: #6c757d;'>Interactive ML Model Performance Dashboard</p>
736
+ </div>
737
  """)
738
 
739
  if __name__ == "__main__":