Spaces:

akera
/

leaderboard

Sleeping

App Files Community

akera committed on Jun 13, 2025

Commit

ce626d3

verified ·

1 Parent(s): 796d1cd

Update src/plotting.py

Browse files

Files changed (1) hide show

src/plotting.py +124 -57

src/plotting.py CHANGED Viewed

@@ -27,14 +27,12 @@ def create_leaderboard_ranking_plot(df: pd.DataFrame, metric: str = 'quality_sco
             x=0.5, y=0.5, showarrow=False,
             font=dict(size=16)
         )
         return fig
     # Get top N models
     top_models = df.head(top_n)
-    # Create color scale based on scores
-    colors = px.colors.qualitative.Set3[:len(top_models)]
     # Create horizontal bar chart
     fig = go.Figure(data=[
         go.Bar(
@@ -79,7 +77,10 @@ def create_metrics_comparison_plot(df: pd.DataFrame, models: List[str] = None, m
     """Create radar chart comparing multiple metrics across models."""
     if df.empty:
-        return go.Figure().add_annotation(text="No data available", x=0.5, y=0.5)
     # Select models to compare
     if models is None:
@@ -88,7 +89,10 @@ def create_metrics_comparison_plot(df: pd.DataFrame, models: List[str] = None, m
         selected_models = df[df['model_name'].isin(models)].head(max_models)
     if len(selected_models) == 0:
-        return go.Figure().add_annotation(text="No models found", x=0.5, y=0.5)
     # Metrics to include in radar chart
     metrics = ['quality_score', 'bleu', 'chrf', 'rouge1', 'rougeL']
@@ -139,7 +143,10 @@ def create_language_pair_heatmap(results_dict: Dict, metric: str = 'quality_scor
     """Create heatmap showing performance across language pairs."""
     if not results_dict or 'pair_metrics' not in results_dict:
-        return go.Figure().add_annotation(text="No language pair data available", x=0.5, y=0.5)
     pair_metrics = results_dict['pair_metrics']
@@ -168,7 +175,7 @@ def create_language_pair_heatmap(results_dict: Dict, metric: str = 'quality_scor
         colorscale='Viridis',
         showscale=True,
         colorbar=dict(title=metric.replace('_', ' ').title()),
-        hoverinfotemplate=(
             "Source: %{y}<br>" +
             "Target: %{x}<br>" +
             f"{metric.replace('_', ' ').title()}: %{{z:.3f}}<br>" +
@@ -190,7 +197,10 @@ def create_coverage_analysis_plot(df: pd.DataFrame) -> go.Figure:
     """Create plot analyzing test set coverage across submissions."""
     if df.empty:
-        return go.Figure().add_annotation(text="No data available", x=0.5, y=0.5)
     fig = make_subplots(
         rows=2, cols=2,
@@ -258,7 +268,10 @@ def create_model_performance_timeline(df: pd.DataFrame) -> go.Figure:
     """Create timeline showing model performance over time."""
     if df.empty:
-        return go.Figure().add_annotation(text="No data available", x=0.5, y=0.5)
     # Convert submission_date to datetime
     df_copy = df.copy()
@@ -319,10 +332,13 @@ def create_google_comparison_plot(df: pd.DataFrame) -> go.Figure:
     google_models = df[df['google_pairs_covered'] > 0].copy()
     if google_models.empty:
-        return go.Figure().add_annotation(
             text="No models with Google Translate comparable results",
-            x=0.5, y=0.5
         )
     fig = go.Figure()
@@ -360,10 +376,13 @@ def create_google_comparison_plot(df: pd.DataFrame) -> go.Figure:
     return fig
 def create_detailed_model_analysis(model_results: Dict, model_name: str) -> go.Figure:
-    """Create detailed analysis plot for a specific model."""
     if not model_results or 'pair_metrics' not in model_results:
-        return go.Figure().add_annotation(text="No detailed results available", x=0.5, y=0.5)
     pair_metrics = model_results['pair_metrics']
@@ -388,22 +407,26 @@ def create_detailed_model_analysis(model_results: Dict, model_name: str) -> go.F
             google_comparable.append(is_google)
     if not pairs:
-        return go.Figure().add_annotation(text="No language pair data found", x=0.5, y=0.5)
-    # Create subplot
     fig = make_subplots(
         rows=2, cols=1,
         subplot_titles=(
-            f"{model_name} - BLEU Scores by Language Pair",
-            f"{model_name} - Quality Scores by Language Pair"
         ),
-        vertical_spacing=0.1
     )
     # Color code by Google comparable
     colors = ['#1f77b4' if gc else '#ff7f0e' for gc in google_comparable]
-    # BLEU scores
     fig.add_trace(
         go.Bar(
             x=pairs,
@@ -411,12 +434,14 @@ def create_detailed_model_analysis(model_results: Dict, model_name: str) -> go.F
             marker_color=colors,
             name="BLEU",
             text=[f"{score:.1f}" for score in bleu_scores],
-            textposition='auto'
         ),
         row=1, col=1
     )
-    # Quality scores
     fig.add_trace(
         go.Bar(
             x=pairs,
@@ -424,27 +449,47 @@ def create_detailed_model_analysis(model_results: Dict, model_name: str) -> go.F
             marker_color=colors,
             name="Quality",
             text=[f"{score:.3f}" for score in quality_scores],
-            textposition='auto',
             showlegend=False
         ),
         row=2, col=1
     )
     fig.update_layout(
-        height=800,
-        title=f"📊 Detailed Analysis: {model_name}",
-        showlegend=True
     )
-    # Rotate x-axis labels
-    fig.update_xaxes(tickangle=45)
-    # Add legend for colors
     fig.add_trace(
         go.Scatter(
             x=[None], y=[None],
             mode='markers',
-            marker=dict(size=10, color='#1f77b4'),
             name="Google Comparable",
             showlegend=True
         )
@@ -454,7 +499,7 @@ def create_detailed_model_analysis(model_results: Dict, model_name: str) -> go.F
         go.Scatter(
             x=[None], y=[None],
             mode='markers',
-            marker=dict(size=10, color='#ff7f0e'),
             name="UG40 Only",
             showlegend=True
         )
@@ -468,25 +513,25 @@ def create_submission_summary_plot(validation_info: Dict, evaluation_results: Di
     fig = make_subplots(
         rows=2, cols=2,
         subplot_titles=(
-            "Coverage by Language Pair",
             "Primary Metrics",
-            "Error Analysis",
-            "Sample Distribution"
         ),
-        specs=[[{"type": "bar"}, {"type": "bar"}],
-               [{"type": "bar"}, {"type": "pie"}]]
     )
-    # Coverage by language pair
-    if 'pair_coverage' in validation_info:
-        pair_data = validation_info['pair_coverage']
-        pairs = list(pair_data.keys())[:10]  # Top 10 pairs
-        coverage_rates = [pair_data[p]['coverage_rate'] for p in pairs]
-        fig.add_trace(
-            go.Bar(x=pairs, y=coverage_rates, name="Coverage"),
-            row=1, col=1
-        )
     # Primary metrics
     if 'summary' in evaluation_results:
@@ -495,7 +540,13 @@ def create_submission_summary_plot(validation_info: Dict, evaluation_results: Di
         metric_values = list(metrics_data.values())
         fig.add_trace(
-            go.Bar(x=metric_names, y=metric_values, name="Metrics"),
             row=1, col=2
         )
@@ -505,20 +556,36 @@ def create_submission_summary_plot(validation_info: Dict, evaluation_results: Di
         error_values = [evaluation_results['averages'].get(m, 0) for m in error_metrics]
         fig.add_trace(
-            go.Bar(x=error_metrics, y=error_values, name="Errors"),
             row=2, col=1
         )
-    # Sample distribution (placeholder)
-    fig.add_trace(
-        go.Pie(
-            labels=["Evaluated", "Missing"],
-            values=[validation_info.get('coverage', 0.8) * 100,
-                   (1 - validation_info.get('coverage', 0.8)) * 100],
-            name="Samples"
-        ),
-        row=2, col=2
-    )
     fig.update_layout(
         title="📋 Submission Summary",

             x=0.5, y=0.5, showarrow=False,
             font=dict(size=16)
         )
+        fig.update_layout(title="No Data Available")
         return fig
     # Get top N models
     top_models = df.head(top_n)
     # Create horizontal bar chart
     fig = go.Figure(data=[
         go.Bar(
     """Create radar chart comparing multiple metrics across models."""
     if df.empty:
+        fig = go.Figure()
+        fig.add_annotation(text="No data available", x=0.5, y=0.5, showarrow=False)
+        fig.update_layout(title="No Data Available")
+        return fig
     # Select models to compare
     if models is None:
         selected_models = df[df['model_name'].isin(models)].head(max_models)
     if len(selected_models) == 0:
+        fig = go.Figure()
+        fig.add_annotation(text="No models found", x=0.5, y=0.5, showarrow=False)
+        fig.update_layout(title="No Models Found")
+        return fig
     # Metrics to include in radar chart
     metrics = ['quality_score', 'bleu', 'chrf', 'rouge1', 'rougeL']
     """Create heatmap showing performance across language pairs."""
     if not results_dict or 'pair_metrics' not in results_dict:
+        fig = go.Figure()
+        fig.add_annotation(text="No language pair data available", x=0.5, y=0.5, showarrow=False)
+        fig.update_layout(title="No Language Pair Data Available")
+        return fig
     pair_metrics = results_dict['pair_metrics']
         colorscale='Viridis',
         showscale=True,
         colorbar=dict(title=metric.replace('_', ' ').title()),
+        hovertemplate=(
             "Source: %{y}<br>" +
             "Target: %{x}<br>" +
             f"{metric.replace('_', ' ').title()}: %{{z:.3f}}<br>" +
     """Create plot analyzing test set coverage across submissions."""
     if df.empty:
+        fig = go.Figure()
+        fig.add_annotation(text="No data available", x=0.5, y=0.5, showarrow=False)
+        fig.update_layout(title="No Data Available")
+        return fig
     fig = make_subplots(
         rows=2, cols=2,
     """Create timeline showing model performance over time."""
     if df.empty:
+        fig = go.Figure()
+        fig.add_annotation(text="No data available", x=0.5, y=0.5, showarrow=False)
+        fig.update_layout(title="No Data Available")
+        return fig
     # Convert submission_date to datetime
     df_copy = df.copy()
     google_models = df[df['google_pairs_covered'] > 0].copy()
     if google_models.empty:
+        fig = go.Figure()
+        fig.add_annotation(
             text="No models with Google Translate comparable results",
+            x=0.5, y=0.5, showarrow=False
         )
+        fig.update_layout(title="No Google Comparable Models")
+        return fig
     fig = go.Figure()
     return fig
 def create_detailed_model_analysis(model_results: Dict, model_name: str) -> go.Figure:
+    """Create detailed analysis plot for a specific model - FIXED version."""
     if not model_results or 'pair_metrics' not in model_results:
+        fig = go.Figure()
+        fig.add_annotation(text="No detailed results available", x=0.5, y=0.5, showarrow=False)
+        fig.update_layout(title=f"No Data for {model_name}")
+        return fig
     pair_metrics = model_results['pair_metrics']
             google_comparable.append(is_google)
     if not pairs:
+        fig = go.Figure()
+        fig.add_annotation(text="No language pair data found", x=0.5, y=0.5, showarrow=False)
+        fig.update_layout(title=f"No Language Pair Data for {model_name}")
+        return fig
+    # Create subplot with proper spacing and titles
     fig = make_subplots(
         rows=2, cols=1,
         subplot_titles=(
+            f"BLEU Scores by Language Pair",
+            f"Quality Scores by Language Pair"
         ),
+        vertical_spacing=0.15,
+        row_heights=[0.45, 0.45]
     )
     # Color code by Google comparable
     colors = ['#1f77b4' if gc else '#ff7f0e' for gc in google_comparable]
+    # BLEU scores (top subplot)
     fig.add_trace(
         go.Bar(
             x=pairs,
             marker_color=colors,
             name="BLEU",
             text=[f"{score:.1f}" for score in bleu_scores],
+            textposition='outside',
+            textfont=dict(size=10),
+            showlegend=True
         ),
         row=1, col=1
     )
+    # Quality scores (bottom subplot)
     fig.add_trace(
         go.Bar(
             x=pairs,
             marker_color=colors,
             name="Quality",
             text=[f"{score:.3f}" for score in quality_scores],
+            textposition='outside',
+            textfont=dict(size=10),
             showlegend=False
         ),
         row=2, col=1
     )
+    # Update layout
     fig.update_layout(
+        height=900,
+        title=dict(
+            text=f"📊 Detailed Analysis: {model_name}",
+            x=0.5,
+            xanchor='center'
+        ),
+        showlegend=True,
+        margin=dict(l=50, r=50, t=100, b=150)
     )
+    # Update x-axes to rotate labels properly
+    fig.update_xaxes(
+        tickangle=45,
+        tickfont=dict(size=10),
+        row=1, col=1
+    )
+    fig.update_xaxes(
+        tickangle=45,
+        tickfont=dict(size=10),
+        row=2, col=1
+    )
+    # Update y-axes
+    fig.update_yaxes(title_text="BLEU Score", row=1, col=1)
+    fig.update_yaxes(title_text="Quality Score", row=2, col=1)
+    # Add legend manually for Google vs UG40 only
     fig.add_trace(
         go.Scatter(
             x=[None], y=[None],
             mode='markers',
+            marker=dict(size=15, color='#1f77b4', symbol='square'),
             name="Google Comparable",
             showlegend=True
         )
         go.Scatter(
             x=[None], y=[None],
             mode='markers',
+            marker=dict(size=15, color='#ff7f0e', symbol='square'),
             name="UG40 Only",
             showlegend=True
         )
     fig = make_subplots(
         rows=2, cols=2,
         subplot_titles=(
+            "Sample Distribution",
             "Primary Metrics",
+            "Error Analysis",
+            "Coverage Summary"
         ),
+        specs=[[{"type": "pie"}, {"type": "bar"}],
+               [{"type": "bar"}, {"type": "bar"}]]
     )
+    # Sample distribution (pie chart)
+    coverage = validation_info.get('coverage', 0.8)
+    fig.add_trace(
+        go.Pie(
+            labels=["Evaluated", "Missing"],
+            values=[coverage * 100, (1 - coverage) * 100],
+            name="Samples"
+        ),
+        row=1, col=1
+    )
     # Primary metrics
     if 'summary' in evaluation_results:
         metric_values = list(metrics_data.values())
         fig.add_trace(
+            go.Bar(
+                x=metric_names,
+                y=metric_values,
+                name="Metrics",
+                text=[f"{val:.3f}" for val in metric_values],
+                textposition='auto'
+            ),
             row=1, col=2
         )
         error_values = [evaluation_results['averages'].get(m, 0) for m in error_metrics]
         fig.add_trace(
+            go.Bar(
+                x=error_metrics,
+                y=error_values,
+                name="Errors",
+                text=[f"{val:.3f}" for val in error_values],
+                textposition='auto'
+            ),
             row=2, col=1
         )
+    # Coverage summary
+    if 'summary' in evaluation_results:
+        summary = evaluation_results['summary']
+        coverage_labels = ["Total Samples", "Lang Pairs", "Google Pairs"]
+        coverage_values = [
+            summary.get('total_samples', 0),
+            summary.get('language_pairs_covered', 0),
+            summary.get('google_comparable_pairs', 0)
+        ]
+        fig.add_trace(
+            go.Bar(
+                x=coverage_labels,
+                y=coverage_values,
+                name="Coverage",
+                text=[f"{val}" for val in coverage_values],
+                textposition='auto'
+            ),
+            row=2, col=2
+        )
     fig.update_layout(
         title="📋 Submission Summary",