Spaces:

SelvaKrish
/

RAG

Sleeping

App Files Files Community

Ajaykanth Maddi committed on Jul 19, 2025

Commit

1921c0a

1 Parent(s): 3e0e5e4

Code Changes - Reranking Implementation

Browse files

Files changed (1) hide show

app.py +161 -55

app.py CHANGED Viewed

@@ -114,7 +114,56 @@ def get_info_from_sample_questions(subset, question):
             return orig_ans, y_metrics
     return "No answer found.", "No metrics found."
-def plot_subset_metrics(subset_name):
     """Generate a bar plot of key metrics for a given subset with proper title display."""
     try:
         summary = ragbench_details[subset_name]["summary"]
@@ -169,61 +218,11 @@ def plot_subset_metrics(subset_name):
             )
         return fig
-    except KeyError:
-        print(f"Error: Subset '{subset_name}' not found")
     except Exception as e:
         print(f"Plotting error: {str(e)}")
-def plot_subset_metrics_old(subset_name):
-    summary = ragbench_details[subset_name]["summary"]
-    # Create a DataFrame for plotting
-    keys = ['Entries', 'TotalDocs', 'TotalUniqueIds', 'TotalUniqueDocs', 'UniqueDocsPercent']
-    values = [summary.get(k, 0) for k in keys]
-    fig, ax = plt.subplots(figsize=(8, 4))
-    bars = ax.bar(keys, values, color='skyblue')
-    plt.tight_layout()  # Apply tight layout after plotting
-    # ✅ Add count on top of bars
-    ax.bar_label(bars, fmt='%.0f', padding=3)
-    # ax.bar(keys, values, color="mediumseagreen")
-    ax.set_title(f"Metrics for Subset: {subset_name}, Domain: {summary.get('Domain')}", fontsize=14)
-    ax.set_ylabel("Value")
-    ax.grid(axis='y')
-    ax.set_xticks(range(len(keys)))
-    ax.set_xticklabels(keys, rotation=21, ha='right')
-    return fig
-def plot_chunking_strategies_old(subset_name):
-    chunking_data = ragbench_details[subset_name]["chunking"]
-    plt.figure(figsize=(8, 4))
-    strategies = list(chunking_data.keys())
-    counts = list(chunking_data.values())
-    bars = plt.bar(strategies, counts, color="skyblue")
-    # Add value labels on top of bars
-    for bar in bars:
-        yval = bar.get_height()
-        plt.text(bar.get_x() + bar.get_width()/2, yval + 20, int(yval), ha='center', va='bottom', fontsize=10)
-    # plt.xlabel("Chunking Strategies")
-    plt.tight_layout()  # Apply tight layout after plotting
-    plt.ylabel("Number of Chunks")
-    plt.title(f"Chunking Strategy Distribution - {subset_name}")
-    plt.xticks(rotation=30)
-    plt.tight_layout()
-    # Return plot as figure (Gradio accepts it)
-    return plt.gcf()
-def plot_chunking_strategies(subset_name):
     """Visualize chunking strategy distribution with enhanced formatting."""
     try:
         chunking_data = ragbench_details[subset_name]["chunking"]
@@ -235,9 +234,6 @@ def plot_chunking_strategies(subset_name):
         strategies = list(chunking_data.keys())
         counts = list(chunking_data.values())
-        # # Create color gradient based on count values
-        # colors = plt.cm.Blues(np.linspace(0.4, 1, len(strategies)))
         # Plot bars with different colors
         bars = ax.bar(strategies, counts, color='skyblue', edgecolor='white', linewidth=0.7)
@@ -277,11 +273,121 @@ def plot_chunking_strategies(subset_name):
         # Auto-scale y-axis with 10% headroom
         ax.set_ylim(0, max(counts) * 1.1)
         return fig
     except Exception as e:
         print(f"Error plotting chunking strategies: {str(e)}")
 # Initialize with first subset's summary: pick the first entry of
 # available_subsets (None when it is empty) and pre-render its metrics plot.
 # With no subset, initial_plot is the placeholder string rather than a figure.
 initial_subset = available_subsets[0] if available_subsets else None
 initial_plot = plot_subset_metrics(initial_subset) if initial_subset else "No data available"

             return orig_ans, y_metrics
     return "No answer found.", "No metrics found."
+def plot_subset_metrics_old(subset_name):
+    summary = ragbench_details[subset_name]["summary"]
+    # Create a DataFrame for plotting
+    keys = ['Entries', 'TotalDocs', 'TotalUniqueIds', 'TotalUniqueDocs', 'UniqueDocsPercent']
+    values = [summary.get(k, 0) for k in keys]
+    fig, ax = plt.subplots(figsize=(8, 4))
+    bars = ax.bar(keys, values, color='skyblue')
+    plt.tight_layout()  # Apply tight layout after plotting
+    # ✅ Add count on top of bars
+    ax.bar_label(bars, fmt='%.0f', padding=3)
+    # ax.bar(keys, values, color="mediumseagreen")
+    ax.set_title(f"Metrics for Subset: {subset_name}, Domain: {summary.get('Domain')}", fontsize=14)
+    ax.set_ylabel("Value")
+    ax.grid(axis='y')
+    ax.set_xticks(range(len(keys)))
+    ax.set_xticklabels(keys, rotation=21, ha='right')
+    return fig
+def plot_chunking_strategies_old(subset_name):
+    chunking_data = ragbench_details[subset_name]["chunking"]
+    plt.figure(figsize=(8, 4))
+    strategies = list(chunking_data.keys())
+    counts = list(chunking_data.values())
+    bars = plt.bar(strategies, counts, color="skyblue")
+    # Add value labels on top of bars
+    for bar in bars:
+        yval = bar.get_height()
+        plt.text(bar.get_x() + bar.get_width()/2, yval + 20, int(yval), ha='center', va='bottom', fontsize=10)
+    # plt.xlabel("Chunking Strategies")
+    plt.tight_layout()  # Apply tight layout after plotting
+    plt.ylabel("Number of Chunks")
+    plt.title(f"Chunking Strategy Distribution - {subset_name}")
+    plt.xticks(rotation=30)
+    plt.tight_layout()
+    # Return plot as figure (Gradio accepts it)
+    return plt.gcf()
+def plot_subset_metrics_old1(subset_name):
     """Generate a bar plot of key metrics for a given subset with proper title display."""
     try:
         summary = ragbench_details[subset_name]["summary"]
             )
         return fig
     except Exception as e:
         print(f"Plotting error: {str(e)}")
+def plot_chunking_strategies_old1(subset_name):
     """Visualize chunking strategy distribution with enhanced formatting."""
     try:
         chunking_data = ragbench_details[subset_name]["chunking"]
         strategies = list(chunking_data.keys())
         counts = list(chunking_data.values())
         # Plot bars with different colors
         bars = ax.bar(strategies, counts, color='skyblue', edgecolor='white', linewidth=0.7)
         # Auto-scale y-axis with 10% headroom
         ax.set_ylim(0, max(counts) * 1.1)
+        return fig
+    except Exception as e:
+        print(f"Error plotting chunking strategies: {str(e)}")
+def plot_chunking_strategies(subset_name):
+    """Visualize chunking strategy distribution with consistent formatting."""
+    try:
+        chunking_data = ragbench_details[subset_name]["chunking"]
+        # Create figure with constrained layout
+        fig, ax = plt.subplots(figsize=(10, 5), constrained_layout=True)
+        # Prepare data
+        strategies = list(chunking_data.keys())
+        counts = list(chunking_data.values())
+        # Plot bars with consistent styling
+        bars = ax.bar(strategies, counts, color='skyblue', edgecolor='white', linewidth=0.7)
+        # Add value labels (consistent with plot_subset_metrics)
+        ax.bar_label(bars, fmt='%d', padding=3, fontsize=9)
+        # Customize plot (aligned with plot_subset_metrics style)
+        ax.set_title(
+            f"Chunking Strategy Distribution - {subset_name}",
+            fontsize=12,
+            pad=20,
+            loc='left'
+        )
+        ax.set_ylabel("Number of Chunks", fontsize=10)
+        # Rotate x-labels (consistent angle)
+        ax.set_xticks(range(len(strategies)))
+        ax.set_xticklabels(
+            strategies,
+            rotation=25,  # Matches 25° from plot_subset_metrics
+            ha='right',
+            fontsize=9,
+            rotation_mode='anchor'
+        )
+        # Consistent grid and spines
+        ax.grid(axis='y', linestyle=':', alpha=0.6)
+        ax.spines[['top', 'right']].set_visible(False)
+        # Auto-scale with same headroom
+        ax.set_ylim(0, max(counts) * 1.1)
         return fig
     except Exception as e:
         print(f"Error plotting chunking strategies: {str(e)}")
+def plot_subset_metrics(subset_name):
+    """Generate a bar plot of key metrics with consistent formatting."""
+    try:
+        summary = ragbench_details[subset_name]["summary"]
+        # Metrics to plot
+        metrics = {
+            'Entries': 'Total Entries',
+            'TotalDocs': 'Total Documents',
+            'TotalUniqueIds': 'Unique IDs',
+            'TotalUniqueDocs': 'Unique Documents',
+            'UniqueDocsPercent': '% Unique Docs'
+        }
+        # Prepare data
+        display_names = list(metrics.values())
+        values = [summary.get(metric, 0) for metric in metrics.keys()]
+        # Create figure with same layout
+        fig, ax = plt.subplots(figsize=(10, 5), constrained_layout=True)
+        # Plot bars with same style
+        bars = ax.bar(display_names, values, color='skyblue', edgecolor='white', linewidth=0.7)
+        # Consistent value labels
+        ax.bar_label(bars, fmt='%d', padding=3, fontsize=9)
+        # Title with same style
+        title = f"Dataset Metrics - {subset_name}"
+        if 'Domain' in summary:
+            title += f" (Domain: {summary['Domain']})"
+        ax.set_title(title, fontsize=12, pad=20, loc='left')
+        # Consistent axis styling
+        ax.set_ylabel("Count", fontsize=10)
+        ax.grid(axis='y', linestyle=':', alpha=0.6)
+        ax.spines[['top', 'right']].set_visible(False)
+        # Same label rotation
+        ax.set_xticks(range(len(display_names)))
+        ax.set_xticklabels(display_names, rotation=25, ha='right', fontsize=9)
+        # Special percentage handling (now matches chunking plot's y-limit logic)
+        if 'UniqueDocsPercent' in summary:
+            current_ylim = ax.get_ylim()
+            ax.set_ylim(current_ylim[0], max(current_ylim[1], summary['UniqueDocsPercent'] * 1.2))
+            ax.text(
+                len(metrics)-1,
+                summary['UniqueDocsPercent'],
+                f"{summary['UniqueDocsPercent']}%",
+                ha='center',
+                va='bottom',
+                fontsize=10,
+                bbox=dict(facecolor='white', alpha=0.8, edgecolor='none')
+            )
+        return fig
+    except Exception as e:
+        print(f"Error plotting metrics: {str(e)}")
 # Initialize with first subset's summary: pick the first entry of
 # available_subsets (None when it is empty) and pre-render its metrics plot.
 # With no subset, initial_plot is the placeholder string rather than a figure.
 initial_subset = available_subsets[0] if available_subsets else None
 initial_plot = plot_subset_metrics(initial_subset) if initial_subset else "No data available"