Spaces:
Running
Running
| import plotly.graph_objects as go | |
| from plotly.subplots import make_subplots | |
| import numpy as np | |
# Palette of 30 distinct hex colours. Entries are handed out in palette
# order according to Avg AUP rank (see get_model_colors).
COLOR_PALETTE = [
    "#E91E63", "#4A90E2", "#00BFA5", "#FF6B35", "#8E24AA", "#4CAF50",
    "#FF4081", "#303F9F", "#FFD166", "#00796B", "#C2185B", "#7B1FA2",
    "#26A69A", "#1A4C7C", "#FF8C42", "#009688", "#673AB7", "#F44336",
    "#3F51B5", "#795548", "#607D8B", "#9C27B0", "#2196F3", "#CDDC39",
    "#FF9800", "#00BCD4", "#E64A19", "#5D4037", "#455A64", "#AD1457",
]
def get_model_colors(df):
    """Map every method name to a palette colour, best Avg AUP first.

    Rows are ordered by the ``Avg_AUP`` column (highest first) and the
    method at position ``i`` receives ``COLOR_PALETTE[i]``; the palette
    wraps around once it is exhausted.
    """
    ranked = df.sort_values("Avg_AUP", ascending=False)
    palette_size = len(COLOR_PALETTE)
    colors = {}
    for position, name in enumerate(ranked["Method"].tolist()):
        colors[name] = COLOR_PALETTE[position % palette_size]
    return colors
def get_model_ranks(df):
    """Return a ``{method: rank}`` mapping where rank 1 has the highest Avg AUP."""
    ranked = df.sort_values("Avg_AUP", ascending=False)
    ranks = {}
    # Ranks are 1-based positions in the descending Avg_AUP ordering.
    for position, name in enumerate(ranked["Method"].tolist(), start=1):
        ranks[name] = position
    return ranks
def hex_to_rgba(hex_color, alpha=0.25):
    """Convert an ``RRGGBB`` hex colour into an ``rgba(r,g,b,alpha)`` string.

    Leading ``#`` characters are optional; ``alpha`` is inserted as given.
    """
    digits = hex_color.lstrip('#')
    # Each channel is one pair of hex digits: RR, GG, BB.
    red, green, blue = (int(digits[start:start + 2], 16) for start in (0, 2, 4))
    return f'rgba({red},{green},{blue},{alpha})'
def _radar_score_missing(value):
    """Return True when *value* is ``None`` or NaN, i.e. no score is available."""
    # NaN is the only float that compares unequal to itself.
    return value is None or value != value


def create_radar_chart(df, tasks, top_n=15):
    """Create a radar chart of AUP scores for the first ``top_n`` rows of *df*.

    Each axis (one per task plus "Avg AUP") is scaled independently: a score
    is mapped linearly from the column's [min, max] among the displayed rows
    onto the radius range 10-90, so the polygon shape compares methods per
    axis while the hover text still shows the original score.

    Args:
        df: DataFrame with a ``Method`` column, an ``Avg_AUP`` column and one
            ``<task>_AUP`` column per entry of *tasks*. The first ``top_n``
            rows are plotted as-is (NOTE(review): this presumably expects
            *df* to be pre-sorted by Avg AUP - confirm against the caller).
        tasks: list of task names used to derive the ``<task>_AUP`` columns.
        top_n: number of leading rows to draw.

    Returns:
        A ``plotly.graph_objects.Figure`` with one ``Scatterpolar`` trace per
        plotted row.
    """
    df_top = df.head(top_n).copy()
    # Colours and ranks are derived from the full table, not only the top rows.
    model_colors = get_model_colors(df)
    model_ranks = get_model_ranks(df)

    all_cols = [f"{t}_AUP" for t in tasks] + ["Avg_AUP"]
    categories = [t.replace("-", "\n") for t in tasks] + ["Avg\nAUP"]

    # Per-column min/max over the displayed rows (used for radar display only).
    col_stats = {}
    for col in all_cols:
        vals = df_top[col].dropna().astype(float)
        if len(vals) > 0:
            col_stats[col] = {'min': vals.min(), 'max': vals.max()}
        else:
            col_stats[col] = {'min': 0, 'max': 100}

    fig = go.Figure()
    for _, row in df_top.iterrows():
        method = row["Method"]
        rank = model_ranks.get(method, 0)
        color = model_colors.get(method, "#808080")
        display_name = f"#{rank} {method}"

        normalized = []   # radius per axis on the shared 0-100 scale
        hover_texts = []  # hover label per axis with the original AUP score
        for cat, col in zip(categories, all_cols):
            val = row.get(col)
            if _radar_score_missing(val):
                # A missing score has no meaningful radius: leave a gap in the
                # outline instead of plotting 0 (which would fall outside the
                # 10-90 band) or printing "nan" in the hover text.
                normalized.append(None)
                hover_texts.append(f"<b>{display_name}</b><br>{cat}: <b>N/A</b>")
                continue

            val = float(val)
            stats = col_stats[col]
            range_val = stats['max'] - stats['min']
            if range_val > 0:
                norm = ((val - stats['min']) / range_val) * 80 + 10  # Scale to 10-90
            else:
                # All displayed methods share the same score on this axis.
                norm = 50
            normalized.append(norm)
            hover_texts.append(f"<b>{display_name}</b><br>{cat}: <b>{val:.1f}</b>")

        # The first point is repeated at the end so the outline closes.
        fig.add_trace(go.Scatterpolar(
            r=normalized + [normalized[0]],
            theta=categories + [categories[0]],
            mode='lines+markers', fill='toself', name=display_name,
            line=dict(color=color, width=2), marker=dict(color=color, size=6),
            fillcolor=hex_to_rgba(color, 0.15), opacity=0.9,
            text=hover_texts + [hover_texts[0]],
            hovertemplate='%{text}<extra></extra>'
        ))

    fig.update_layout(
        height=600, margin=dict(l=100, r=250, t=80, b=60),
        # NOTE(review): the leading glyphs look like a mis-decoded emoji; the
        # text is kept exactly as found - confirm the intended character.
        title=dict(text=f"π― Top {top_n} Methods: AUP Scores in Radar Chart", x=0.5, font=dict(size=18)),
        # Radial ticks are hidden because every axis has its own scale.
        polar=dict(radialaxis=dict(visible=True, range=[0, 100], tickfont=dict(size=11),
                                   tickvals=[], showticklabels=False)),
        legend=dict(font=dict(size=12), x=1.05, y=1, bgcolor='rgba(255,255,255,0.95)',
                    bordercolor='#ddd', borderwidth=1,
                    title=dict(text="Methods (sorted by Avg AUP)", font=dict(size=13))),
        hoverlabel=dict(bgcolor="white", font_size=14, font_family="Arial", bordercolor="#333")
    )
    return fig
def create_group_bar_chart(df, tasks, top_n=15):
    """Create a grouped bar chart of AUP scores for the first ``top_n`` rows of *df*.

    One bar trace is added per plotted row; each trace holds one bar per
    benchmark (every task plus "Avg AUP"). Benchmarks whose score is ``None``
    or NaN are left out of that method's trace, and a method with no scores
    at all gets no trace.

    Args:
        df: DataFrame with a ``Method`` column, an ``Avg_AUP`` column and one
            ``<task>_AUP`` column per entry of *tasks*.
        tasks: list of task names; also used as x-axis labels.
        top_n: number of leading rows to draw.

    Returns:
        A ``plotly.graph_objects.Figure`` in ``barmode='group'``.
    """
    df_top = df.head(top_n).copy()
    # Colours and ranks are derived from the full table, not only the top rows.
    model_colors = get_model_colors(df)
    model_ranks = get_model_ranks(df)

    # (column to read, label shown on the x axis) for every bar group.
    benchmarks = [(f"{task}_AUP", task) for task in tasks] + [("Avg_AUP", "Avg AUP")]

    fig = go.Figure()
    # Iterate the rows directly rather than re-filtering the frame by method
    # name: one pass, and rows sharing a name keep their own values.
    for _, row in df_top.iterrows():
        method = row["Method"]
        color = model_colors.get(method, "#808080")
        rank = model_ranks.get(method, 0)
        display_name = f"#{rank} {method}"

        x_vals, y_vals = [], []
        for column, label in benchmarks:
            aup = row.get(column)
            # Skip missing scores; np.floating also covers non-float64 NaNs.
            if aup is None or (isinstance(aup, (float, np.floating)) and np.isnan(aup)):
                continue
            x_vals.append(label)
            y_vals.append(aup)

        if y_vals:
            fig.add_trace(go.Bar(
                name=display_name, x=x_vals, y=y_vals, marker_color=color,
                hovertemplate=f"<b>{display_name}</b><br>%{{x}}: %{{y:.1f}}<extra></extra>"
            ))

    fig.update_layout(
        height=550, margin=dict(l=60, r=250, t=80, b=100),
        # NOTE(review): the leading glyph looks like a mis-decoded emoji; the
        # text is kept exactly as found - confirm the intended character.
        title=dict(text=f"π Top {top_n} Methods: AUP Scores in Bar Chart", x=0.5, font=dict(size=18)),
        xaxis_title="Benchmark", yaxis_title="AUP Score",
        barmode='group', bargap=0.2, bargroupgap=0.05,
        legend=dict(font=dict(size=11), x=1.02, y=1, bgcolor='rgba(255,255,255,0.95)',
                    bordercolor='#ddd', borderwidth=1,
                    title=dict(text="Methods (sorted by Avg AUP)", font=dict(size=12))),
        hoverlabel=dict(bgcolor="white", font_size=14, font_family="Arial")
    )
    return fig
def _fit_aup_curve(rho, y, n_samples=300):
    """Return ``(x, y)`` arrays of the smooth curve drawn through the points.

    Three or more points get a least-squares quadratic; exactly two points
    get a parabola with its vertex at the first point (a straight ramp of y
    values if both x values coincide); fewer points are returned unchanged.
    """
    if len(rho) >= 3:
        poly = np.poly1d(np.polyfit(rho, y, 2))
        x_smooth = np.linspace(rho.min(), rho.max(), n_samples)
        return x_smooth, poly(x_smooth)
    if len(rho) == 2:
        x_smooth = np.linspace(rho.min(), rho.max(), n_samples)
        if rho[1] != rho[0]:
            a = (y[1] - y[0]) / ((rho[1] - rho[0]) ** 2)
            return x_smooth, a * (x_smooth - rho[0]) ** 2 + y[0]
        return x_smooth, np.linspace(y[0], y[1], n_samples)
    return rho, y


def create_aup_curve_chart(raw_data, tasks, df, top_n=15):
    """Create a grid of accuracy-vs-TPF curves, one panel per task plus "Average".

    Panels are laid out three per row (2x3 for the usual five tasks). Each
    method among the first ``top_n`` rows of *df* is drawn as a fitted curve
    (quadratic fit; the original comment says this mirrors plot_lines.py)
    with markers at the measured points. The "Average" panel averages TPF
    and accuracy point-by-point over the tasks that have data for a method.

    Args:
        raw_data: ``{task: {method: [(tpf, acc), ...]}}``; only the first two
            items of each point are used.
        tasks: list of task names; one panel each, in this order.
        df: DataFrame with ``Method`` and ``Avg_AUP`` columns; supplies the
            methods to show plus their colours and ranks.
        top_n: number of leading rows of *df* whose methods are drawn.

    Returns:
        A ``plotly.graph_objects.Figure`` built with ``make_subplots``.
    """
    df_top = df.head(top_n).copy()
    model_colors = get_model_colors(df)
    model_ranks = get_model_ranks(df)
    top_methods = df_top["Method"].tolist()
    methods_to_show = set(top_methods)

    # Per-task data restricted to the shown methods: {task: {method: [(rho, y), ...]}}
    task_data = {
        task: {
            method: [(p[0], p[1]) for p in pairs]
            for method, pairs in raw_data.get(task, {}).items()
            if method in methods_to_show
        }
        for task in tasks
    }

    # Point-wise average over the tasks that actually have data for a method.
    avg_data = {}
    for method in methods_to_show:
        curves = [task_data[t][method] for t in tasks if task_data[t].get(method)]
        if not curves:
            continue
        # Curves are expected to share a length; truncate to the shortest so a
        # ragged input cannot index past the end.
        n_points = min(len(curve) for curve in curves)
        n_curves = len(curves)
        avg_data[method] = [
            (np.mean([curve[i][0] for curve in curves]),
             sum(curve[i][1] for curve in curves) / n_curves)
            for i in range(n_points)
        ]

    # One panel per task followed by the "Average" panel, three per row.
    # At least two rows are kept so the usual five-task layout stays 2x3.
    n_cols = 3
    n_panels = len(tasks) + 1
    n_rows = max(2, -(-n_panels // n_cols))  # ceiling division
    titles = list(tasks) + ["Average"]
    fig = make_subplots(
        rows=n_rows, cols=n_cols, subplot_titles=titles,
        horizontal_spacing=0.08,
        # Plotly requires vertical_spacing <= 1 / (rows - 1).
        vertical_spacing=min(0.15, 0.5 / (n_rows - 1)),
    )

    # Methods that already own a legend entry (one entry per method overall).
    legend_added = set()

    def get_pos(idx):
        """Map a 0-based panel index to its 1-based (row, col) grid cell."""
        return (idx // n_cols + 1, idx % n_cols + 1)

    def draw_curve(pairs, method, row, col):
        """Add the fitted line and the data-point markers for one method."""
        if not pairs:
            return
        color = model_colors.get(method, "#808080")
        rank = model_ranks.get(method, 0)
        display_name = f"#{rank} {method}"
        show_legend = method not in legend_added
        if show_legend:
            legend_added.add(method)

        rho, y = zip(*sorted(pairs, key=lambda x: x[0]))
        rho, y = np.array(rho), np.array(y)
        x_smooth, y_smooth = _fit_aup_curve(rho, y)

        # Fitted curve (carries the legend entry, no hover).
        fig.add_trace(go.Scatter(
            x=x_smooth, y=y_smooth, mode='lines', name=display_name,
            line=dict(color=color, width=2.5), opacity=0.85,
            showlegend=show_legend, legendgroup=method,
            hoverinfo='skip'
        ), row=row, col=col)
        # Markers at the original data points (carry the hover text).
        fig.add_trace(go.Scatter(
            x=rho, y=y, mode='markers', name=display_name,
            marker=dict(color='white', size=8, line=dict(color=color, width=2)),
            showlegend=False, legendgroup=method,
            hovertemplate=f"<b>{display_name}</b><br>TPF: %{{x:.2f}}<br>Acc: %{{y:.1f}}<extra></extra>"
        ), row=row, col=col)

    # Task panels, drawn in table order so legend entries follow that order.
    for idx, task in enumerate(tasks):
        row, col = get_pos(idx)
        data = task_data.get(task, {})
        for method in top_methods:
            if method in data:
                draw_curve(data[method], method, row, col)

    # "Average" panel sits right after the last task, matching its title.
    avg_row, avg_col = get_pos(len(tasks))
    for method in top_methods:
        if method in avg_data:
            draw_curve(avg_data[method], method, avg_row, avg_col)

    fig.update_layout(
        height=275 * n_rows, margin=dict(l=60, r=250, t=80, b=60),
        # NOTE(review): the leading glyph looks like a mis-decoded emoji; the
        # text is kept exactly as found - confirm the intended character.
        title=dict(text=f"π Top {top_n} Methods: Accuracy-Parallelism Curves", x=0.5, font=dict(size=18)),
        legend=dict(font=dict(size=11), x=1.02, y=1, bgcolor='rgba(255,255,255,0.95)',
                    bordercolor='#ddd', borderwidth=1,
                    title=dict(text="Methods (sorted by Avg AUP)", font=dict(size=12)),
                    tracegroupgap=1, itemsizing='constant'),
        hoverlabel=dict(bgcolor="white", font_size=14, font_family="Arial")
    )

    # Axis titles: x only on the bottom row, y only on the first column.
    for idx in range(n_rows * n_cols):
        row, col = get_pos(idx)
        fig.update_xaxes(title_text="TPF (Tokens per Forward)" if row == n_rows else "", row=row, col=col)
        fig.update_yaxes(title_text="Acc (%)" if col == 1 else "", row=row, col=col)
    return fig