Spaces:
Running
Running
| import plotly.graph_objects as go | |
| import numpy as np | |
| import pandas as pd | |
| import json | |
| import os | |
| from datetime import datetime | |
| from leaderboard_utils import ( | |
| get_combined_leaderboard, | |
| GAME_ORDER | |
| ) | |
# Load the per-model color palette once, at import time. The path is relative,
# so it is resolved against the process's current working directory.
# The chart builders in this file look players up in this mapping and fall
# back to gray ('#808080') when a player has no entry.
with open('assets/model_color.json', 'r', encoding='utf-8') as f:
    MODEL_COLORS = json.load(f)
# Maps a game's display name to a column label — presumably the name of the
# score column in that game's own leaderboard table.
# NOTE(review): no function visible in this file reads this mapping; confirm
# whether external callers still depend on it.
GAME_SCORE_COLUMNS = {
    "Super Mario Bros": "Score",
    "Sokoban": "Levels Cracked",
    "2048": "Score",
    "Candy Crush": "Average Score",
    "Tetris (complete)": "Score",
    "Tetris (planning only)": "Score",
    "Ace Attorney": "Score"
}
def get_model_prefix(name):
    """Return the part of ``name`` that precedes its first hyphen.

    A name that contains no hyphen is returned unchanged.
    """
    prefix, _separator, _remainder = name.partition('-')
    return prefix
def normalize_values(values, mean, std):
    """
    Convert raw values to clamped, z-score based chart values in [0, 100].

    Each value is turned into a z-score, ``(v - mean) / std``, and mapped to
    ``z * 30 + 35``: a value equal to ``mean`` lands on 35 and every standard
    deviation is worth 30 points. The result is clamped to ``[0, 100]``.

    When the spread is unusable (``std`` is 0 or NaN) no z-score exists, so
    positive values are reported as 50 and everything else as 0.

    Args:
        values (iterable): Values to normalize (list, pandas Series, ...)
        mean (float): Mean used for the z-score
        std (float): Standard deviation used for the z-score
    Returns:
        list: One normalized value per input value, in input order
    """
    # pandas' Series.std() returns NaN for fewer than two samples. A NaN std
    # must be treated like a zero std: otherwise every z-score is NaN and the
    # clamp below turns it into 100, because min(100, nan) evaluates to 100.
    if std == 0 or std != std:  # `std != std` is True only for NaN
        return [50 if v > 0 else 0 for v in values]
    return [max(0, min(100, ((v - mean) / std) * 30 + 35)) for v in values]
def simplify_model_name(name):
    """Shorten a model name for display.

    The name "claude-3-7-sonnet-20250219(thinking)" is replaced by the alias
    "claude-3-7-thinking". Any other name with more than four hyphen-separated
    segments is cut to its first four segments followed by "-..."; shorter
    names are returned unchanged.
    """
    if name == "claude-3-7-sonnet-20250219(thinking)":
        # The alias has exactly four segments, so it needs no truncation.
        return "claude-3-7-thinking"
    segments = name.split('-')
    if len(segments) <= 4:
        return name
    return '-'.join(segments[:4]) + '-...'
def create_horizontal_bar_chart(df, game_name):
    """Creates a horizontal bar chart for a given game's leaderboard data.

    Args:
        df (pd.DataFrame | None): Leaderboard rows; the code reads a "Player"
            column and a "Score" column from it. The frame is not modified.
        game_name (str): Game name interpolated into the chart titles.
    Returns:
        go.Figure: The bar chart, or an empty figure whose title explains why
        nothing could be plotted (no data, no "Score" column, or no score
        that converts to a number).
    """
    if df is None or df.empty:
        # Return a placeholder figure if there's no data
        fig = go.Figure()
        fig.update_layout(
            title=f"No data available for {game_name}",
            xaxis_title="Score",
            yaxis_title="Player",
            plot_bgcolor='rgba(0,0,0,0)',
            paper_bgcolor='rgba(0,0,0,0)',
            font=dict(color='#2c3e50')
        )
        return fig
    score_col = "Score"  # Standardized score column name
    if score_col not in df.columns:
        fig = go.Figure()
        fig.update_layout(title=f"'{score_col}' column not found for {game_name}")
        return fig
    # Coerce scores to numbers for sorting and plotting. assign() builds a new
    # frame, so the caller's DataFrame keeps its original "Score" values
    # instead of being overwritten in place.
    numeric_scores = pd.to_numeric(df[score_col], errors='coerce')
    df_cleaned = df.assign(**{score_col: numeric_scores}).dropna(subset=[score_col])
    if df_cleaned.empty:
        fig = go.Figure()
        fig.update_layout(title=f"No valid score data to plot for {game_name}")
        return fig
    # Sort ascending so the categories are handed to Plotly from the lowest
    # score to the highest.
    df_sorted = df_cleaned.sort_values(by=score_col, ascending=True)
    fig = go.Figure(
        go.Bar(
            y=df_sorted['Player'],
            x=df_sorted[score_col],
            orientation='h',
            marker=dict(
                color=df_sorted[score_col],  # bar color follows the score
                colorscale='Viridis',
                line=dict(color='#2c3e50', width=1)
            ),
            hovertext=df_sorted[score_col].round(2).astype(str) + ' points',
            hoverinfo='y+text'
        )
    )
    fig.update_layout(
        title=dict(
            text=f'{game_name} Scores',
            x=0.5,
            font=dict(size=20, color='#2c3e50')
        ),
        xaxis_title="Score",
        yaxis_title="Player",
        plot_bgcolor='rgba(0,0,0,0)',  # Transparent plot background
        paper_bgcolor='rgba(0,0,0,0)',  # Transparent paper background
        font=dict(color='#2c3e50'),
        margin=dict(l=150, r=20, t=50, b=50),  # Left margin leaves room for player names
        yaxis=dict(
            automargin=True,
            tickfont=dict(size=10)
        ),
        xaxis=dict(gridcolor='#e0e0e0')  # Light gridlines for x-axis
    )
    return fig
def create_radar_charts(df):
    """Build a radar chart with one closed polygon per leaderboard row.

    Every column whose name ends in " Score" becomes one axis. "n/a" entries
    count as 0, and each column is normalized with ``normalize_values`` using
    that column's own mean and standard deviation.

    Args:
        df (pd.DataFrame): Leaderboard with a "Player" column and one
            "<game> Score" column per game. The frame is not modified.
    Returns:
        go.Figure: Figure holding one Scatterpolar trace per row.
    """
    game_cols = [c for c in df.columns if c.endswith(" Score")]
    categories = [c.replace(" Score", "") for c in game_cols]
    # The normalized columns go into a private copy so the caller's frame
    # does not gain "norm_*" columns as a side effect.
    df = df.copy()
    for col in game_cols:
        vals = df[col].replace("n/a", 0).infer_objects(copy=False).astype(float)
        df[f"norm_{col}"] = normalize_values(vals, vals.mean(), vals.std())
    fig = go.Figure()
    for _, row in df.iterrows():
        player = row["Player"]
        r = [row[f"norm_{c}"] for c in game_cols]
        color = MODEL_COLORS.get(player, '#808080')  # fallback to gray
        fig.add_trace(go.Scatterpolar(
            # Repeat the first point to close the polygon. Slicing (instead
            # of r[0]) keeps this safe when there are no score columns.
            r=r + r[:1],
            theta=categories + categories[:1],
            mode='lines+markers',
            fill='toself',
            name=player,
            line=dict(color=color, width=2),
            marker=dict(color=color),
            # Appends an alpha byte (0x33, about 20% opacity); assumes the
            # palette stores '#RRGGBB' strings — TODO confirm.
            fillcolor=color + '33',
            opacity=0.8
        ))
    fig.update_layout(
        autosize=False,
        width=800,
        height=600,
        margin=dict(l=80, r=150, t=20, b=20),
        title=dict(
            text="Radar Chart of AI Performance (Normalized)",
            pad=dict(t=10)
        ),
        polar=dict(radialaxis=dict(visible=True, range=[0, 100])),
        legend=dict(
            font=dict(size=9),
            itemsizing='trace',
            x=1.4,
            y=1,
            xanchor='left',
            yanchor='top',
            bgcolor='rgba(255,255,255,0.6)',
            bordercolor='gray',
            borderwidth=1
        )
    )
    return fig
def get_combined_leaderboard_with_radar(rank_data, selected_games):
    """Return the combined leaderboard together with its radar chart.

    Returns:
        tuple: ``(leaderboard DataFrame, radar figure)``.
    """
    leaderboard = get_combined_leaderboard(rank_data, selected_games)
    # The chart builder receives its own copy so the returned leaderboard is
    # not touched by anything it does.
    radar_fig = create_radar_charts(leaderboard.copy())
    return leaderboard, radar_fig
def create_group_bar_chart(df, top_n=5):
    """Build a grouped bar chart of the top performers for every game.

    For each game in ``GAME_ORDER`` that has a "<game> Score" column with at
    least one real value, scores are normalized with ``normalize_values``
    (per game, over the players that have a score) and the ``top_n`` best
    players get one bar each, at x positions "<game><br>#<rank>". The game
    name is shown once per game, under its middle bar.

    Args:
        df (pd.DataFrame): Combined leaderboard with a "Player" column and
            "<game> Score" columns ("n/a" marks a missing score). The frame
            is not modified.
        top_n (int): Maximum number of players shown per game.
    Returns:
        go.Figure: The grouped bar chart, or an empty figure titled
        "No data available" when no game has any score.
    """
    # Work on a private copy: score columns are coerced to float and
    # "norm_*" columns are added below.
    df = df.copy()
    game_cols = {}
    for game in GAME_ORDER:
        col = f"{game} Score"
        if col in df.columns:
            # "n/a" becomes NaN so the column can be handled as floats
            df[col] = df[col].replace("n/a", np.nan).infer_objects(copy=False).astype(float)
            if df[col].notna().any():
                game_cols[game] = col
    if not game_cols:
        return go.Figure().update_layout(title="No data available")
    # Drop players with no data at all. The explicit copy makes the column
    # assignments below write to an independent frame (no SettingWithCopy).
    df = df.dropna(subset=list(game_cols.values()), how='all').copy()
    # Normalize scores per game, leaving NaN where a player has no score
    for game, col in game_cols.items():
        valid = df[col].dropna()
        norm_col = f"norm_{col}"
        df[norm_col] = np.nan
        if not valid.empty:
            df.loc[valid.index, norm_col] = normalize_values(valid, valid.mean(), valid.std())
    # Build consistent game order (X-axis)
    sorted_games = [game for game in GAME_ORDER if f"norm_{game} Score" in df.columns]
    # Long multi-word game names are split over two lines at the middle word
    game_display_map = {}
    for game in sorted_games:
        if len(game) > 10 and ' ' in game:
            parts = game.split(' ')
            midpoint = len(parts) // 2
            game_display_map[game] = ' '.join(parts[:midpoint]) + '<br>' + ' '.join(parts[midpoint:])
        else:
            game_display_map[game] = game
    fig = go.Figure()
    all_x_categories = []
    all_players = set()
    unique_x_labels = []
    # First pass: rank the players of every game and create x-axis categories
    game_rankings = {}
    shown_limit = max(top_n, 0)  # head() treats a negative count as "all but the last n"
    for game in sorted_games:
        col = f"norm_{game} Score"
        top_rows = df[df[col].notna()].sort_values(by=col, ascending=False).head(shown_limit)
        # The game label goes under the middle bar of the bars actually drawn.
        # Deriving the position from top_n alone would leave a game with
        # fewer ranked players than that position without any label.
        label_rank = (len(top_rows) + 1) // 2
        game_rankings[game] = []
        for rank, (_, row) in enumerate(top_rows.iterrows(), start=1):
            player = row["Player"]
            x_category = f"{game_display_map[game]}<br>#{rank}"
            game_rankings[game].append({
                'player': player,
                'score': row[col],
                'x_category': x_category,
                'rank': rank
            })
            all_x_categories.append(x_category)
            all_players.add(player)
            if rank == label_rank:
                # Super Mario Bros is abbreviated; other games use their display name
                unique_x_labels.append("SMB" if game == "Super Mario Bros" else game_display_map[game])
            else:
                unique_x_labels.append("")  # No label under the other ranks
    # Second pass: one trace per player, holding that player's bars in every
    # game where the player made the top list
    for player in sorted(all_players):
        x_vals = []
        y_vals = []
        for game in sorted_games:
            entry = next((e for e in game_rankings[game] if e['player'] == player), None)
            if entry:
                x_vals.append(entry['x_category'])
                y_vals.append(entry['score'])
        if x_vals:  # Only add trace if player has data
            fig.add_trace(go.Bar(
                name=player,
                x=x_vals,
                y=y_vals,
                marker_color=MODEL_COLORS.get(player, '#808080'),
                hovertemplate="<b>%{fullData.name}</b><br>Score: %{y:.1f}<extra></extra>"
            ))
    fig.update_layout(
        autosize=True,
        height=550,
        margin=dict(l=50, r=50, t=20, b=20),
        title=dict(text=f"Grouped Bar Chart - Top {top_n} Performers by Game", pad=dict(t=10)),
        xaxis_title="Games (Ranked by Performance)",
        yaxis_title="Normalized Score",
        xaxis=dict(
            categoryorder='array',
            categoryarray=all_x_categories,
            tickangle=0,  # Keep text horizontal since we're using line breaks
            ticktext=unique_x_labels,  # One visible label per game, blanks elsewhere
            tickvals=all_x_categories
        ),
        barmode='group',
        bargap=0.2,  # Gap between game categories
        bargroupgap=0.05,  # Gap between bars in a group
        uniformtext=dict(mode='hide', minsize=8),  # Hide text that doesn't fit
        legend=dict(
            font=dict(size=12),
            title="Choose your model 💡 (click / double-click)",
            itemsizing='trace',
            x=1.1,
            y=1,
            xanchor='left',
            yanchor='top',
            bgcolor='rgba(255,255,255,0.6)',
            bordercolor='gray',
            borderwidth=1
        )
    )
    return fig
def get_combined_leaderboard_with_group_bar(rank_data, selected_games, top_n=5, limit_to_top_n=None):
    """Return the combined leaderboard together with its grouped bar chart.

    ``limit_to_top_n`` is forwarded to ``get_combined_leaderboard``;
    ``top_n`` is forwarded to ``create_group_bar_chart``.

    Returns:
        tuple: ``(leaderboard DataFrame, grouped bar figure)``.
    """
    leaderboard = get_combined_leaderboard(rank_data, selected_games, limit_to_top_n)
    # The chart builder receives its own copy so the returned leaderboard is
    # not touched by anything it does.
    bar_fig = create_group_bar_chart(leaderboard.copy(), top_n)
    return leaderboard, bar_fig
def hex_to_rgba(hex_color, alpha=0.2):
    """Convert a hex color string to a CSS ``rgba(r, g, b, alpha)`` string.

    Accepts "#RRGGBB" and the shorthand "#RGB" (each digit is doubled), with
    or without the leading "#". Any alpha digits in the hex string itself
    ("#RRGGBBAA", "#RGBA") are ignored in favor of ``alpha``.

    Args:
        hex_color (str): Hex color to convert.
        alpha (float): Opacity written into the result.
    Returns:
        str: The color as ``'rgba(r, g, b, alpha)'``.
    Raises:
        ValueError: If ``hex_color`` is not a valid hex color.
    """
    digits = hex_color.lstrip('#')
    if len(digits) in (3, 4):
        # Expand shorthand notation: "f80" -> "ff8800"
        digits = ''.join(ch * 2 for ch in digits)
    if len(digits) < 6:
        raise ValueError(f"Invalid hex color: {hex_color!r}")
    r, g, b = (int(digits[i:i + 2], 16) for i in (0, 2, 4))
    return f'rgba({r}, {g}, {b}, {alpha})'
def create_single_radar_chart(df, selected_games=None, highlight_models=None, chart_title=None, top_n=None, full_df=None):
    """Build one radar chart with a closed polygon per player.

    Scores are normalized with ``normalize_values``. The mean and standard
    deviation come from ``full_df`` when it is given, so the scale does not
    change when ``df`` holds only a subset of the players. "n/a" scores
    count as 0.

    Args:
        df (pd.DataFrame): Players to draw; needs a "Player" column and a
            "<game> Score" column per selected game. Not modified.
        selected_games (list[str] | None): Games to use as axes; a built-in
            five-game list is used when None.
        highlight_models (collection | None): Player names drawn in red with
            a thicker line.
        chart_title (str | None): Title; derived from ``top_n`` or the number
            of rows when None.
        top_n (int | None): Only used to word the default title.
        full_df (pd.DataFrame | None): Frame supplying the normalization
            statistics; ``df`` itself is used when None.
    Returns:
        go.Figure: The radar chart.
    """
    if selected_games is None:
        selected_games = ['Super Mario Bros', '2048', 'Candy Crush', 'Sokoban', 'Ace Attorney']
    # Axis labels: Super Mario Bros is abbreviated, other names are kept
    categories = ['SMB' if game == 'Super Mario Bros' else game for game in selected_games]
    game_cols = [f"{game} Score" for game in selected_games]
    # "norm_*" columns are added to a private copy, not to the caller's frame
    df = df.copy()
    normalization_df = full_df if full_df is not None else df
    for col in game_cols:
        # where() is used instead of replace() to avoid pandas' downcasting
        # FutureWarning
        full_vals = normalization_df[col].where(normalization_df[col] != "n/a", 0).astype(float)
        mean, std = full_vals.mean(), full_vals.std()
        limited_vals = df[col].where(df[col] != "n/a", 0).astype(float)
        df[f"norm_{col}"] = normalize_values(limited_vals, mean, std)
    # Group players by name prefix; groups and their members are both ordered
    # case-insensitively, which fixes the legend order
    model_groups = {}
    for player in df["Player"]:
        model_groups.setdefault(get_model_prefix(player), []).append(player)
    grouped_players = []
    for prefix in sorted(model_groups, key=str.lower):
        grouped_players.extend(sorted(model_groups[prefix], key=str.lower))
    fig = go.Figure()
    for player in grouped_players:
        matches = df[df["Player"] == player]
        if matches.empty:
            continue
        row = matches.iloc[0]
        is_highlighted = bool(highlight_models) and player in highlight_models
        color = 'red' if is_highlighted else MODEL_COLORS.get(player, '#808080')
        fillcolor = 'rgba(255, 0, 0, 0.4)' if is_highlighted else hex_to_rgba(color, 0.2)
        r = [row[f"norm_{col}"] for col in game_cols]
        fig.add_trace(go.Scatterpolar(
            # Repeat the first point to close the polygon. Slicing (instead
            # of r[0]) keeps this safe when no game is selected.
            r=r + r[:1],
            theta=categories + categories[:1],
            mode='lines+markers',
            fill='toself',
            name=player.lower(),  # Legend shows the lowercase name
            line=dict(color=color, width=6 if is_highlighted else 2),
            marker=dict(color=color, size=10 if is_highlighted else 6),
            fillcolor=fillcolor,
            opacity=1.0 if is_highlighted else 0.7,
            hovertemplate='<b>%{fullData.name}</b><br>Game: %{theta}<br>Score: %{r:.1f}<extra></extra>'
        ))
    # Default title depends on top_n, then on how many rows are drawn
    if chart_title is None:
        if top_n is not None:
            chart_title = f"Radar Chart - Top {top_n} Performers by Game"
        elif len(df) <= 10:
            chart_title = "🎮 Agent Performance Across Games"
        else:
            chart_title = "🤖 Model Performance Across Games"
    fig.update_layout(
        autosize=True,
        height=550,
        margin=dict(l=400, r=100, t=20, b=20),  # Wide left margin holds the legend
        title=dict(
            text=chart_title,
            x=0.5,
            xanchor='center',
            yanchor='top',
            y=0.95,
            font=dict(size=20),
            pad=dict(b=20)
        ),
        polar=dict(
            radialaxis=dict(
                visible=True,
                range=[0, 100],
                tickangle=45,
                tickfont=dict(size=12),
                gridcolor='lightgray',
                gridwidth=1,
                angle=45
            ),
            angularaxis=dict(
                tickfont=dict(size=14, weight='bold'),
                tickangle=0
            )
        ),
        legend=dict(
            font=dict(size=12),
            title="Choose your model 💡 (click / double-click)",
            itemsizing='trace',
            x=-1.4,  # Negative x places the legend left of the plot
            y=0.8,
            yanchor='top',
            xanchor='left',
            bgcolor='rgba(255,255,255,0.6)',
            bordercolor='gray',
            borderwidth=1,
            itemclick="toggleothers",  # Single click isolates the clicked trace
            itemdoubleclick="toggle"  # Double click toggles its visibility
        )
    )
    return fig
def get_combined_leaderboard_with_single_radar(rank_data, selected_games, highlight_models=None, limit_to_top_n=None, chart_title=None, top_n=None):
    """Return the (possibly limited) leaderboard together with its radar chart.

    The leaderboard is fetched twice: once unlimited, to supply the
    normalization statistics, and once with ``limit_to_top_n`` for display.

    Args:
        selected_games (dict): Game name mapped to a truthy value when the
            game is selected.
    Returns:
        tuple: ``(display leaderboard DataFrame, radar figure)``.
    """
    # Unlimited leaderboard: source of the normalization statistics
    unlimited = get_combined_leaderboard(rank_data, selected_games, limit_to_top_n=None)
    # Limited leaderboard: the rows that are actually drawn and returned
    displayed = get_combined_leaderboard(rank_data, selected_games, limit_to_top_n)
    chosen_games = [game for game, is_selected in selected_games.items() if is_selected]
    # Both frames are copied so the chart builder cannot alter the originals
    radar_fig = create_single_radar_chart(
        displayed.copy(),
        chosen_games,
        highlight_models,
        chart_title,
        top_n,
        unlimited.copy(),
    )
    return displayed, radar_fig
def create_organization_radar_chart(rank_data):
    """Build a radar chart comparing organizations by their average scores.

    For every organization the mean score per game is computed over its rows
    ("n/a" counts as 0). Those means are then normalized across organizations
    with ``normalize_values`` and drawn as one closed polygon each.

    Args:
        rank_data: Raw ranking data, passed to ``get_combined_leaderboard``
            with every game in ``GAME_ORDER`` selected.
    Returns:
        go.Figure: Figure with one Scatterpolar trace per organization.
    """
    df = get_combined_leaderboard(rank_data, {g: True for g in GAME_ORDER})
    orgs = df["Organization"].unique()
    game_cols = [f"{g} Score" for g in GAME_ORDER if f"{g} Score" in df.columns]
    categories = [g.replace(" Score", "") for g in game_cols]
    # Convert every score column to floats once, instead of re-filtering and
    # re-converting the frame for each (organization, game) pair.
    scores = pd.DataFrame(
        {col: df[col].where(df[col] != "n/a", 0).astype(float) for col in game_cols},
        index=df.index,
    )
    records = []
    for org in orgs:
        members = scores[df["Organization"] == org]
        record = {col: members[col].mean() for col in game_cols}
        record["Organization"] = org
        records.append(record)
    # Naming the columns keeps them present even when there are no
    # organizations, so the lookups below cannot raise KeyError.
    avg_df = pd.DataFrame(records, columns=game_cols + ["Organization"])
    for col in game_cols:
        vals = avg_df[col]
        avg_df[f"norm_{col}"] = normalize_values(vals, vals.mean(), vals.std())
    fig = go.Figure()
    for _, row in avg_df.iterrows():
        r = [row[f"norm_{col}"] for col in game_cols]
        fig.add_trace(go.Scatterpolar(
            # Repeat the first point to close the polygon. Slicing (instead
            # of r[0]) keeps this safe when there are no score columns.
            r=r + r[:1],
            theta=categories + categories[:1],
            mode='lines+markers',
            fill='toself',
            name=row["Organization"]
        ))
    fig.update_layout(
        autosize=False,
        width=800,
        height=600,
        margin=dict(l=80, r=150, t=20, b=20),
        title=dict(
            text="Radar Chart: Organization Performance (Normalized)",
            pad=dict(t=10)
        ),
        polar=dict(radialaxis=dict(visible=True, range=[0, 100])),
        legend=dict(
            font=dict(size=9),
            itemsizing='trace',
            x=1.4,
            y=1,
            xanchor='left',
            yanchor='top',
            bgcolor='rgba(255,255,255,0.6)',
            bordercolor='gray',
            borderwidth=1
        )
    )
    return fig
def create_top_players_radar_chart(rank_data, n=5):
    """Build a radar chart for the first ``n`` players of the leaderboard.

    The players are taken from the first ``n`` rows returned by
    ``get_combined_leaderboard``. Scores ("n/a" counts as 0) are normalized
    with ``normalize_values`` using the mean and standard deviation of the
    selected rows only.

    Args:
        rank_data: Raw ranking data, passed to ``get_combined_leaderboard``
            with every game in ``GAME_ORDER`` selected.
        n (int): Number of leading leaderboard rows to take players from.
    Returns:
        go.Figure: Figure with one Scatterpolar trace per selected row.
    """
    df = get_combined_leaderboard(rank_data, {g: True for g in GAME_ORDER})
    top_players = df.head(n)["Player"].tolist()
    # .copy(): "norm_*" columns are added below, and assigning columns to a
    # boolean-mask selection of df triggers pandas' SettingWithCopyWarning.
    top_df = df[df["Player"].isin(top_players)].copy()
    game_cols = [f"{g} Score" for g in GAME_ORDER if f"{g} Score" in df.columns]
    categories = [g.replace(" Score", "") for g in game_cols]
    for col in game_cols:
        # where() is used instead of replace() to avoid pandas' downcasting
        # FutureWarning
        vals = top_df[col].where(top_df[col] != "n/a", 0).astype(float)
        top_df[f"norm_{col}"] = normalize_values(vals, vals.mean(), vals.std())
    fig = go.Figure()
    for _, row in top_df.iterrows():
        r = [row[f"norm_{col}"] for col in game_cols]
        fig.add_trace(go.Scatterpolar(
            # Repeat the first point to close the polygon. Slicing (instead
            # of r[0]) keeps this safe when there are no score columns.
            r=r + r[:1],
            theta=categories + categories[:1],
            mode='lines+markers',
            fill='toself',
            name=row["Player"]
        ))
    fig.update_layout(
        autosize=False,
        width=800,
        height=600,
        margin=dict(l=80, r=150, t=20, b=20),
        title=dict(
            text=f"Top {n} Players Radar Chart (Normalized)",
            pad=dict(t=10)
        ),
        polar=dict(radialaxis=dict(visible=True, range=[0, 100])),
        legend=dict(
            font=dict(size=9),
            itemsizing='trace',
            x=1.4,
            y=1,
            xanchor='left',
            yanchor='top',
            bgcolor='rgba(255,255,255,0.6)',
            bordercolor='gray',
            borderwidth=1
        )
    )
    return fig
def create_player_radar_chart(rank_data, player_name):
    """Build a radar chart for a single player.

    The player's scores ("n/a" counts as 0) are normalized with
    ``normalize_values`` against the mean and standard deviation of the whole
    leaderboard, so the polygon shows the player relative to everyone else.

    Args:
        rank_data: Raw ranking data, passed to ``get_combined_leaderboard``
            with every game in ``GAME_ORDER`` selected.
        player_name (str): Exact value to match in the "Player" column.
    Returns:
        go.Figure: The radar chart, or an empty figure titled
        "Player not found" when no row matches ``player_name``.
    """
    df = get_combined_leaderboard(rank_data, {g: True for g in GAME_ORDER})
    # .copy(): "norm_*" columns are added below, and assigning columns to a
    # boolean-mask selection of df triggers pandas' SettingWithCopyWarning.
    player_df = df[df["Player"] == player_name].copy()
    if player_df.empty:
        return go.Figure().update_layout(
            title=dict(text="Player not found", pad=dict(t=10)),
            autosize=False,
            width=800,
            height=400
        )
    game_cols = [f"{g} Score" for g in GAME_ORDER if f"{g} Score" in df.columns]
    categories = [g.replace(" Score", "") for g in game_cols]
    for col in game_cols:
        # where() is used instead of replace() to avoid pandas' downcasting
        # FutureWarning
        vals = player_df[col].where(player_df[col] != "n/a", 0).astype(float)
        # Statistics come from the full leaderboard, not just this player
        df_vals = df[col].where(df[col] != "n/a", 0).astype(float)
        player_df[f"norm_{col}"] = normalize_values(vals, df_vals.mean(), df_vals.std())
    fig = go.Figure()
    for _, row in player_df.iterrows():
        r = [row[f"norm_{col}"] for col in game_cols]
        fig.add_trace(go.Scatterpolar(
            # Repeat the first point to close the polygon. Slicing (instead
            # of r[0]) keeps this safe when there are no score columns.
            r=r + r[:1],
            theta=categories + categories[:1],
            mode='lines+markers',
            fill='toself',
            name=row["Player"]
        ))
    fig.update_layout(
        autosize=False,
        width=800,
        height=600,
        margin=dict(l=80, r=150, t=20, b=20),
        title=dict(
            # Every row of player_df has Player == player_name, so the
            # argument is used directly instead of a leftover loop variable.
            text=f"{player_name} Radar Chart (Normalized)",
            pad=dict(t=10)
        ),
        polar=dict(radialaxis=dict(visible=True, range=[0, 100])),
        legend=dict(
            font=dict(size=9),
            itemsizing='trace',
            x=1.4,
            y=1,
            xanchor='left',
            yanchor='top',
            bgcolor='rgba(255,255,255,0.6)',
            bordercolor='gray',
            borderwidth=1
        )
    )
    return fig
def save_normalized_data(df, selected_games, filename="normalized_data.json"):
    """
    Compute normalized scores and write them to cache/<filename> as JSON
    Args:
        df (pd.DataFrame): DataFrame with raw scores
        selected_games (dict): Dictionary of selected games (stored as is)
        filename (str): Output filename inside the "cache" directory
    Returns:
        str: Path of the written file
    """
    timestamp = datetime.now().isoformat()
    game_cols = [f"{game} Score" for game in GAME_ORDER if f"{game} Score" in df.columns]
    # Per-game statistics; "n/a" counts as 0 when computing them
    games_section = {}
    for col in game_cols:
        numeric = df[col].replace("n/a", 0).infer_objects(copy=False).astype(float)
        games_section[col.replace(" Score", "")] = {
            "mean": numeric.mean(),
            "std": numeric.std(),
            "raw_scores": numeric.to_dict()
        }
    # Per-player raw and normalized scores; a later row with the same player
    # name replaces an earlier one
    players_section = {}
    for _, record in df.iterrows():
        entry = {"organization": record.get("Organization", "unknown")}
        for col in game_cols:
            game_name = col.replace(" Score", "")
            raw_score = record[col]
            if raw_score == "n/a":
                raw_score = "n/a"
                normalized = 0
            else:
                raw_score = float(raw_score)
                stats = games_section[game_name]
                normalized = normalize_values([raw_score], stats["mean"], stats["std"])[0]
            entry[f"{game_name}_raw"] = raw_score
            entry[f"{game_name}_normalized"] = normalized
        players_section[record["Player"]] = entry
    normalization_data = {
        "timestamp": timestamp,
        "selected_games": selected_games,
        "games": games_section,
        "players": players_section
    }
    # Save to file, creating the cache directory on first use
    os.makedirs("cache", exist_ok=True)
    filepath = os.path.join("cache", filename)
    with open(filepath, 'w') as out_file:
        json.dump(normalization_data, out_file, indent=2)
    print(f"Normalized data saved to {filepath}")
    return filepath
def load_normalized_data(filename="normalized_data.json"):
    """
    Read cached normalization data from cache/<filename>
    Args:
        filename (str): Name of the JSON file inside the "cache" directory
    Returns:
        dict: Parsed JSON content, or None when the file does not exist or
        cannot be read or parsed
    """
    filepath = os.path.join("cache", filename)
    if os.path.exists(filepath):
        try:
            with open(filepath, 'r') as cache_file:
                payload = json.load(cache_file)
            print(f"Normalized data loaded from {filepath}")
            return payload
        except Exception as err:
            # Best effort: an unreadable cache is reported and treated as absent
            print(f"Error loading normalized data: {err}")
    return None
def get_normalized_scores_from_cache(players, games, cache_data):
    """
    Build a score table for the given players from cached data
    Players without an entry in the cache are left out. A game with no cached
    raw score for a player gets the score "n/a" and a normalized value of 0.
    Args:
        players (list): List of player names
        games (list): List of game names
        cache_data (dict): Cached normalization data with a "players" mapping
    Returns:
        pd.DataFrame: One row per cached player with the columns "Player",
        "<game> Score" and "norm_<game> Score"
    """
    rows = []
    for player in players:
        if player not in cache_data["players"]:
            continue
        cached = cache_data["players"][player]
        record = {"Player": player}
        for game in games:
            has_score = f"{game}_raw" in cached
            record[f"{game} Score"] = cached[f"{game}_raw"] if has_score else "n/a"
            record[f"norm_{game} Score"] = cached[f"{game}_normalized"] if has_score else 0
        rows.append(record)
    return pd.DataFrame(rows)
def save_visualization(fig, filename):
    """Write ``fig`` to ``filename`` by calling ``fig.write_image(filename)``.

    ``fig`` is presumably a Plotly figure like the ones built in this file —
    confirm against callers. Nothing is returned.
    """
    fig.write_image(filename)
def generate_and_save_normalized_data(rank_data, filename="normalized_data.json"):
    """
    Build the all-games leaderboard and cache its normalized scores
    Args:
        rank_data (dict): Raw rank data
        filename (str): Output filename
    Returns:
        str: Path to saved file
    """
    # Mark every game in GAME_ORDER as selected
    every_game = dict.fromkeys(GAME_ORDER, True)
    leaderboard = get_combined_leaderboard(rank_data, every_game)
    return save_normalized_data(leaderboard, every_game, filename)
def create_single_radar_chart_with_cache(df, selected_games=None, highlight_models=None, use_cache=True, cache_filename="normalized_data.json"):
    """
    Create radar chart with optional caching support

    Normalized scores are read from the cache file when ``use_cache`` is set
    and the cache knows at least one of the players in ``df``; players the
    cache does not know are then left out of the chart. In every other case
    the scores are normalized on the fly from ``df`` ("n/a" counts as 0).

    Args:
        df (pd.DataFrame): Players to draw; needs a "Player" column and, for
            on-the-fly normalization, a "<game> Score" column per selected
            game. Not modified.
        selected_games (list[str] | None): Games to use as axes; a built-in
            five-game list is used when None.
        highlight_models (collection | None): Player names drawn in red with
            a thicker line.
        use_cache (bool): Whether to try the cache file first.
        cache_filename (str): Cache file name, passed to
            ``load_normalized_data``.
    Returns:
        go.Figure: The radar chart.
    """
    if selected_games is None:
        selected_games = ['Super Mario Bros', '2048', 'Candy Crush', 'Sokoban', 'Ace Attorney']
    # Try to load from cache first
    cached_data = load_normalized_data(cache_filename) if use_cache else None
    df_normalized = None
    if cached_data:
        cached_df = get_normalized_scores_from_cache(df["Player"].tolist(), selected_games, cached_data)
        # A cache that knows none of the players yields a frame without any
        # columns, which cannot be merged or drawn; fall back to on-the-fly
        # normalization in that case.
        if not cached_df.empty:
            if "Organization" in df.columns:
                # Merge with original df to get Organization info
                cached_df = cached_df.merge(df[["Player", "Organization"]], on="Player", how="left")
            df_normalized = cached_df
    if df_normalized is None:
        # On-the-fly normalization, done on a copy of the caller's frame
        df_normalized = df.copy()
        for game in selected_games:
            col = f"{game} Score"
            vals = df_normalized[col].replace("n/a", 0).infer_objects(copy=False).astype(float)
            df_normalized[f"norm_{col}"] = normalize_values(vals, vals.mean(), vals.std())
    # Axis labels: Super Mario Bros is abbreviated, other names are kept
    categories = ['SMB' if game == 'Super Mario Bros' else game for game in selected_games]
    # Group players by name prefix; groups and their members are both ordered
    # case-insensitively, which fixes the legend order
    model_groups = {}
    for player in df_normalized["Player"]:
        model_groups.setdefault(get_model_prefix(player), []).append(player)
    grouped_players = []
    for prefix in sorted(model_groups, key=str.lower):
        grouped_players.extend(sorted(model_groups[prefix], key=str.lower))
    fig = go.Figure()
    for player in grouped_players:
        matches = df_normalized[df_normalized["Player"] == player]
        if matches.empty:
            continue
        row = matches.iloc[0]
        is_highlighted = bool(highlight_models) and player in highlight_models
        color = 'red' if is_highlighted else MODEL_COLORS.get(player, '#808080')
        fillcolor = 'rgba(255, 0, 0, 0.4)' if is_highlighted else hex_to_rgba(color, 0.2)
        # Both data sources use the same "norm_<game> Score" column names
        r = [row[f"norm_{game} Score"] for game in selected_games]
        fig.add_trace(go.Scatterpolar(
            # Repeat the first point to close the polygon. Slicing (instead
            # of r[0]) keeps this safe when no game is selected.
            r=r + r[:1],
            theta=categories + categories[:1],
            mode='lines+markers',
            fill='toself',
            name=player.lower(),  # Legend shows the lowercase name
            line=dict(color=color, width=6 if is_highlighted else 2),
            marker=dict(color=color, size=10 if is_highlighted else 6),
            fillcolor=fillcolor,
            opacity=1.0 if is_highlighted else 0.7,
            hovertemplate='<b>%{fullData.name}</b><br>Game: %{theta}<br>Score: %{r:.1f}<extra></extra>'
        ))
    fig.update_layout(
        autosize=True,
        height=550,
        margin=dict(l=400, r=100, t=20, b=20),  # Wide left margin holds the legend
        title=dict(
            text="AI Normalized Performance Across Games",
            x=0.5,
            xanchor='center',
            yanchor='top',
            y=0.95,
            font=dict(size=20),
            pad=dict(b=20)
        ),
        polar=dict(
            radialaxis=dict(
                visible=True,
                range=[0, 100],
                tickangle=45,
                tickfont=dict(size=12),
                gridcolor='lightgray',
                gridwidth=1,
                angle=45
            ),
            angularaxis=dict(
                tickfont=dict(size=14, weight='bold'),
                tickangle=0
            )
        ),
        legend=dict(
            font=dict(size=12),
            title="Choose your model 💡 (click / double-click)",
            itemsizing='trace',
            x=-1.4,  # Negative x places the legend left of the plot
            y=0.8,
            yanchor='top',
            xanchor='left',
            bgcolor='rgba(255,255,255,0.6)',
            bordercolor='gray',
            borderwidth=1,
            itemclick="toggleothers",  # Single click isolates the clicked trace
            itemdoubleclick="toggle"  # Double click toggles its visibility
        )
    )
    return fig