ITBench-Lite / analysis_src /model_styles.py
rohan-arora-ibm's picture
bump: bringing in evaluation code from @ptoroisaza
0b73078 unverified
#!/usr/bin/env python3
"""
Shared model styling configuration for paper analysis figures.

Provides consistent colors, markers, and display names across all agents
(EOG, React with Code, and future agents).

Usage:
    from paper_analysis.common.model_styles import get_model_style, MODEL_DISPLAY_NAMES

    style = get_model_style("GPT-5.1")
    ax.scatter(x, y, c=style['color'], marker=style['marker'], ...)
"""
import seaborn as sns
# =============================================================================
# MODEL DISPLAY NAMES
# Maps various raw names to standardized display names
# =============================================================================
# Built from a (display name -> known raw aliases) table so that all spellings
# of one model sit next to each other. The resulting dict maps every raw alias
# to its display name, in the order the aliases are listed here.
MODEL_DISPLAY_NAMES = {
    alias: display_name
    for display_name, aliases in (
        # OpenAI / Azure
        ("GPT-5.1", (
            "Azure_gpt-5.1-2025-11-13",
            "Azure_gpt-5.1-chat-2025-11-13",
        )),
        ("o4-mini", ("Azure_o4-mini",)),
        ("GPT-4o", ("Azure_gpt-4o",)),
        ("GPT-OSS-120B", ("openai_gpt-oss-120b",)),
        ("GPT-OSS-20B", ("openai_gpt-oss-20b",)),
        # Google / GCP
        ("Gemini 2.5 Pro", (
            "GCP_gemini-2.5-pro",
            "gemini-2.5-pro",
            "Gemini-2.5-Pro",
        )),
        ("Gemini 3 Pro", (
            "gcp_gemini-3-pro-preview",
            "gemini-3-pro-preview",
            "Gemini-3-Pro",
        )),
        ("Gemini 3 Flash", (
            "gemini-3-flash-preview",
            "Gemini-3-Flash",
            "google_gemini-3-flash-preview",
        )),
        # Moonshot AI
        ("Kimi K2", (
            "moonshotai_kimi-k2-thinking",
            "kimi-k2-thinking",
            "Kimi-K2",
        )),
        # Anthropic / AWS
        ("Claude Opus 4.5", (
            "aws_claude-opus-4-5",
            "Claude-Opus-4.5",
        )),
        # Mistral AI
        ("Mistral Large", (
            "mistralai_mistral-large-2512",
            "Mistral-Large",
        )),
        # Alibaba / Qwen
        ("Qwen3-VL-32B", ("qwen_qwen3-vl-32b-instruct",)),
        # ServiceNow
        ("Apriel-1.6-15B", ("ServiceNow-AI_Apriel-1.6-15b-Thinker",)),
        # Minimax
        ("Minimax M2.1", ("minimax_minimax-m2.1",)),
    )
    for alias in aliases
}
# =============================================================================
# MODEL STYLES
# Defines color and marker for each model (by display name)
# Colors are colorblind-friendly, markers provide redundant encoding
# =============================================================================
# Colorblind-friendly palette (based on IBM Design Library / Wong palette)
_COLORS = {
'blue': '#0072B2',
'orange': '#E69F00',
'green': '#009E73',
'pink': '#CC79A7',
'light_blue': '#56B4E9',
'yellow': '#F0E442',
'red': '#D55E00',
'gray': '#999999',
'purple': '#9467BD',
'brown': '#8C564B',
'teal': '#17BECF',
}
# Marker styles for redundant encoding
_MARKERS = {
'circle': 'o',
'square': 's',
'diamond': 'D',
'triangle_up': '^',
'triangle_down': 'v',
'pentagon': 'p',
'hexagon': 'h',
'star': '*',
'plus': 'P',
'x': 'X',
}
# Model style definitions (display_name -> {color, marker}).
# Each row below is (display name, key into _COLORS, key into _MARKERS).
# Some models share a color (GPT-5.1 / GPT-4o, Gemini 2.5 Pro / Qwen3-VL-32B,
# GPT-OSS-20B / Apriel-1.6-15B); those pairs differ in marker, so every
# (color, marker) combination in the table is unique.
MODEL_STYLES = {
    display_name: {
        "color": _COLORS[color_key],
        "marker": _MARKERS[marker_key],
    }
    for display_name, color_key, marker_key in (
        # Google models - circles in blue / light blue / teal
        ("Gemini 3 Flash", "blue", "circle"),
        ("Gemini 3 Pro", "light_blue", "circle"),
        ("Gemini 2.5 Pro", "teal", "circle"),
        # OpenAI models - green, yellow, orange, brown
        ("GPT-5.1", "green", "square"),
        ("GPT-4o", "green", "diamond"),
        ("o4-mini", "yellow", "square"),
        ("GPT-OSS-120B", "orange", "triangle_up"),
        ("GPT-OSS-20B", "brown", "triangle_down"),
        # Anthropic models - pink
        ("Claude Opus 4.5", "pink", "diamond"),
        # Moonshot AI - red
        ("Kimi K2", "red", "pentagon"),
        # Mistral - purple
        ("Mistral Large", "purple", "hexagon"),
        # Minimax - gray
        ("Minimax M2.1", "gray", "star"),
        # Qwen - teal
        ("Qwen3-VL-32B", "teal", "plus"),
        # ServiceNow - brown
        ("Apriel-1.6-15B", "brown", "x"),
    )
}
# Fallback for models without an entry in MODEL_STYLES: gray circle.
_DEFAULT_STYLE = {"color": _COLORS["gray"], "marker": _MARKERS["circle"]}
def get_display_name(raw_name: str) -> str:
    """
    Map a raw model identifier to its standardized display name.

    Names that have no entry in MODEL_DISPLAY_NAMES are returned unchanged.
    """
    try:
        return MODEL_DISPLAY_NAMES[raw_name]
    except KeyError:
        return raw_name
def get_model_style(model_name: str) -> dict:
    """
    Get the style (color, marker) for a model.

    Lookup order: ``model_name`` as a key of MODEL_STYLES, then the display
    name obtained from ``get_display_name(model_name)``, then the default
    style.

    Args:
        model_name: Either raw name or display name

    Returns:
        A new dict with 'color' and 'marker' keys. The dict is a copy, so a
        caller may add or override entries without changing the style that
        later callers receive.
    """
    # Try display name first
    style = MODEL_STYLES.get(model_name)
    if style is None:
        # Try converting from raw name, falling back to the default style
        style = MODEL_STYLES.get(get_display_name(model_name), _DEFAULT_STYLE)
    # Hand out a copy: returning the module-level dict itself would let a
    # caller-side mutation leak into MODEL_STYLES / _DEFAULT_STYLE.
    return dict(style)
def get_model_color(model_name: str) -> str:
    """Return only the 'color' entry of the model's style."""
    style = get_model_style(model_name)
    return style['color']
def get_model_marker(model_name: str) -> str:
    """Return only the 'marker' entry of the model's style."""
    style = get_model_style(model_name)
    return style['marker']
# For backward compatibility - create a color palette list
def get_color_palette(n_colors: int = 10):
    """
    Get a colorblind-friendly palette with n colors.

    Args:
        n_colors: Number of colors to return; must be non-negative.
            ``None`` returns each color of the fixed order once.

    Returns:
        List of exactly ``n_colors`` hex color strings taken from _COLORS in
        a fixed order. The order holds ten distinct colors ('gray' is not
        part of it); when more are requested the sequence repeats from the
        start, so colors beyond the tenth are duplicates.

    Raises:
        ValueError: If ``n_colors`` is negative.
    """
    palette_order = ('blue', 'orange', 'green', 'pink', 'light_blue',
                     'red', 'purple', 'brown', 'teal', 'yellow')
    n_distinct = len(palette_order)
    if n_colors is None:
        n_colors = n_distinct
    if n_colors < 0:
        # A negative count used to slice from the end and silently return a
        # truncated palette; reject it explicitly instead.
        raise ValueError(f"n_colors must be non-negative, got {n_colors}")
    # Cycle through the order so the caller always receives n_colors entries.
    return [_COLORS[palette_order[i % n_distinct]] for i in range(n_colors)]
# Figure widths in inches for the ICML one- and two-column layouts.
SINGLE_COLUMN_WIDTH = 3.25  # inches (ICML)
DOUBLE_COLUMN_WIDTH = 6.75  # inches (ICML)

# Base font size; label and title sizes below are offsets from it.
MIN_FONT_SIZE = 10

# Plot settings keyed by dotted parameter names.
# NOTE(review): keys look like matplotlib rcParams - presumably applied via
# rcParams.update() by the figure scripts; confirm against callers.
PLOT_PARAMETERS = {
    # Fonts
    "font.size": MIN_FONT_SIZE,
    "font.family": "serif",
    "axes.labelsize": MIN_FONT_SIZE + 1,
    "axes.titlesize": MIN_FONT_SIZE + 2,
    "xtick.labelsize": MIN_FONT_SIZE,
    "ytick.labelsize": MIN_FONT_SIZE,
    "legend.fontsize": MIN_FONT_SIZE,
    "figure.titlesize": MIN_FONT_SIZE + 2,
    # Resolution and saving
    "figure.dpi": 150,
    "savefig.dpi": 300,
    "savefig.bbox": "tight",
    # Axes: hide the top and right spines
    "axes.spines.top": False,
    "axes.spines.right": False,
    # Line widths
    "axes.linewidth": 0.8,
    "lines.linewidth": 1.0,
    "patch.linewidth": 0.5,
}