// Auto-generated from figure3_master_heatmap.csv
// Structure: array of { model, tier, scores: { "Game|Modality": value } }
//
// Layout notes:
//   - Every score key is "<game key>|<modality>", where the game key comes
//     from GAMES and the modality from MODALITIES (5 x 3 = 15 keys per row).
//   - BENCHMARK_DATA holds one row per (model, tier) pair: 9 models x 3 tiers.
//   - A score is either a number or null. The only null cells are the
//     "Pax Ren.|Image" and "Catan|Image" entries of "Claude 4.5 Sonnet"
//     (in all three tiers); consumers must handle null explicitly.
//     NOTE(review): null presumably means "no result available" - confirm
//     against the CSV generator.

// Short game keys, in the order they appear in every `scores` object.
const GAMES = ["KingD", "Res Arcana", "Pax Ren.", "Carca.", "Catan"];

// Maps each short game key to the full game name.
const GAME_LABELS = {
  "KingD": "Kingdomino",
  "Res Arcana": "Res Arcana",
  "Pax Ren.": "Pax Renaissance",
  "Carca.": "Carcassonne",
  "Catan": "Catan"
};

// Modality suffixes used after the "|" in score keys.
const MODALITIES = ["None", "Text", "Image"];

// Tier identifiers used in the `tier` field of each row.
const TIERS = ["T1", "T2", "T3"];

// Maps each tier identifier to its descriptive label.
const TIER_LABELS = {
  "T1": "Tier 1: Environment Perception",
  "T2": "Tier 2: Rules Integration",
  "T3": "Tier 3: Short-Horizon Optimization"
};

// One row per (model, tier). Within each `scores` object the keys are listed
// game by game (GAMES order), each with its None / Text / Image cells.
const BENCHMARK_DATA = [
  // ---- T1 ----
  {
    model: "GPT-4o", tier: "T1",
    scores: {
      "KingD|None": 0.381, "KingD|Text": 0.571, "KingD|Image": 0.429,
      "Res Arcana|None": 0.650, "Res Arcana|Text": 0.650, "Res Arcana|Image": 0.575,
      "Pax Ren.|None": 0.375, "Pax Ren.|Text": 0.475, "Pax Ren.|Image": 0.600,
      "Carca.|None": 0.425, "Carca.|Text": 0.350, "Carca.|Image": 0.400,
      "Catan|None": 0.450, "Catan|Text": 0.450, "Catan|Image": 0.450
    }
  },
  {
    model: "o1", tier: "T1",
    scores: {
      "KingD|None": 0.524, "KingD|Text": 0.429, "KingD|Image": 0.429,
      "Res Arcana|None": 0.675, "Res Arcana|Text": 0.525, "Res Arcana|Image": 0.600,
      "Pax Ren.|None": 0.450, "Pax Ren.|Text": 0.550, "Pax Ren.|Image": 0.475,
      "Carca.|None": 0.450, "Carca.|Text": 0.450, "Carca.|Image": 0.375,
      "Catan|None": 0.475, "Catan|Text": 0.550, "Catan|Image": 0.550
    }
  },
  {
    model: "GPT-4.1", tier: "T1",
    scores: {
      "KingD|None": 0.619, "KingD|Text": 0.524, "KingD|Image": 0.619,
      "Res Arcana|None": 0.775, "Res Arcana|Text": 0.725, "Res Arcana|Image": 0.750,
      "Pax Ren.|None": 0.525, "Pax Ren.|Text": 0.575, "Pax Ren.|Image": 0.600,
      "Carca.|None": 0.575, "Carca.|Text": 0.400, "Carca.|Image": 0.475,
      "Catan|None": 0.575, "Catan|Text": 0.450, "Catan|Image": 0.650
    }
  },
  {
    model: "o3", tier: "T1",
    scores: {
      "KingD|None": 0.750, "KingD|Text": 0.675, "KingD|Image": 0.650,
      "Res Arcana|None": 0.775, "Res Arcana|Text": 0.775, "Res Arcana|Image": 0.700,
      "Pax Ren.|None": 0.475, "Pax Ren.|Text": 0.675, "Pax Ren.|Image": 0.650,
      "Carca.|None": 0.450, "Carca.|Text": 0.500, "Carca.|Image": 0.575,
      "Catan|None": 0.600, "Catan|Text": 0.575, "Catan|Image": 0.550
    }
  },
  {
    model: "GPT-5.1", tier: "T1",
    scores: {
      "KingD|None": 0.750, "KingD|Text": 0.650, "KingD|Image": 0.675,
      "Res Arcana|None": 0.775, "Res Arcana|Text": 0.800, "Res Arcana|Image": 0.800,
      "Pax Ren.|None": 0.600, "Pax Ren.|Text": 0.600, "Pax Ren.|Image": 0.650,
      "Carca.|None": 0.725, "Carca.|Text": 0.525, "Carca.|Image": 0.550,
      "Catan|None": 0.600, "Catan|Text": 0.575, "Catan|Image": 0.575
    }
  },
  {
    model: "Gemini 2.5 Flash", tier: "T1",
    scores: {
      "KingD|None": 0.524, "KingD|Text": 0.476, "KingD|Image": 0.381,
      "Res Arcana|None": 0.675, "Res Arcana|Text": 0.775, "Res Arcana|Image": 0.500,
      "Pax Ren.|None": 0.550, "Pax Ren.|Text": 0.400, "Pax Ren.|Image": 0.375,
      "Carca.|None": 0.450, "Carca.|Text": 0.400, "Carca.|Image": 0.350,
      "Catan|None": 0.625, "Catan|Text": 0.500, "Catan|Image": 0.325
    }
  },
  {
    model: "Gemini 2.5 Pro", tier: "T1",
    scores: {
      "KingD|None": 0.524, "KingD|Text": 0.524, "KingD|Image": 0.333,
      "Res Arcana|None": 0.650, "Res Arcana|Text": 0.700, "Res Arcana|Image": 0.525,
      "Pax Ren.|None": 0.650, "Pax Ren.|Text": 0.725, "Pax Ren.|Image": 0.375,
      "Carca.|None": 0.425, "Carca.|Text": 0.350, "Carca.|Image": 0.450,
      "Catan|None": 0.575, "Catan|Text": 0.650, "Catan|Image": 0.425
    }
  },
  {
    model: "Gemini 3 Pro", tier: "T1",
    scores: {
      "KingD|None": 0.825, "KingD|Text": 0.800, "KingD|Image": 0.850,
      "Res Arcana|None": 0.875, "Res Arcana|Text": 0.850, "Res Arcana|Image": 0.775,
      "Pax Ren.|None": 0.775, "Pax Ren.|Text": 0.750, "Pax Ren.|Image": 0.800,
      "Carca.|None": 0.825, "Carca.|Text": 0.750, "Carca.|Image": 0.650,
      "Catan|None": 0.525, "Catan|Text": 0.625, "Catan|Image": 0.700
    }
  },
  {
    model: "Claude 4.5 Sonnet", tier: "T1",
    scores: {
      "KingD|None": 0.571, "KingD|Text": 0.619, "KingD|Image": 0.524,
      "Res Arcana|None": 0.650, "Res Arcana|Text": 0.800, "Res Arcana|Image": 0.800,
      "Pax Ren.|None": 0.600, "Pax Ren.|Text": 0.650, "Pax Ren.|Image": null,
      "Carca.|None": 0.375, "Carca.|Text": 0.325, "Carca.|Image": 0.300,
      "Catan|None": 0.600, "Catan|Text": 0.625, "Catan|Image": null
    }
  },

  // ---- T2 ----
  {
    model: "GPT-4o", tier: "T2",
    scores: {
      "KingD|None": 0.300, "KingD|Text": 0.433, "KingD|Image": 0.300,
      "Res Arcana|None": 0.225, "Res Arcana|Text": 0.400, "Res Arcana|Image": 0.350,
      "Pax Ren.|None": 0.125, "Pax Ren.|Text": 0.525, "Pax Ren.|Image": 0.475,
      "Carca.|None": 0.125, "Carca.|Text": 0.150, "Carca.|Image": 0.225,
      "Catan|None": 0.225, "Catan|Text": 0.200, "Catan|Image": 0.200
    }
  },
  {
    model: "o1", tier: "T2",
    scores: {
      "KingD|None": 0.200, "KingD|Text": 0.400, "KingD|Image": 0.267,
      "Res Arcana|None": 0.350, "Res Arcana|Text": 0.350, "Res Arcana|Image": 0.350,
      "Pax Ren.|None": 0.250, "Pax Ren.|Text": 0.400, "Pax Ren.|Image": 0.450,
      "Carca.|None": 0.225, "Carca.|Text": 0.250, "Carca.|Image": 0.300,
      "Catan|None": 0.150, "Catan|Text": 0.250, "Catan|Image": 0.225
    }
  },
  {
    model: "GPT-4.1", tier: "T2",
    scores: {
      "KingD|None": 0.333, "KingD|Text": 0.433, "KingD|Image": 0.300,
      "Res Arcana|None": 0.400, "Res Arcana|Text": 0.500, "Res Arcana|Image": 0.475,
      "Pax Ren.|None": 0.275, "Pax Ren.|Text": 0.400, "Pax Ren.|Image": 0.375,
      "Carca.|None": 0.175, "Carca.|Text": 0.200, "Carca.|Image": 0.150,
      "Catan|None": 0.325, "Catan|Text": 0.225, "Catan|Image": 0.325
    }
  },
  {
    model: "o3", tier: "T2",
    scores: {
      "KingD|None": 0.350, "KingD|Text": 0.400, "KingD|Image": 0.375,
      "Res Arcana|None": 0.325, "Res Arcana|Text": 0.625, "Res Arcana|Image": 0.475,
      "Pax Ren.|None": 0.300, "Pax Ren.|Text": 0.550, "Pax Ren.|Image": 0.575,
      "Carca.|None": 0.275, "Carca.|Text": 0.275, "Carca.|Image": 0.275,
      "Catan|None": 0.375, "Catan|Text": 0.275, "Catan|Image": 0.275
    }
  },
  {
    model: "GPT-5.1", tier: "T2",
    scores: {
      "KingD|None": 0.300, "KingD|Text": 0.325, "KingD|Image": 0.275,
      "Res Arcana|None": 0.325, "Res Arcana|Text": 0.525, "Res Arcana|Image": 0.525,
      "Pax Ren.|None": 0.200, "Pax Ren.|Text": 0.600, "Pax Ren.|Image": 0.467,
      "Carca.|None": 0.250, "Carca.|Text": 0.250, "Carca.|Image": 0.325,
      "Catan|None": 0.275, "Catan|Text": 0.275, "Catan|Image": 0.300
    }
  },
  {
    model: "Gemini 2.5 Flash", tier: "T2",
    scores: {
      "KingD|None": 0.167, "KingD|Text": 0.300, "KingD|Image": 0.267,
      "Res Arcana|None": 0.225, "Res Arcana|Text": 0.300, "Res Arcana|Image": 0.300,
      "Pax Ren.|None": 0.250, "Pax Ren.|Text": 0.300, "Pax Ren.|Image": 0.375,
      "Carca.|None": 0.250, "Carca.|Text": 0.300, "Carca.|Image": 0.200,
      "Catan|None": 0.250, "Catan|Text": 0.275, "Catan|Image": 0.125
    }
  },
  {
    model: "Gemini 2.5 Pro", tier: "T2",
    scores: {
      "KingD|None": 0.367, "KingD|Text": 0.367, "KingD|Image": 0.267,
      "Res Arcana|None": 0.375, "Res Arcana|Text": 0.475, "Res Arcana|Image": 0.375,
      "Pax Ren.|None": 0.100, "Pax Ren.|Text": 0.400, "Pax Ren.|Image": 0.250,
      "Carca.|None": 0.225, "Carca.|Text": 0.275, "Carca.|Image": 0.150,
      "Catan|None": 0.275, "Catan|Text": 0.350, "Catan|Image": 0.175
    }
  },
  {
    model: "Gemini 3 Pro", tier: "T2",
    scores: {
      "KingD|None": 0.725, "KingD|Text": 0.675, "KingD|Image": 0.750,
      "Res Arcana|None": 0.550, "Res Arcana|Text": 0.625, "Res Arcana|Image": 0.650,
      "Pax Ren.|None": 0.325, "Pax Ren.|Text": 0.475, "Pax Ren.|Image": 0.425,
      "Carca.|None": 0.425, "Carca.|Text": 0.425, "Carca.|Image": 0.325,
      "Catan|None": 0.500, "Catan|Text": 0.325, "Catan|Image": 0.500
    }
  },
  {
    model: "Claude 4.5 Sonnet", tier: "T2",
    scores: {
      "KingD|None": 0.300, "KingD|Text": 0.333, "KingD|Image": 0.233,
      "Res Arcana|None": 0.433, "Res Arcana|Text": 0.633, "Res Arcana|Image": 0.400,
      "Pax Ren.|None": 0.300, "Pax Ren.|Text": 0.533, "Pax Ren.|Image": null,
      "Carca.|None": 0.275, "Carca.|Text": 0.275, "Carca.|Image": 0.375,
      "Catan|None": 0.225, "Catan|Text": 0.275, "Catan|Image": null
    }
  },

  // ---- T3 ----
  {
    model: "GPT-4o", tier: "T3",
    scores: {
      "KingD|None": 0.000, "KingD|Text": 0.000, "KingD|Image": 0.000,
      "Res Arcana|None": 0.020, "Res Arcana|Text": 0.060, "Res Arcana|Image": 0.000,
      "Pax Ren.|None": 0.053, "Pax Ren.|Text": 0.053, "Pax Ren.|Image": 0.018,
      "Carca.|None": 0.040, "Carca.|Text": 0.000, "Carca.|Image": 0.040,
      "Catan|None": 0.065, "Catan|Text": 0.000, "Catan|Image": 0.032
    }
  },
  {
    model: "o1", tier: "T3",
    scores: {
      "KingD|None": 0.000, "KingD|Text": 0.020, "KingD|Image": 0.000,
      "Res Arcana|None": 0.040, "Res Arcana|Text": 0.060, "Res Arcana|Image": 0.040,
      "Pax Ren.|None": 0.105, "Pax Ren.|Text": 0.018, "Pax Ren.|Image": 0.053,
      "Carca.|None": 0.080, "Carca.|Text": 0.040, "Carca.|Image": 0.040,
      "Catan|None": 0.032, "Catan|Text": 0.065, "Catan|Image": 0.032
    }
  },
  {
    model: "GPT-4.1", tier: "T3",
    scores: {
      "KingD|None": 0.040, "KingD|Text": 0.080, "KingD|Image": 0.100,
      "Res Arcana|None": 0.020, "Res Arcana|Text": 0.020, "Res Arcana|Image": 0.020,
      "Pax Ren.|None": 0.123, "Pax Ren.|Text": 0.123, "Pax Ren.|Image": 0.088,
      "Carca.|None": 0.020, "Carca.|Text": 0.000, "Carca.|Image": 0.060,
      "Catan|None": 0.194, "Catan|Text": 0.065, "Catan|Image": 0.032
    }
  },
  {
    model: "o3", tier: "T3",
    scores: {
      "KingD|None": 0.060, "KingD|Text": 0.120, "KingD|Image": 0.040,
      "Res Arcana|None": 0.000, "Res Arcana|Text": 0.060, "Res Arcana|Image": 0.060,
      "Pax Ren.|None": 0.070, "Pax Ren.|Text": 0.175, "Pax Ren.|Image": 0.211,
      "Carca.|None": 0.120, "Carca.|Text": 0.060, "Carca.|Image": 0.100,
      "Catan|None": 0.194, "Catan|Text": 0.226, "Catan|Image": 0.258
    }
  },
  {
    model: "GPT-5.1", tier: "T3",
    scores: {
      "KingD|None": 0.160, "KingD|Text": 0.060, "KingD|Image": 0.060,
      "Res Arcana|None": 0.080, "Res Arcana|Text": 0.080, "Res Arcana|Image": 0.080,
      "Pax Ren.|None": 0.123, "Pax Ren.|Text": 0.088, "Pax Ren.|Image": 0.211,
      "Carca.|None": 0.020, "Carca.|Text": 0.100, "Carca.|Image": 0.100,
      "Catan|None": 0.129, "Catan|Text": 0.161, "Catan|Image": 0.161
    }
  },
  {
    model: "Gemini 2.5 Flash", tier: "T3",
    scores: {
      "KingD|None": 0.000, "KingD|Text": 0.020, "KingD|Image": 0.000,
      "Res Arcana|None": 0.000, "Res Arcana|Text": 0.080, "Res Arcana|Image": 0.080,
      "Pax Ren.|None": 0.018, "Pax Ren.|Text": 0.053, "Pax Ren.|Image": 0.000,
      "Carca.|None": 0.020, "Carca.|Text": 0.060, "Carca.|Image": 0.040,
      "Catan|None": 0.032, "Catan|Text": 0.000, "Catan|Image": 0.065
    }
  },
  {
    model: "Gemini 2.5 Pro", tier: "T3",
    scores: {
      "KingD|None": 0.040, "KingD|Text": 0.020, "KingD|Image": 0.000,
      "Res Arcana|None": 0.120, "Res Arcana|Text": 0.300, "Res Arcana|Image": 0.080,
      "Pax Ren.|None": 0.123, "Pax Ren.|Text": 0.053, "Pax Ren.|Image": 0.088,
      "Carca.|None": 0.060, "Carca.|Text": 0.060, "Carca.|Image": 0.040,
      "Catan|None": 0.129, "Catan|Text": 0.097, "Catan|Image": 0.161
    }
  },
  {
    model: "Gemini 3 Pro", tier: "T3",
    scores: {
      "KingD|None": 0.160, "KingD|Text": 0.160, "KingD|Image": 0.140,
      "Res Arcana|None": 0.200, "Res Arcana|Text": 0.260, "Res Arcana|Image": 0.180,
      "Pax Ren.|None": 0.053, "Pax Ren.|Text": 0.105, "Pax Ren.|Image": 0.193,
      "Carca.|None": 0.060, "Carca.|Text": 0.020, "Carca.|Image": 0.040,
      "Catan|None": 0.129, "Catan|Text": 0.161, "Catan|Image": 0.032
    }
  },
  {
    model: "Claude 4.5 Sonnet", tier: "T3",
    scores: {
      "KingD|None": 0.020, "KingD|Text": 0.040, "KingD|Image": 0.040,
      "Res Arcana|None": 0.040, "Res Arcana|Text": 0.060, "Res Arcana|Image": 0.080,
      "Pax Ren.|None": 0.053, "Pax Ren.|Text": 0.088, "Pax Ren.|Image": null,
      "Carca.|None": 0.040, "Carca.|Text": 0.020, "Carca.|Image": 0.060,
      "Catan|None": 0.000, "Catan|Text": 0.032, "Catan|Image": null
    }
  }
];