import gradio as gr

# Per-model rows for the first chart. Each entry carries a display label,
# a `convs` count, a `total` count and a bar colour.
# Presumably `convs` is the number of sycophantic conversations out of
# `total` conversations evaluated -- confirm against the data source.
# NOTE(review): "Gemini Flash" has total 199 while every other model has 200,
# and the chart footnote further down says "out of 200" -- verify which is
# right.
rate_models = [
    {"label": "GPT-5.4 Mini", "convs": 10, "total": 200, "color": "#3a6a9a"},
    {"label": "GPT-5.4", "convs": 12, "total": 200, "color": "#4a8a5a"},
    {"label": "Gemini 3.1 Pro", "convs": 36, "total": 200, "color": "#c46030"},
    {"label": "Gemini Flash", "convs": 42, "total": 199, "color": "#c49030"},
    {"label": "GPT-4o Mini", "convs": 44, "total": 200, "color": "#9a5a9a"},
]

# Per-model counts keyed by the short model keys used in `categories` and
# `cat_models`. The values equal the `convs` figures in `rate_models`
# (10, 12, 44, 42, 36), so the two structures must be kept in sync by hand.
totals = {"mini54": 10, "gpt54": 12, "gpt4omini": 44, "flash": 42, "gemPro": 36}

# One row per behaviour category; every model key maps to that model's value
# for the category.
# NOTE(review): the per-model column sums (mini54 9, gpt54 11, gpt4omini 38,
# flash 40, gemPro 34) are smaller than the matching `totals` entries --
# confirm whether some conversations are intentionally uncategorised.
categories = [
    {"label": "Capitulation under pushback", "mini54": 3, "gpt54": 3, "gpt4omini": 10, "flash": 13, "gemPro": 6},
    {"label": "Validating one-sided narratives", "mini54": 4, "gpt54": 4, "gpt4omini": 9, "flash": 11, "gemPro": 12},
    {"label": "Endorsing delusional beliefs", "mini54": 2, "gpt54": 2, "gpt4omini": 15, "flash": 9, "gemPro": 5},
    {"label": "Excessive praise / flattery", "mini54": 0, "gpt54": 0, "gpt4omini": 0, "flash": 3, "gemPro": 3},
    {"label": "Abandoning AI identity boundaries", "mini54": 0, "gpt54": 2, "gpt4omini": 4, "flash": 4, "gemPro": 8},
]

# Maps each short model key to its colour and display label. The colours and
# labels repeat the ones in `rate_models`, in a different order.
cat_models = [
    {"key": "mini54", "color": "#3a6a9a", "label": "GPT-5.4 Mini"},
    {"key": "gpt54", "color": "#4a8a5a", "label": "GPT-5.4"},
    {"key": "gpt4omini", "color": "#9a5a9a", "label": "GPT-4o Mini"},
    {"key": "flash", "color": "#c49030", "label": "Gemini Flash"},
    {"key": "gemPro", "color": "#c46030", "label": "Gemini 3.1 Pro"},
]


# Builds the markup for a single bar from a label, a percentage and a colour;
# `small` selects the compact set of dimensions below.
# NOTE(review): this definition is truncated in the file -- the text after
# `fill = ( f'` (the template strings and the rest of the body) is missing
# and must be restored from the original source before the module can run.
def bar(label, pct, color, small=False):
    # Size constants; presumably height, font size, label width, bottom
    # margin and a minimum width -- confirm once the template is restored.
    h = 22 if small else 28
    fs = 11 if small else 13
    lw = 120 if small else 130
    mb = 3 if small else 6
    mw = 36 if small else 50
    # Scale the percentage by 2.5, but never go below 12 so tiny values
    # still get a visible width.
    width = max(pct * 2.5, 12)
    # One decimal place in the regular layout, a rounded integer when small.
    pct_str = f"{pct:.1f}" if not small else f"{int(round(pct))}"
    if pct > 0:
        fill = ( f'
* Percentage of conversations (out of 200) where the model exhibited sycophantic behavior.
' chart1 += '* Percentages represent the share of each model\'s sycophantic conversations that fall into a given category.
' chart2 += '