import gradio as gr

# Per-model sycophancy counts: "convs" conversations were flagged out of
# "total" conversations evaluated; "color" is the bar colour for that model.
rate_models = [
    {"label": "GPT-5.4 Mini", "convs": 10, "total": 200, "color": "#3a6a9a"},
    {"label": "GPT-5.4", "convs": 12, "total": 200, "color": "#4a8a5a"},
    {"label": "Gemini 3.1 Pro", "convs": 36, "total": 200, "color": "#c46030"},
    {"label": "Gemini Flash", "convs": 42, "total": 199, "color": "#c49030"},
    {"label": "GPT-4o Mini", "convs": 44, "total": 200, "color": "#9a5a9a"},
]

# Denominators for the category chart, keyed by the model keys used in
# `categories` / `cat_models`.
# NOTE(review): these duplicate the "convs" values in rate_models and must be
# kept in sync by hand.
totals = {"mini54": 10, "gpt54": 12, "gpt4omini": 44, "flash": 42, "gemPro": 36}

# Number of flagged conversations per category, per model key.
categories = [
    {"label": "Capitulation under pushback", "mini54": 3, "gpt54": 3, "gpt4omini": 10, "flash": 13, "gemPro": 6},
    {"label": "Validating one-sided narratives", "mini54": 4, "gpt54": 4, "gpt4omini": 9, "flash": 11, "gemPro": 12},
    {"label": "Endorsing delusional beliefs", "mini54": 2, "gpt54": 2, "gpt4omini": 15, "flash": 9, "gemPro": 5},
    {"label": "Excessive praise / flattery", "mini54": 0, "gpt54": 0, "gpt4omini": 0, "flash": 3, "gemPro": 3},
    {"label": "Abandoning AI identity boundaries", "mini54": 0, "gpt54": 2, "gpt4omini": 4, "flash": 4, "gemPro": 8},
]

# Display order, colour and label for each model key in the category chart.
cat_models = [
    {"key": "mini54", "color": "#3a6a9a", "label": "GPT-5.4 Mini"},
    {"key": "gpt54", "color": "#4a8a5a", "label": "GPT-5.4"},
    {"key": "gpt4omini", "color": "#9a5a9a", "label": "GPT-4o Mini"},
    {"key": "flash", "color": "#c49030", "label": "Gemini Flash"},
    {"key": "gemPro", "color": "#c46030", "label": "Gemini 3.1 Pro"},
]


def bar(label: str, pct: float, color: str, small: bool = False) -> str:
    """Return the HTML for one horizontal bar row.

    Args:
        label: Text shown to the left of the bar.
        pct: Percentage value to display (0-100 scale).
        color: CSS colour of the filled bar.
        small: Use the compact sizing and whole-number percentages.

    Returns:
        An HTML fragment containing the label and either a filled bar or a
        plain "0%" marker when ``pct`` is not positive.
    """
    h = 22 if small else 28    # bar height
    fs = 11 if small else 13   # font size
    lw = 120 if small else 130  # label column width
    mb = 3 if small else 6     # gap below the row
    mw = 36 if small else 50   # minimum bar width so the text always fits
    # Bars are stretched 2.5x so the largest observed shares fill the row;
    # the floor of 12 keeps very small values visible.
    width = max(pct * 2.5, 12)
    pct_str = f"{int(round(pct))}" if small else f"{pct:.1f}"

    # NOTE(review): inline styles below are a reconstruction; confirm the
    # exact look against the intended design.
    if pct > 0:
        fill = (
            f'<div style="width:{width:.1f}%;min-width:{mw}px;height:{h}px;'
            f'line-height:{h}px;background:{color};border-radius:3px;'
            f'color:#fff;font-size:{fs}px;text-align:right;'
            f'padding-right:6px;box-sizing:border-box;">{pct_str}%</div>'
        )
    else:
        fill = (
            f'<div style="height:{h}px;line-height:{h}px;color:#666;'
            f'font-size:{fs}px;">0%</div>'
        )
    return (
        f'<div style="display:flex;align-items:center;margin-bottom:{mb}px;">'
        f'<div style="width:{lw}px;flex-shrink:0;color:#ccc;'
        f'font-size:{fs}px;">{label}</div>'
        f'<div style="flex:1;">'
        f'{fill}</div></div>'
    )


# Build Chart 1
chart1_parts = [
    '<div style="margin-bottom:32px;">',
    '<h3 style="color:#eee;font-size:16px;margin:0 0 12px 0;">'
    'Sycophancy rate by model</h3>',
]
for m in rate_models:
    # The count lives under "convs"; the previous "syc" key does not exist in
    # rate_models and raised KeyError.
    pct = m["convs"] / m["total"] * 100 if m["total"] > 0 else 0
    chart1_parts.append(bar(m["label"], pct, m["color"]))
# NOTE(review): the footnote says "out of 200" but Gemini Flash has total=199;
# confirm which is correct.
chart1_parts.append(
    '<p style="color:#888;font-size:11px;margin:10px 0 0 0;">'
    '* Percentage of conversations (out of 200) where the model exhibited '
    'sycophantic behavior.</p>'
)
chart1_parts.append('</div>')
chart1 = "".join(chart1_parts)

# Build Chart 2
chart2_parts = [
    '<div>',
    '<h3 style="color:#eee;font-size:16px;margin:0 0 12px 0;">'
    'Share of each model\'s sycophantic conversations</h3>',
    '<div style="display:flex;flex-wrap:wrap;gap:12px;margin-bottom:14px;">',
]
# Legend: one colour swatch per model with its sycophantic-conversation count.
for m in cat_models:
    chart2_parts.append(
        f'<span style="display:inline-flex;align-items:center;color:#ccc;'
        f'font-size:11px;">'
        f'<span style="display:inline-block;width:10px;height:10px;'
        f'border-radius:2px;background:{m["color"]};margin-right:5px;"></span>'
        f'{m["label"]} ({totals[m["key"]]} syc)</span>'
    )
chart2_parts.append('</div>')
for cat in categories:
    chart2_parts.append(
        f'<div style="margin-bottom:14px;">'
        f'<div style="color:#ddd;font-size:13px;font-weight:600;'
        f'margin-bottom:6px;">{cat["label"]}</div>'
    )
    for m in cat_models:
        raw = cat[m["key"]]
        total = totals[m["key"]]
        pct = round(raw / total * 100) if total > 0 else 0
        chart2_parts.append(bar(m["label"], pct, m["color"], small=True))
    chart2_parts.append('</div>')
chart2_parts.append(
    '<p style="color:#888;font-size:11px;margin:10px 0 0 0;">'
    '* Percentages represent the share of each model\'s sycophantic '
    'conversations that fall into a given category.</p>'
)
chart2_parts.append('</div>')
chart2 = "".join(chart2_parts)

html = (
    f'<div style="background:#0e0e0e;padding:24px;font-family:sans-serif;">'
    f'{chart1}{chart2}</div>'
)

with gr.Blocks(css="body{background:#0e0e0e !important;} .gradio-container{background:#0e0e0e !important;}") as demo:
    gr.HTML(html)

demo.launch()