Ryan Da
align models and colors
3ce5164
import React, { useState } from 'react';
// Headline numbers for the overall-rate chart, in display order.
// Each row is [label, syc, total, gradientStart, gradientEnd]: `syc` is the
// count of conversations flagged as sycophantic out of `total`, and the two
// colours form the left-to-right gradient that fills the model's bar.
const rateModels = [
  ['GPT-5.4 Mini', 10, 200, '#0a5c3a', '#10a37f'],
  ['GPT-5.4', 12, 200, '#0d6b45', '#15c896'],
  ['Gemini 3.1 Pro', 36, 200, '#1a56b0', '#4285f4'],
  ['Gemini Flash', 42, 199, '#2563a8', '#5b9ef5'],
  ['GPT-4o Mini', 44, 200, '#074a2e', '#0d8a5f'],
].map(([label, syc, total, gradientStart, gradientEnd]) => ({
  label,
  syc,
  total,
  bg: `linear-gradient(to right, ${gradientStart}, ${gradientEnd})`,
}));
// Sycophantic-conversation count per model, keyed by the `key` field used in
// `catModels`. Shown in the category legend and used as the denominator when
// turning category counts into shares. The values mirror the `syc` counts in
// `rateModels`.
const totals = {
  mini54: 10,
  gpt54: 12,
  gpt4omini: 44,
  flash: 42,
  gemPro: 36,
};
// Per-category conversation counts for the breakdown chart, in display order.
// Each row is [label, mini54, gpt54, gpt4omini, flash, gemPro]; the model
// columns use the same keys as `totals` and `catModels`.
const categories = [
  ['Capitulation under pushback', 3, 3, 10, 13, 6],
  ['Validating one-sided narratives', 4, 4, 9, 11, 12],
  ['Endorsing delusional beliefs', 2, 2, 15, 9, 5],
  ['Excessive praise / flattery', 0, 0, 0, 3, 3],
  ['Abandoning AI identity boundaries', 0, 2, 4, 4, 8],
].map(([label, mini54, gpt54, gpt4omini, flash, gemPro]) => ({
  label,
  mini54,
  gpt54,
  gpt4omini,
  flash,
  gemPro,
}));
// Models shown in the category breakdown, in display order. `key` selects the
// model's column in `categories` / `totals`; `dot` is the legend swatch colour
// and also the end colour of the bar gradient, `from` is the gradient's start.
const catModels = [
  { key: 'mini54', label: 'GPT-5.4 Mini', from: '#0a5c3a', dot: '#10a37f' },
  { key: 'gpt54', label: 'GPT-5.4', from: '#0d6b45', dot: '#15c896' },
  { key: 'gpt4omini', label: 'GPT-4o Mini', from: '#074a2e', dot: '#0d8a5f' },
  { key: 'flash', label: 'Gemini Flash', from: '#2563a8', dot: '#5b9ef5' },
  { key: 'gemPro', label: 'Gemini 3.1 Pro', from: '#1a56b0', dot: '#4285f4' },
].map(({ key, label, from, dot }) => ({
  key,
  bg: `linear-gradient(to right, ${from}, ${dot})`,
  dot,
  label,
}));
const BenchmarkChart = () => {
const [showCategories, setShowCategories] = useState(false);
return (
<div className="min-h-screen bg-gradient-to-br from-slate-900 to-slate-800 p-4">
<div className="max-w-4xl mx-auto">
{/* Header */}
<div className="text-center mb-6">
<h1 className="text-3xl font-bold text-white mb-3">
Sycophancy Benchmark
</h1>
<p className="text-slate-300 mb-4">
Percentage of conversations where each model exhibited sycophantic behavior
</p>
<button
onClick={() => setShowCategories(!showCategories)}
className="px-6 py-2 bg-blue-600 hover:bg-blue-700 text-white rounded-lg font-semibold transition-colors"
>
{showCategories ? 'Show Overall Rates' : 'Show Category Breakdown'}
</button>
</div>
{/* Chart 1: Overall Rate */}
{!showCategories && (
<div className="bg-white rounded-xl shadow-2xl p-6">
<h2 className="text-lg font-semibold text-gray-800 mb-4">
Sycophancy rate by model
</h2>
<div className="space-y-3">
{rateModels.map((m) => {
const pct = ((m.syc / m.total) * 100).toFixed(1);
const barWidth = Math.max(parseFloat(pct) * 2.5, 8);
return (
<div key={m.label}>
<div className="flex items-center justify-between mb-1">
<span className="font-semibold text-gray-800 text-sm">{m.label}</span>
<span className="text-lg font-bold text-gray-700">{pct}%</span>
</div>
<div className="h-7 bg-gray-200 rounded-full overflow-hidden">
<div
className="h-full rounded-full transition-all duration-700"
style={{ width: `${barWidth}%`, background: m.bg }}
/>
</div>
</div>
);
})}
</div>
<p className="mt-4 text-xs text-gray-500">
* Percentage of conversations (out of 200) where the model exhibited sycophantic behavior.
</p>
</div>
)}
{/* Chart 2: Category Breakdown */}
{showCategories && (
<div className="bg-white rounded-xl shadow-2xl p-6">
<h2 className="text-lg font-semibold text-gray-800 mb-4">
Share of each model's sycophantic conversations
</h2>
{/* Legend */}
<div className="flex flex-wrap gap-4 mb-6 text-xs bg-gray-50 p-3 rounded-lg">
{catModels.map((m) => (
<div key={m.key} className="flex items-center gap-1.5">
<div className="w-3 h-3 rounded" style={{ background: m.dot }} />
<span className="text-gray-700">{m.label} ({totals[m.key]} convs)</span>
</div>
))}
</div>
<div className="space-y-5">
{categories.map((cat) => (
<div key={cat.label}>
<h3 className="font-semibold text-gray-800 text-sm mb-2">{cat.label}</h3>
<div className="space-y-1.5">
{catModels.map((model) => {
const raw = cat[model.key];
const total = totals[model.key];
const pct = total > 0 ? Math.round((raw / total) * 100) : 0;
const barWidth = Math.max(pct, 5);
return (
<div key={model.key} className="flex items-center gap-2">
<span className="w-28 text-xs text-right text-gray-600 flex-shrink-0">
{model.label}
</span>
<div className="flex-1 h-5 bg-gray-200 rounded-full overflow-hidden">
{pct > 0 ? (
<div
className="h-full rounded-full flex items-center pl-2 text-xs font-semibold text-white transition-all duration-700"
style={{ width: `${barWidth}%`, background: model.bg }}
>
{pct}%
</div>
) : (
<div className="h-full flex items-center pl-2 text-xs text-gray-400">
0%
</div>
)}
</div>
</div>
);
})}
</div>
</div>
))}
</div>
<p className="mt-4 text-xs text-gray-500">
* Percentages represent the share of each model's sycophantic conversations that fall into a given category.
</p>
</div>
)}
{/* Footer */}
<div className="mt-6 text-center text-slate-400 text-xs">
<p>JulyAI Sycophancy Benchmark: {rateModels.length} SOTA models tested across 200 conversations each</p>
</div>
</div>
</div>
);
};
export default BenchmarkChart;