Clémentine
Init
ffdff5d
<div class="d3-llm-biases"></div>
<style>
/* Root container of the chart: full width, with background, border, radius
   and shadow all forced off via !important. */
.d3-llm-biases {
font-family: var(--default-font-family);
background: transparent !important;
border: none !important;
border-radius: 0 !important;
padding: var(--spacing-4) 0;
width: 100%;
margin: 0 auto;
position: relative;
box-shadow: none !important;
}
/* The generated <svg> stretches to the container width. */
.d3-llm-biases svg {
width: 100%;
height: auto;
display: block;
}
/* Card frame rectangle. */
.d3-llm-biases .card-rect {
stroke-width: 2;
transition: all 0.3s ease;
}
/* Card title text. */
.d3-llm-biases .bias-title {
fill: var(--text-color);
font-size: 12px;
font-weight: 700;
}
/* Card body text. */
/* NOTE(review): line-height is unlikely to affect SVG <text>; the script
   spaces wrapped lines through tspan dy values instead - confirm and drop. */
.d3-llm-biases .bias-description {
fill: var(--text-color);
font-size: 10px;
font-weight: 400;
line-height: 1.4;
}
/* Chart heading. */
.d3-llm-biases .header-text {
fill: var(--text-color);
font-size: 12px;
font-weight: 700;
text-transform: uppercase;
letter-spacing: 0.05em;
}
/* Small uppercase label; the script applies this class to reference links. */
.d3-llm-biases .example-label {
fill: var(--muted-color);
font-size: 9px;
font-weight: 600;
text-transform: uppercase;
letter-spacing: 0.05em;
}
/* Narrow viewports: shrink title and body text. */
@media (max-width: 768px) {
.d3-llm-biases .bias-title {
font-size: 10px;
}
.d3-llm-biases .bias-description {
font-size: 9px;
}
}
</style>
<script>
(() => {
/**
 * Runs `cb` once a usable d3 (one exposing `d3.select`) is on `window`.
 *
 * If d3 is already available the callback runs synchronously and its result
 * is returned. Otherwise a <script id="d3-cdn-script"> pointing at the
 * jsDelivr d3@7 bundle is reused or injected into <head>, and the callback
 * is attached to that element's `load` event.
 *
 * @param {Function} cb - invoked with no arguments when d3 is ready.
 */
const ensureD3 = (cb) => {
  const isD3Usable = () => Boolean(window.d3) && typeof window.d3.select === 'function';

  if (isD3Usable()) return cb();

  // Reuse a loader tag created by an earlier call instead of adding another.
  let loader = document.getElementById('d3-cdn-script');
  if (!loader) {
    loader = document.createElement('script');
    loader.id = 'd3-cdn-script';
    loader.src = 'https://cdn.jsdelivr.net/npm/d3@7/dist/d3.min.js';
    document.head.appendChild(loader);
  }

  const runIfUsable = () => {
    if (isD3Usable()) cb();
  };
  loader.addEventListener('load', runIfUsable, { once: true });
  if (window.d3) runIfUsable();
};
const bootstrap = () => {
const scriptEl = document.currentScript;
let container = scriptEl ? scriptEl.previousElementSibling : null;
if (!(container && container.classList && container.classList.contains('d3-llm-biases'))) {
const candidates = Array.from(document.querySelectorAll('.d3-llm-biases'))
.filter((el) => !(el.dataset && el.dataset.mounted === 'true'));
container = candidates[candidates.length - 1] || null;
}
if (!container) return;
if (container.dataset) {
if (container.dataset.mounted === 'true') return;
container.dataset.mounted = 'true';
}
// Get colors from ColorPalettes or fallback.
// Returns a non-empty array of CSS colour strings: the 8-colour categorical
// palette from window.ColorPalettes when that helper exists and yields a
// usable array, otherwise the built-in palette below.
const getColors = () => {
  const fallback = ['#e74c3c', '#3498db', '#9b59b6', '#f39c12', '#1abc9c', '#e67e22', '#95a5a6', '#34495e'];
  const palettes = window.ColorPalettes;
  if (palettes && typeof palettes.getColors === 'function') {
    const palette = palettes.getColors('categorical', 8);
    // The helper is external to this script: only trust a non-empty array,
    // so callers indexing into the result always get a colour.
    if (Array.isArray(palette) && palette.length > 0) return palette;
  }
  return fallback;
};
// LLM judge biases, in display order. There are 8 entries; render() lays them
// out two per row (4 rows).
// Fields per entry:
//   id          - identifier string (not read by the rendering code in this script)
//   title       - card heading
//   description - card body, word-wrapped to the card width
//   reference   - link target without the scheme (render() prepends "https://"), or null
//   reference2  - optional second link, same format as reference
const biases = [
{
id: 'internal-consistency',
title: 'No Internal Consistency',
description: 'Gives different judgements if prompted multiple times (at T>0)',
reference: null
}, {
id: 'inconsistent-score-range',
title: 'No Consistent Score Ranges',
description: 'Model ranking do not follow a consistent scale (e.g: for a task where scores should be 1, 2, 3, 4, ... 10, the model might score 1, 1, 1, 10, 10 ... 10)',
reference: 'x.com/aparnadhinak/status/1748368364395721128',
reference2: 'github.com/LeonEricsson/llmjudge'
},
{
id: 'self-preference',
title: 'Self-Preference',
description: 'Judge will favor outputs from similar models when scoring',
reference: 'arxiv.org/abs/2404.13076'
},
{
id: 'input-perturbation',
title: 'Blindness to Input Perturbation',
description: 'If input is perturbed, judges don\'t detect quality drops consistently',
reference: 'arxiv.org/abs/2406.13439'
},
{
id: 'position-bias',
title: 'Position Bias',
description: 'When comparing answers, judge favors specific answer positions (e.g: systematically prefers first or second choice)',
reference: 'arxiv.org/abs/2306.05685'
},
{
id: 'verbosity-bias',
title: 'Verbosity Bias',
description: 'Models prefer more verbose answers',
reference: 'arxiv.org/abs/2404.04475'
},
{
id: 'human-consistency',
title: 'No Consistency With Human Scoring',
description: 'LLM ratings diverge from human ratings',
reference: 'arxiv.org/abs/2308.15812'
},
{
id: 'format-bias',
title: 'Format Bias',
description: 'Judge can\'t judge well when their prompt differs from their training prompt format',
reference: 'arxiv.org/abs/2310.17631'
}
];
// Chart size in pixels. These are only starting values: render() recomputes
// both from the container width every time it runs.
let height = 300;
let width = 800;
// One <svg> per container, with a single <g> that render() translates by the
// margins and refills on each draw.
const svg = d3.select(container).append('svg');
const g = svg.append('g');
/**
 * Helper function to wrap text: splits the content of each SVG <text> element
 * in the selection into <tspan> lines no wider than `width`.
 *
 * Every tspan reuses the element's `x` and `y` attributes; successive lines
 * are offset with `dy` in steps of 1.3em, added to the element's own `dy`
 * (parsed as a number, defaulting to 0).
 *
 * @param {object} text - d3 selection of <text> elements to wrap in place.
 * @param {number} width - maximum line width, compared against
 *   getComputedTextLength() of the line being built.
 */
function wrapText(text, width) {
  const lineHeight = 1.3; // em between consecutive lines

  text.each(function() {
    const textEl = d3.select(this);
    // filter(Boolean) drops the empty tokens that split() yields for leading
    // or trailing whitespace; previously such a token ended the loop early
    // and the whole text was lost.
    const words = textEl.text().split(/\s+/).filter(Boolean);
    const x = textEl.attr('x');
    const y = textEl.attr('y');
    const dy = parseFloat(textEl.attr('dy') || 0);

    const startLine = (lineIndex) => textEl.append('tspan')
      .attr('x', x)
      .attr('y', y)
      .attr('dy', `${lineIndex * lineHeight + dy}em`);

    textEl.text(null);
    let lineNumber = 0;
    let line = [];
    let tspan = startLine(lineNumber);

    for (const word of words) {
      line.push(word);
      tspan.text(line.join(' '));
      // Break only when an earlier word stays on this line. A single word
      // wider than the limit is left to overflow rather than leaving an
      // empty tspan (and a blank line) behind.
      if (line.length > 1 && tspan.node().getComputedTextLength() > width) {
        line.pop();
        tspan.text(line.join(' '));
        line = [word];
        lineNumber += 1;
        tspan = startLine(lineNumber).text(word);
      }
    }
  });
}
/**
 * Draws (or redraws) the whole chart for the container's current width.
 *
 * Updates the shared `width`/`height`, resizes the <svg>, empties the root
 * <g> and rebuilds the header plus one card per entry of `biases`, laid out
 * in a two-column grid. Takes no arguments and returns nothing.
 */
function render() {
  width = container.clientWidth || 800;
  height = Math.max(550, Math.round(width * 0.7));
  svg.attr('width', width).attr('height', height);
  const margin = { top: 40, right: 20, bottom: 20, left: 20 };
  const innerWidth = width - margin.left - margin.right;
  const innerHeight = height - margin.top - margin.bottom;
  g.attr('transform', `translate(${margin.left},${margin.top})`);
  // Clear previous content
  g.selectAll('*').remove();
  const colors = getColors();

  // Appends one underlined, right-aligned reference link to a card.
  // Fill and font size are inline styles because the stylesheet rule for
  // .example-label would override the equivalent presentation attributes.
  const appendReferenceLink = (parent, reference, x, y, color) => {
    parent.append('a')
      .attr('href', `https://${reference}`)
      .attr('target', '_blank')
      .attr('rel', 'noopener noreferrer')
      .append('text')
      .attr('class', 'example-label')
      .attr('x', x)
      .attr('y', y)
      .attr('text-anchor', 'end')
      .attr('opacity', 0.7)
      .style('font-size', '8px')
      .style('fill', color)
      .style('cursor', 'pointer')
      .style('text-decoration', 'underline')
      .text(reference)
      .on('mouseenter', function() {
        d3.select(this).attr('opacity', 1);
      })
      .on('mouseleave', function() {
        d3.select(this).attr('opacity', 0.7);
      });
  };

  // Header
  g.append('text')
    .attr('class', 'header-text')
    .attr('x', innerWidth / 2)
    .attr('y', -15)
    .attr('text-anchor', 'middle')
    .text('LLM JUDGE BIASES');

  // Card grid: two columns, with as many rows as the data needs.
  const cols = 2;
  const rows = Math.max(1, Math.ceil(biases.length / cols));
  const cardSpacingX = Math.min(20, innerWidth * 0.03);
  const cardSpacingY = Math.min(18, innerHeight * 0.04);
  const cardWidth = (innerWidth - cardSpacingX * (cols - 1)) / cols;
  const cardHeight = (innerHeight - cardSpacingY * (rows - 1)) / rows;

  biases.forEach((bias, i) => {
    const row = Math.floor(i / cols);
    const col = i % cols;
    const x = col * (cardWidth + cardSpacingX);
    const y = row * (cardHeight + cardSpacingY);
    // Cycle through the palette so a short palette still colours every card.
    const color = colors[i % colors.length];

    const cardGroup = g.append('g')
      .attr('transform', `translate(${x},${y})`);

    // Card background with frame
    cardGroup.append('rect')
      .attr('class', 'card-rect')
      .attr('width', cardWidth)
      .attr('height', cardHeight)
      .attr('rx', 12)
      .attr('fill', color)
      .attr('fill-opacity', 0.12)
      .attr('stroke', color)
      .attr('stroke-opacity', 0.6)
      .attr('stroke-width', 2);

    // Title
    cardGroup.append('text')
      .attr('class', 'bias-title')
      .attr('x', cardWidth / 2)
      .attr('y', 20)
      .attr('text-anchor', 'middle')
      .text(bias.title);

    // Description with wrapping
    const descText = cardGroup.append('text')
      .attr('class', 'bias-description')
      .attr('x', cardWidth / 2)
      .attr('y', 36)
      .attr('text-anchor', 'middle')
      .attr('dy', 0)
      .text(bias.description);
    wrapText(descText, cardWidth - 20);

    // Example box (only if the entry carries an `example` field)
    if (bias.example) {
      const exampleY = cardHeight - 55;
      const exampleHeight = 24;
      cardGroup.append('rect')
        .attr('x', 8)
        .attr('y', exampleY)
        .attr('width', cardWidth - 16)
        .attr('height', exampleHeight)
        .attr('rx', 4)
        .attr('fill', color)
        .attr('fill-opacity', 0.15)
        .attr('stroke', color)
        .attr('stroke-width', 1)
        .attr('stroke-opacity', 0.4);
      // Example text; the size is an inline style so the .bias-description
      // stylesheet rule does not override it.
      cardGroup.append('text')
        .attr('class', 'bias-description')
        .attr('x', cardWidth / 2)
        .attr('y', exampleY + 13)
        .attr('text-anchor', 'middle')
        .attr('dominant-baseline', 'middle')
        .style('font-size', '9px')
        .text(bias.example);
    }

    // Reference links, stacked upwards from the bottom-right corner: the
    // last link sits on the base line, earlier ones 10px higher each.
    const references = [bias.reference, bias.reference2].filter(Boolean);
    const refBaseY = bias.example ? cardHeight - 8 : cardHeight - 12;
    references.forEach((reference, k) => {
      const refY = refBaseY - (references.length - 1 - k) * 10;
      appendReferenceLink(cardGroup, reference, cardWidth - 10, refY, color);
    });
  });
}
render();
// Responsive handling.
// render() derives the whole layout from container.clientWidth, so a redraw
// is only needed when that width changes. The guard skips the notification
// ResizeObserver delivers on observe() and the ones caused by render()
// itself changing the container height.
let lastRenderedWidth = container.clientWidth;
const renderIfWidthChanged = () => {
  const currentWidth = container.clientWidth;
  if (currentWidth === lastRenderedWidth) return;
  lastRenderedWidth = currentWidth;
  render();
};
if (window.ResizeObserver) {
  const ro = new ResizeObserver(renderIfWidthChanged);
  ro.observe(container);
} else {
  window.addEventListener('resize', renderIfWidthChanged);
}
};
// Start immediately when the document has already been parsed; otherwise
// wait for DOMContentLoaded so the container element exists.
const start = () => ensureD3(bootstrap);
if (document.readyState !== 'loading') {
  start();
} else {
  document.addEventListener('DOMContentLoaded', start, { once: true });
}
})();
</script>