|
|
<div class="d3-text-metrics"></div> |
|
|
|
|
|
<style>
  /* ---- Root container of the text-metrics embed ---- */
  .d3-text-metrics {
    font-family: var(--default-font-family);
    background: transparent;
    padding: 0;
    width: 100%;
    position: relative;
  }

  /* ---- Reference / prediction sentences shown above the grid ---- */
  .d3-text-metrics .example-text {
    font-size: 12px;
    line-height: 1.8;
    color: var(--text-color);
    font-family: monospace;
    margin: 8px 0;
    padding: 10px 12px;
    background: var(--surface-bg);
    border: 1px solid var(--border-color);
    border-radius: 6px;
  }

  /* "REF:" / "PRED:" prefix inside an example line */
  .d3-text-metrics .label {
    font-size: 10px;
    font-weight: 700;
    color: var(--muted-color);
    margin-right: 8px;
  }

  /* ---- Three-column grid of metric cards ---- */
  .d3-text-metrics .metrics-grid {
    display: grid;
    grid-template-columns: repeat(3, 1fr);
    gap: 12px;
    margin: 16px 0;
  }

  .d3-text-metrics .metric-box {
    padding: 12px;
    background: var(--surface-bg);
    border: 1px solid var(--border-color);
    border-radius: 8px;
    transition: border-color 0.2s;
  }

  .d3-text-metrics .metric-name {
    font-size: 13px;
    font-weight: 600;
    color: var(--text-color);
    margin-bottom: 6px;
  }

  .d3-text-metrics .metric-score {
    font-size: 22px;
    font-weight: 700;
    color: var(--primary-color);
    margin-bottom: 4px;
  }

  .d3-text-metrics .metric-detail {
    font-size: 11px;
    color: var(--muted-color);
    line-height: 1.4;
  }

  /* ---- Per-metric breakdown panel; tint is derived from the primary color ---- */
  .d3-text-metrics .visualization {
    margin-top: 8px;
    padding: 8px;
    background: oklch(from var(--primary-color) calc(l + 0.45) c h / 0.06);
    border-radius: 4px;
    font-size: 10px;
  }

  [data-theme="dark"] .d3-text-metrics .visualization {
    background: oklch(from var(--primary-color) calc(l + 0.20) c h / 0.1);
  }

  /* ---- Token chips; `.match` marks an n-gram found in both texts ---- */
  .d3-text-metrics .token {
    display: inline-block;
    padding: 2px 5px;
    margin: 2px;
    border-radius: 3px;
    font-size: 10px;
    background: var(--surface-bg);
    border: 1px solid var(--border-color);
  }

  .d3-text-metrics .token.match {
    background: oklch(from var(--primary-color) calc(l + 0.35) c h / 0.35);
    border-color: var(--primary-color);
    font-weight: 600;
  }

  [data-theme="dark"] .d3-text-metrics .token.match {
    background: oklch(from var(--primary-color) calc(l + 0.25) c h / 0.4);
  }

  /* ---- Controls row and custom-styled <select> ---- */
  .d3-text-metrics .controls {
    display: flex;
    justify-content: center;
    margin-bottom: 16px;
  }

  .d3-text-metrics select {
    font-size: 12px;
    padding: 6px 24px 6px 10px;
    border: 1px solid var(--border-color);
    border-radius: 6px;
    background: var(--surface-bg);
    color: var(--text-color);
    cursor: pointer;
    appearance: none;
    /* The longhands below must stay after the `background` shorthand, which would otherwise reset them. */
    background-image: url("data:image/svg+xml;charset=UTF-8,%3csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 24 24' fill='none' stroke='currentColor' stroke-width='2' stroke-linecap='round' stroke-linejoin='round'%3e%3cpolyline points='6 9 12 15 18 9'%3e%3c/polyline%3e%3c/svg%3e");
    background-repeat: no-repeat;
    background-position: right 6px center;
    background-size: 12px;
  }

  /* ---- Narrow viewports: stack the metric cards in one column ---- */
  @media (max-width: 768px) {
    .d3-text-metrics .metrics-grid {
      grid-template-columns: 1fr;
    }
  }
</style>
|
|
|
|
|
<script> |
|
|
(() => { |
|
|
const bootstrap = () => { |
|
|
const scriptEl = document.currentScript; |
|
|
let container = scriptEl ? scriptEl.previousElementSibling : null; |
|
|
if (!(container && container.classList && container.classList.contains('d3-text-metrics'))) { |
|
|
const candidates = Array.from(document.querySelectorAll('.d3-text-metrics')) |
|
|
.filter((el) => !(el.dataset && el.dataset.mounted === 'true')); |
|
|
container = candidates[candidates.length - 1] || null; |
|
|
} |
|
|
|
|
|
if (!container) return; |
|
|
if (container.dataset) { |
|
|
if (container.dataset.mounted === 'true') return; |
|
|
container.dataset.mounted = 'true'; |
|
|
} |
|
|
|
|
|
// Single example: Cat Evaluator |
|
|
const reference = "My cat loves doing model evaluation and testing benchmarks"; |
|
|
const prediction = "My cat enjoys model evaluation and testing models"; |
|
|
|
|
|
/**
 * Splits text into lowercase tokens separated by runs of whitespace.
 *
 * @param {string} text - Raw sentence.
 * @returns {string[]} Lowercased tokens; an empty array for empty or
 *   whitespace-only input.
 */
const tokenize = (text) => {
  const normalized = text.toLowerCase().trim();
  // ''.split(/\s+/) yields [''], which would count as one (empty) token and
  // defeat every `tokens.length === 0` guard in the callers.
  return normalized === '' ? [] : normalized.split(/\s+/);
};
|
|
|
|
|
/**
 * Lists every contiguous window of `n` tokens, in order of appearance.
 *
 * @param {string[]} tokens - Token sequence.
 * @param {number} n - Window size.
 * @returns {string[][]} One array per window; empty when `tokens` is
 *   shorter than `n`.
 */
const getNgrams = (tokens, n) => {
  // Number of start offsets i satisfying i <= tokens.length - n.
  const windowCount = Math.max(0, tokens.length - n + 1);
  return Array.from({ length: windowCount }, (_, start) => tokens.slice(start, start + n));
};
|
|
|
|
|
/**
 * Exact-match metric: compares the two strings after lowercasing and
 * trimming surrounding whitespace.
 *
 * @param {string} pred - Predicted text.
 * @param {string} ref - Reference text.
 * @returns {number} 1.0 when the normalized strings are equal, otherwise 0.0.
 */
const computeExactMatch = (pred, ref) => {
  const normalize = (value) => value.toLowerCase().trim();
  if (normalize(pred) === normalize(ref)) {
    return 1.0;
  }
  return 0.0;
};
|
|
|
|
|
/**
 * Simplified BLEU: clipped n-gram precision for n = 1..3, combined as the
 * geometric mean of the precisions that are greater than zero.
 *
 * This is a deliberately reduced variant: there is no brevity penalty, and
 * zero precisions are left out of the mean instead of forcing the score to 0.
 *
 * @param {string} pred - Predicted text.
 * @param {string} ref - Reference text.
 * @returns {{score: number, details: Array<{n: number, matches: number,
 *   total: number, matchedNgrams: string[]}>}} Score plus one detail entry
 *   per n for which the prediction has at least one n-gram.
 */
const computeBleu = (pred, ref) => {
  const predTokens = tokenize(pred);
  const refTokens = tokenize(ref);
  if (predTokens.length === 0) return { score: 0, details: [] };

  const details = [];
  const precisions = [];

  for (let n = 1; n <= 3; n++) {
    const predNgrams = getNgrams(predTokens, n);
    if (predNgrams.length === 0) {
      // Prediction is shorter than n tokens: no precision to report.
      precisions.push(0);
      continue;
    }

    // Maps (not plain objects) so that tokens such as "__proto__" or
    // "constructor" cannot collide with Object.prototype members.
    const refCounts = new Map();
    for (const ngram of getNgrams(refTokens, n)) {
      const key = ngram.join(' ');
      refCounts.set(key, (refCounts.get(key) || 0) + 1);
    }

    let matches = 0;
    const matched = new Set(); // keeps first-seen order, de-duplicated
    const predCounts = new Map();

    for (const ngram of predNgrams) {
      const key = ngram.join(' ');
      const seen = (predCounts.get(key) || 0) + 1;
      predCounts.set(key, seen);
      // Clipping: an n-gram is credited at most as often as it occurs in the reference.
      if (seen <= (refCounts.get(key) || 0)) {
        matches++;
        matched.add(key);
      }
    }

    precisions.push(matches / predNgrams.length);
    details.push({
      n,
      matches,
      total: predNgrams.length,
      matchedNgrams: Array.from(matched),
    });
  }

  const validPrecisions = precisions.filter((p) => p > 0);
  const score = validPrecisions.length > 0
    ? Math.exp(validPrecisions.reduce((sum, p) => sum + Math.log(p), 0) / validPrecisions.length)
    : 0;

  return { score, details };
};
|
|
|
|
|
/**
 * ROUGE-1: unigram overlap between prediction and reference, reported as
 * recall, precision and their F1.
 *
 * @param {string} pred - Predicted text.
 * @param {string} ref - Reference text.
 * @returns {{score: number, recall: number, precision: number,
 *   matchedTokens: string[]}} `score` is the F1; `matchedTokens` lists each
 *   distinct shared token once, in order of first appearance in the reference.
 */
const computeRouge1 = (pred, ref) => {
  const predTokens = tokenize(pred);
  const refTokens = tokenize(ref);

  // Counts live in Maps: with plain objects, tokens like "constructor" or
  // "__proto__" hit Object.prototype members and corrupt the arithmetic.
  const countTokens = (tokens) => {
    const counts = new Map();
    for (const token of tokens) {
      counts.set(token, (counts.get(token) || 0) + 1);
    }
    return counts;
  };
  const predCounts = countTokens(predTokens);
  const refCounts = countTokens(refTokens);

  let overlap = 0;
  const matchedTokens = [];
  for (const [token, refCount] of refCounts) {
    const predCount = predCounts.get(token) || 0;
    if (predCount > 0) {
      // A shared token counts as often as it appears on both sides.
      overlap += Math.min(predCount, refCount);
      matchedTokens.push(token);
    }
  }

  const recall = refTokens.length > 0 ? overlap / refTokens.length : 0;
  const precision = predTokens.length > 0 ? overlap / predTokens.length : 0;
  const f1 = (precision + recall) > 0 ? 2 * precision * recall / (precision + recall) : 0;

  return { score: f1, recall, precision, matchedTokens };
};
|
|
|
|
|
/**
 * ROUGE-2: bigram overlap between prediction and reference, reported as
 * recall, precision and their F1.
 *
 * @param {string} pred - Predicted text.
 * @param {string} ref - Reference text.
 * @returns {{score: number, recall: number, precision: number,
 *   matchedBigrams: string[]}} `score` is the F1; `matchedBigrams` lists each
 *   distinct shared bigram once, in order of first appearance in the
 *   reference. All zeros when the reference has no bigram.
 */
const computeRouge2 = (pred, ref) => {
  const predBigrams = getNgrams(tokenize(pred), 2);
  const refBigrams = getNgrams(tokenize(ref), 2);

  // A reference with fewer than two tokens offers nothing to match against.
  if (refBigrams.length === 0) {
    return { score: 0, recall: 0, precision: 0, matchedBigrams: [] };
  }

  // Tally occurrences keyed by the space-joined bigram.
  const tally = (bigrams) => bigrams.reduce((counts, pair) => {
    const key = pair.join(' ');
    counts.set(key, (counts.get(key) || 0) + 1);
    return counts;
  }, new Map());

  const inPred = tally(predBigrams);
  const inRef = tally(refBigrams);

  const matchedBigrams = [];
  let shared = 0;
  inRef.forEach((refCount, key) => {
    const predCount = inPred.get(key);
    if (!predCount) return;
    shared += Math.min(predCount, refCount);
    matchedBigrams.push(key);
  });

  const recall = shared / refBigrams.length;
  const precision = predBigrams.length > 0 ? shared / predBigrams.length : 0;

  let f1 = 0;
  if (precision + recall > 0) {
    f1 = 2 * precision * recall / (precision + recall);
  }

  return { score: f1, recall, precision, matchedBigrams };
};
|
|
|
|
|
/**
 * Levenshtein distance between two sequences, together with one sequence of
 * edits that turns `s1` into `s2`.
 *
 * @param {ArrayLike<*>} s1 - Source sequence (indexable, with `length`).
 * @param {ArrayLike<*>} s2 - Target sequence (indexable, with `length`).
 * @returns {{distance: number, operations: Array<Object>}} `operations` is
 *   ordered from the start of the sequences to the end. Each entry is
 *   `{type: 'insert'|'delete', value, pos}` or
 *   `{type: 'substitute', from, to, pos}`; `pos` is a 1-based index into
 *   `s2` for inserts and into `s1` otherwise.
 */
const computeEditDistanceWithOps = (s1, s2) => {
  const rows = s1.length;
  const cols = s2.length;

  // cost[r][c] = edits needed to turn the first r items of s1 into the first
  // c items of s2. Row 0 and column 0 are the pure-insert / pure-delete cases.
  const cost = Array.from({ length: rows + 1 }, (_, r) =>
    Array.from({ length: cols + 1 }, (_, c) => {
      if (r === 0) return c;
      if (c === 0) return r;
      return 0;
    }));

  for (let r = 1; r <= rows; r++) {
    for (let c = 1; c <= cols; c++) {
      const diagonal = cost[r - 1][c - 1];
      cost[r][c] = s1[r - 1] === s2[c - 1]
        ? diagonal
        : 1 + Math.min(cost[r - 1][c], cost[r][c - 1], diagonal);
    }
  }

  // Walk back from the bottom-right corner. Edits are collected end-first
  // and reversed once at the end.
  const collected = [];
  let r = rows;
  let c = cols;

  while (r > 0 || c > 0) {
    if (r === 0) {
      collected.push({ type: 'insert', value: s2[c - 1], pos: c });
      c -= 1;
      continue;
    }
    if (c === 0) {
      collected.push({ type: 'delete', value: s1[r - 1], pos: r });
      r -= 1;
      continue;
    }
    if (s1[r - 1] === s2[c - 1]) {
      // Equal items cost nothing; step diagonally without recording an edit.
      r -= 1;
      c -= 1;
      continue;
    }

    const viaDelete = cost[r - 1][c];
    const viaInsert = cost[r][c - 1];
    const viaSubstitute = cost[r - 1][c - 1];

    // Tie-break order: substitute first, then delete, then insert.
    if (viaSubstitute <= viaDelete && viaSubstitute <= viaInsert) {
      collected.push({ type: 'substitute', from: s1[r - 1], to: s2[c - 1], pos: r });
      r -= 1;
      c -= 1;
    } else if (viaDelete <= viaInsert) {
      collected.push({ type: 'delete', value: s1[r - 1], pos: r });
      r -= 1;
    } else {
      collected.push({ type: 'insert', value: s2[c - 1], pos: c });
      c -= 1;
    }
  }

  return { distance: cost[rows][cols], operations: collected.reverse() };
};
|
|
|
|
|
/**
 * Translation Error Rate as computed here: the token-level edit distance
 * from prediction to reference, divided by the reference length.
 * Only insert / delete / substitute edits are counted.
 *
 * @param {string} pred - Predicted text.
 * @param {string} ref - Reference text.
 * @returns {{score: number, edits: number, refLength: number,
 *   operations: Array<Object>}} `score` falls back to 1.0 when the
 *   reference has no tokens.
 */
const computeTer = (pred, ref) => {
  const hypothesis = tokenize(pred);
  const target = tokenize(ref);
  const alignment = computeEditDistanceWithOps(hypothesis, target);
  const refLength = target.length;

  let score = 1.0;
  if (refLength > 0) {
    score = alignment.distance / refLength;
  }

  return {
    score,
    edits: alignment.distance,
    refLength,
    operations: alignment.operations,
  };
};
|
|
|
|
|
/**
 * Stand-in for BLEURT: Jaccard similarity of the two token sets, mapped
 * linearly by `jaccard * 1.5 - 0.5`. No model is involved.
 *
 * @param {string} pred - Predicted text.
 * @param {string} ref - Reference text.
 * @returns {{score: number, jaccard: number}} The rescaled value and the raw
 *   Jaccard similarity it was derived from.
 */
const computeBleurtMock = (pred, ref) => {
  const predVocab = new Set(tokenize(pred));
  const refVocab = new Set(tokenize(ref));

  let sharedCount = 0;
  predVocab.forEach((token) => {
    if (refVocab.has(token)) sharedCount += 1;
  });

  // |A ∪ B| = |A| + |B| - |A ∩ B|
  const unionCount = predVocab.size + refVocab.size - sharedCount;
  const jaccard = unionCount > 0 ? sharedCount / unionCount : 0;

  return { score: jaccard * 1.5 - 0.5, jaccard };
};
|
|
|
|
|
/**
 * Computes all six metrics for the fixed reference / prediction pair and
 * replaces the container's content with the example lines followed by one
 * card per metric.
 */
const render = () => {
  const exactMatch = computeExactMatch(prediction, reference);
  const bleu = computeBleu(prediction, reference);
  const rouge1 = computeRouge1(prediction, reference);
  const rouge2 = computeRouge2(prediction, reference);
  const ter = computeTer(prediction, reference);
  const bleurt = computeBleurtMock(prediction, reference);

  // Row of highlighted chips for the first `limit` items, plus a "+N more"
  // note when the list is longer.
  const chipRow = (items, limit) => `
      <div style="margin: 2px 0;">
        ${items.slice(0, limit).map((item) => `<span class="token match">${item}</span>`).join('')}
        ${items.length > limit ? `<span style="color: var(--muted-color); font-size: 10px;">+${items.length - limit} more</span>` : ''}
      </div>
    `;

  // One bullet line per edit operation; unknown types render as nothing.
  const describeOperation = (op) => {
    switch (op.type) {
      case 'substitute':
        return `<div style="margin: 2px 0;">• Replace "<strong>${op.from}</strong>" → "<strong>${op.to}</strong>"</div>`;
      case 'delete':
        return `<div style="margin: 2px 0;">• Delete "<strong>${op.value}</strong>"</div>`;
      case 'insert':
        return `<div style="margin: 2px 0;">• Insert "<strong>${op.value}</strong>"</div>`;
      default:
        return '';
    }
  };

  const editOperationsHtml = ter.operations.length > 0 ? `
          <div style="margin-top: 8px; font-size: 10px;">
            <div style="margin-bottom: 4px; color: var(--muted-color);">Edit operations:</div>
            ${ter.operations.map(describeOperation).join('')}
          </div>
        ` : '';

  const bleuBreakdownHtml = bleu.details.map((detail) => `
          <div style="margin: 4px 0;">
            <strong>${detail.n}-gram:</strong> ${detail.matches}/${detail.total} (${(detail.matches/detail.total*100).toFixed(0)}%)
          </div>
          ${chipRow(detail.matchedNgrams, 3)}
        `).join('');

  const rouge1MatchesHtml = rouge1.matchedTokens.length > 0
    ? chipRow(rouge1.matchedTokens, 5)
    : '';

  const rouge2MatchesHtml = rouge2.matchedBigrams.length > 0
    ? chipRow(rouge2.matchedBigrams, 3)
    : '<div style="margin: 2px 0; font-size: 10px; color: var(--muted-color);">No bigram matches</div>';

  container.innerHTML = `
    <div class="example-text">
      <span class="label">REF:</span>${reference}
    </div>
    <div class="example-text">
      <span class="label">PRED:</span>${prediction}
    </div>

    <div class="metrics-grid">

      <div class="metric-box">
        <div class="metric-name">Exact Match</div>
        <div class="metric-score">${exactMatch.toFixed(1)}</div>
        <div class="metric-detail">Binary: 1 or 0</div>
        <div class="visualization">
          <div style="margin: 4px 0; font-size: 14px;">
            ${exactMatch === 1 ? '✓ Strings are identical' : '✗ Strings differ'}
          </div>
          <div style="margin-top: 6px; font-size: 9px; color: var(--muted-color);">
            Most strict metric - no partial credit
          </div>
        </div>
      </div>

      <div class="metric-box">
        <div class="metric-name">Translation Error Rate</div>
        <div class="metric-score">${ter.score.toFixed(3)}</div>
        <div class="metric-detail">Edit distance normalized</div>
        <div class="visualization">
          <div style="margin: 4px 0;">
            <strong>${ter.edits}</strong> edits / <strong>${ter.refLength}</strong> words = <strong>${ter.score.toFixed(3)}</strong>
          </div>
          ${editOperationsHtml}
          <div style="margin-top: 6px; font-size: 9px; color: var(--muted-color);">
            Lower is better (0 = identical)
          </div>
        </div>
      </div>

      <div class="metric-box">
        <div class="metric-name">BLEURT</div>
        <div class="metric-score">${bleurt.score.toFixed(3)}</div>
        <div class="metric-detail">Semantic similarity</div>
        <div class="visualization">
          <div style="margin-top: 6px; font-size: 9px; color: var(--muted-color); font-style: italic;">
            BLEURT uses BERT embeddings learned from real text.
          </div>
        </div>
      </div>

      <div class="metric-box">
        <div class="metric-name">BLEU</div>
        <div class="metric-score">${bleu.score.toFixed(3)}</div>
        <div class="metric-detail">N-gram precision-based</div>
        <div class="visualization">
          ${bleuBreakdownHtml}
        </div>
      </div>

      <div class="metric-box">
        <div class="metric-name">ROUGE-1</div>
        <div class="metric-score">${rouge1.score.toFixed(3)}</div>
        <div class="metric-detail">Unigram-based F1</div>
        <div class="visualization">
          <div style="margin: 4px 0;">
            <strong>Recall:</strong> ${(rouge1.recall * 100).toFixed(0)}% | <strong>Precision:</strong> ${(rouge1.precision * 100).toFixed(0)}%
          </div>
          <div style="margin-top: 6px; font-size: 9px; color: var(--muted-color);">
            Matched unigrams:
          </div>
          ${rouge1MatchesHtml}
        </div>
      </div>

      <div class="metric-box">
        <div class="metric-name">ROUGE-2</div>
        <div class="metric-score">${rouge2.score.toFixed(3)}</div>
        <div class="metric-detail">Bigram-based F1</div>
        <div class="visualization">
          <div style="margin: 4px 0;">
            <strong>Recall:</strong> ${(rouge2.recall * 100).toFixed(0)}% | <strong>Precision:</strong> ${(rouge2.precision * 100).toFixed(0)}%
          </div>
          <div style="margin-top: 6px; font-size: 9px; color: var(--muted-color);">
            Matched bigrams:
          </div>
          ${rouge2MatchesHtml}
        </div>
      </div>
    </div>
  `;
};
|
|
|
|
|
render(); |
|
|
}; |
|
|
|
|
|
// Run bootstrap immediately when the document has already been parsed;
// otherwise wait (once) for DOMContentLoaded.
if (document.readyState !== 'loading') {
  bootstrap();
} else {
  document.addEventListener('DOMContentLoaded', bootstrap, { once: true });
}
|
|
})(); |
|
|
</script> |
|
|
|