<!--
dish-embed-benchmarks / index.html
adityapatni's picture
Add contact email to footer
43eea86 verified
-->
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Dish-Embed Benchmark Results</title>
<style>
/* Design tokens shared by the rules below */
:root {
  --bg: #ffffff;
  --text: #1a1a2e;
  --muted: #6b7280;
  --green: #16a34a;
}

/* Minimal reset: border-box sizing, no default margins or padding */
* {
  margin: 0;
  padding: 0;
  box-sizing: border-box;
}

/* Page shell: a centered column capped at 1100px */
body {
  max-width: 1100px;
  margin: 0 auto;
  padding: 36px 32px 28px;
  color: var(--text);
  background: var(--bg);
  font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", sans-serif;
}
/* Page header: title followed by a muted one-line summary */
.header {
  margin-bottom: 24px;
}

.header h1 {
  margin-bottom: 5px;
  font-size: 1.5rem;
  font-weight: 700;
  letter-spacing: -0.02em;
}

.header .subtitle {
  color: var(--muted);
  font-size: 0.85rem;
  line-height: 1.4;
}
/* Legend: one color swatch plus model name per item.
   Each item is white-space: nowrap and therefore cannot shrink below its text
   width, so the row itself has to be allowed to wrap. With nowrap on the row
   the legend overflowed the page on any viewport narrower than the combined
   item widths. When everything fits on one line the layout is unchanged. */
.legend {
  display: flex;
  flex-wrap: wrap;
  gap: 8px 14px; /* row gap only becomes visible once the legend wraps */
  margin-bottom: 22px;
  padding: 10px 14px;
  background: #f9fafb;
  border-radius: 6px;
  border: 1px solid #e5e7eb;
}

/* A single swatch + label pair, kept on one line */
.legend-item {
  display: flex;
  align-items: center;
  gap: 5px;
  font-size: 0.68rem;
  color: #374151;
  white-space: nowrap;
}

/* Optional lighter tagline shown after a model name */
.legend-desc {
  font-size: 0.58rem;
  color: #9ca3af;
}

/* Color swatch; flex-shrink: 0 keeps it square when space is tight */
.legend-dot {
  width: 10px;
  height: 10px;
  border-radius: 2px;
  flex-shrink: 0;
}

/* Thin vertical divider between legend groups */
.legend-sep {
  width: 1px;
  height: 18px;
  background: #d1d5db;
  align-self: center;
}
/* Section heading rows: each spans every grid column and is separated from
   the charts above it by a hairline rule */
.section-group {
  grid-column: 1 / -1;
  margin-top: 10px;
  padding-top: 16px;
  border-top: 1px solid #e5e7eb;
}

/* The first heading in the grid has nothing above it to separate from */
.section-group:first-child {
  margin-top: 0;
  padding-top: 0;
  border-top: none;
}

.section-group-title {
  margin-bottom: 3px;
  color: var(--text);
  font-size: 0.92rem;
  font-weight: 700;
}

.section-group-note {
  margin-bottom: 4px;
  color: #9ca3af;
  font-size: 0.72rem;
  line-height: 1.4;
}
/* Chart layout: two equal-width columns */
.charts-grid {
  display: grid;
  grid-template-columns: repeat(2, 1fr);
  row-gap: 20px;
  column-gap: 28px;
}
/* One benchmark card: a header line followed by its bar rows */
.benchmark {
}

/* Title on the left, metric name on the right, aligned on the text baseline */
.bench-header {
  display: flex;
  align-items: baseline;
  justify-content: space-between;
  margin-bottom: 8px;
}

.bench-title {
  font-weight: 600;
  font-size: 0.85rem;
}

.bench-metric {
  color: var(--muted);
  font-weight: 500;
  font-size: 0.68rem;
}
/* A single bar: fixed-width label, then a track that takes the remaining width */
.bar-row {
  display: flex;
  align-items: center;
  margin-bottom: 4px;
}

.bar-label {
  flex-shrink: 0;
  width: 110px;
  padding-right: 10px;
  text-align: right;
  font-size: 0.72rem;
  color: #6b7280;
}

/* Gray background track; clips the fill to its rounded corners */
.bar-track {
  position: relative;
  flex: 1;
  height: 22px;
  overflow: hidden;
  border-radius: 3px;
  background: #f3f4f6;
}

/* Colored portion; width and color are set inline by the script */
.bar-fill {
  position: relative;
  height: 100%;
  border-radius: 3px;
}

/* Score printed inside the fill, pinned to its right edge */
.bar-value {
  position: absolute;
  top: 50%;
  right: 6px;
  transform: translateY(-50%);
  color: white;
  font-size: 0.68rem;
  font-weight: 700;
  text-shadow: 0 1px 2px rgba(0,0,0,0.2);
}

/* Score printed just past the end of the fill; --bar-width is supplied
   inline on the element */
.bar-value-outside {
  position: absolute;
  top: 50%;
  left: calc(var(--bar-width) + 6px);
  transform: translateY(-50%);
  color: #374151;
  font-size: 0.68rem;
  font-weight: 600;
}

/* Emphasize the label of the top-scoring row */
.bar-row.best .bar-label {
  color: var(--text);
  font-weight: 700;
}
/* Glossary: short definitions of each benchmark, set off by a top rule */
.glossary {
  margin-top: 22px;
  padding-top: 14px;
  border-top: 1px solid #e5e7eb;
}

.glossary-title {
  margin-bottom: 8px;
  color: var(--text);
  font-size: 0.78rem;
  font-weight: 700;
}

/* Entries flow into two equal columns */
.glossary-grid {
  display: grid;
  grid-template-columns: repeat(2, 1fr);
  row-gap: 4px;
  column-gap: 28px;
}

.glossary-item {
  color: #6b7280;
  font-size: 0.68rem;
  line-height: 1.5;
}

/* Term name, slightly darker than its description */
.glossary-item strong {
  margin-right: 4px;
  color: #374151;
}
/* Callout shown under a section note for extra methodology detail */
.section-annotation {
  margin-bottom: 4px;
  padding: 6px 12px;
  border-radius: 4px;
  background: #f1f5f9;
  color: #64748b;
  font-size: 0.68rem;
  line-height: 1.4;
}
/* Footer: methodology note on the left, date and contact on the right */
.footer {
  display: flex;
  justify-content: space-between;
  gap: 24px;
  margin-top: 22px;
  padding-top: 14px;
  border-top: 1px solid #e5e7eb;
  color: #9ca3af;
  font-size: 0.68rem;
  line-height: 1.5;
}

/* Left side absorbs the spare width */
.footer-left {
  flex: 1;
}

/* Right side stays on its own lines and hugs the right edge */
.footer-right {
  white-space: nowrap;
  text-align: right;
}
/* Mobile (viewport width <= 700px): single-column layout, tighter spacing,
   smaller type */
@media (max-width: 700px) {
  body { padding: 20px 14px 18px; }
  .header h1 { font-size: 1.15rem; }

  /* Legend items are nowrap and cannot shrink, so the row must wrap here;
     without flex-wrap the row gap below never applied and the legend
     overflowed the viewport. */
  .legend { flex-wrap: wrap; gap: 10px 14px; padding: 8px 12px; }
  .legend-item { font-size: 0.7rem; }

  /* Stack charts in one column */
  .charts-grid {
    grid-template-columns: 1fr;
    gap: 18px;
  }

  /* Narrower label column leaves more room for the bars */
  .bar-label {
    width: 72px;
    font-size: 0.65rem;
    padding-right: 6px;
  }
  .bar-track { height: 20px; }
  .bar-value { font-size: 0.6rem; right: 4px; }
  .bar-value-outside { font-size: 0.6rem; }
  .bench-title { font-size: 0.78rem; }
  .bench-metric { font-size: 0.62rem; }
  .section-group-title { font-size: 0.82rem; }
  .section-group-note { font-size: 0.65rem; }

  /* Glossary follows the charts into a single column so entries are not
     squeezed into two very narrow columns */
  .glossary-grid { grid-template-columns: 1fr; }

  /* Stack the footer and left-align both halves */
  .footer { flex-direction: column; gap: 8px; }
  .footer-right { text-align: left; }
}
</style>
</head>
<body>
<!-- Page header landmark: title and a one-line summary of the comparison.
     The "header" class is kept so the existing .header styles still apply. -->
<header class="header">
  <h1>Dish-Embed: Food Embedding Benchmark Results</h1>
  <p class="subtitle">Domain-specialized food embedding model vs general-purpose alternatives. All models evaluated at 384 dimensions on identical benchmark data.</p>
</header>
<!-- Color key for the models that appear in the charts -->
<div class="legend">
  <div class="legend-item"><div class="legend-dot" style="background: #16a34a"></div>Dish-Embed</div>
  <div class="legend-item"><div class="legend-dot" style="background: #ea580c"></div>OpenAI TE3-Large</div>
  <div class="legend-item"><div class="legend-dot" style="background: #6b7280"></div>BAAI BGE-M3</div>
  <div class="legend-item"><div class="legend-dot" style="background: #2563eb"></div>Qwen3-Embedding-0.6B <span class="legend-desc">#1 MTEB Multilingual</span></div>
  <div class="legend-item"><div class="legend-dot" style="background: #9333ea"></div>Microsoft E5-Large-v2</div>
  <div class="legend-item"><div class="legend-dot" style="background: #64748b"></div>BGE-Reranker-v2-M3 <span class="legend-desc">Best public reranker</span></div>
</div>

<!-- Chart mount point: the inline script looks this element up by id and
     appends the section headers and bar charts to it -->
<div class="charts-grid" id="charts"></div>
<!-- Glossary: one entry per benchmark with a concrete example.
     Examples written in non-Latin scripts are wrapped in spans carrying
     lang (and dir for Arabic) so assistive technology and font selection
     treat them as the right language. -->
<div class="glossary">
  <h2 class="glossary-title">Benchmark Glossary</h2>
  <div class="glossary-grid">
    <div class="glossary-item"><strong>Indian Cuisine Matching</strong> Matching "Aloo Gobi" to "Potato Cauliflower Curry", "Dal Makhani" to "Black Lentil Curry" across restaurants.</div>
    <div class="glossary-item"><strong>Cross-Language Matching</strong> Matching "<span lang="ja">ラーメン</span>" to "Ramen", "<span lang="ar" dir="rtl">خبز نان</span>" to "Naan Bread" across languages and scripts.</div>
    <div class="glossary-item"><strong>Bakery &amp; Dessert Matching</strong> Matching "Pain au Chocolat" to "Chocolate Croissant", "Crème Brûlée" to "Caramelized Custard".</div>
    <div class="glossary-item"><strong>Beverage Matching</strong> Matching "Iced Americano" to "Cold Black Coffee", "Masala Chai" to "Spiced Tea Latte" across naming conventions.</div>
    <div class="glossary-item"><strong>Synonym Recognition</strong> Retrieving "Pad Kra Pao" from a query for "Thai Basil Stir-Fry", or "Gyoza" from "Pot Stickers".</div>
    <div class="glossary-item"><strong>Cuisine Classification</strong> Classifying "Tom Yum Goong" as Thai, "Cacio e Pepe" as Italian from the dish name alone. 19 cuisine categories.</div>
    <div class="glossary-item"><strong>Category Search</strong> Searching "Thai soups" or "grilled appetizers" and ranking relevant menu items.</div>
    <div class="glossary-item"><strong>Typo-Tolerant Search</strong> Returning "Margherita Pizza" when a customer types "margarita piza".</div>
    <div class="glossary-item"><strong>Food Search</strong> General menu search ranking across diverse food queries and item catalogs.</div>
    <div class="glossary-item"><strong>Global Search</strong> Search across multilingual menus spanning 15+ cuisines worldwide.</div>
    <div class="glossary-item"><strong>Portion Size Sensitivity</strong> Ignoring portion labels like "Regular", "Family Pack", "Serves 2", "250ml" when matching the same dish. Generic models treat size text as meaningful content.</div>
    <div class="glossary-item"><strong>Noisy Menu Matching</strong> Matching "***BEST SELLER*** Paneer Tikka - Chef's Special!!" to "Paneer Tikka" on another menu.</div>
    <div class="glossary-item"><strong>Bilingual Menu Matching</strong> Matching "Falafel Wrap <span lang="ar" dir="rtl">فلافل راب</span>" to "Falafel Wrap" on menus that mix scripts.</div>
    <div class="glossary-item"><strong>Embedding Stability</strong> Producing identical embeddings for "Fried Rice", "<span lang="zh">炒飯</span>", and "<span lang="ja">フライドライス</span>". 1.0 = perfectly consistent across scripts.</div>
  </div>
</div>
<!-- Page footer landmark: methodology note on the left; date, product and
     contact address on the right. The "footer" class is kept so the existing
     .footer styles still apply. -->
<footer class="footer">
  <div class="footer-left">All competing models paired with BGE-Reranker-v2-M3, the strongest publicly available reranker.</div>
  <div class="footer-right">
    <time datetime="2026-04">April 2026</time><br>
    Dish-Embed · embed.statode.com<br>
    <a href="mailto:adityapatni.work@gmail.com" style="color:#9ca3af;text-decoration:none">adityapatni.work@gmail.com</a>
  </div>
</footer>
<script>
// Models charted individually in "full" mode benchmarks.
// Each entry is [key, label, color]: `key` indexes a benchmark's `scores`
// object, `label` is the bar caption, `color` is the bar fill.
const FULL_MODELS = [
  ["dish_embed", "Dish-Embed", "#16a34a"],
  ["openai", "OpenAI TE3L", "#ea580c"],
  ["bge_m3", "BGE-M3", "#6b7280"],
  ["qwen3", "Qwen3-0.6B", "#2563eb"],
  ["e5_large", "E5-Large-v2", "#9333ea"],
].map(([key, label, color]) => ({ key, label, color }));
// Two-bar palette for "collapsed" mode: Dish-Embed against a single
// "All others" bar whose score is stored under the `off_shelf` key.
const COLLAPSED_MODELS = [
  {
    key: "dish_embed",
    label: "Dish-Embed",
    color: "#16a34a",
  },
  {
    key: "off_shelf",
    label: "All others",
    color: "#64748b",
  },
];
// Palette for "reranker" mode: Dish-Embed keeps its own color and is flagged
// with `isOurs`; every other model shares one slate color.
const RERANKER_MODELS = [
  { key: "dish_embed", label: "Dish-Embed", color: "#16a34a", isOurs: true },
  ...[
    ["openai", "OpenAI TE3L"],
    ["bge_m3", "BGE-M3"],
    ["qwen3", "Qwen3-0.6B"],
    ["e5_large", "E5-Large-v2"],
  ].map(([key, label]) => ({ key, label, color: "#64748b" })),
];
// Chart data, grouped into the sections shown on the page.
//
// Section fields:
//   title, note  - heading and one-line description
//   annotation   - optional callout rendered under the note
//   mode         - default model list for the section's benchmarks
//   benchmarks   - list of { title, metric, scores, mode? }
//
// A benchmark's own `mode` takes precedence over its section's. `scores` maps
// a model key to a value that the renderer multiplies by 100 for display.
const SECTIONS = [
  // ── Food Understanding ────────────────────────────────────────────────
  {
    title: "Food Understanding",
    note: "Core food knowledge that powers synonym-aware search, cuisine tagging, and regional variant detection.",
    mode: "full",
    benchmarks: [
      {
        title: "Synonym Recognition",
        metric: "Recall@5",
        scores: { dish_embed: 0.808, openai: 0.749, bge_m3: 0.707, qwen3: 0.514, e5_large: 0.661 },
      },
      {
        title: "Cuisine Classification (19 cuisines)",
        metric: "Macro Accuracy",
        scores: { dish_embed: 0.889, openai: 0.822, bge_m3: 0.762, qwen3: 0.439, e5_large: 0.298 },
      },
    ],
  },

  // ── Menu Search ───────────────────────────────────────────────────────
  {
    title: "Menu Search",
    note: "Ranking relevant menu items when customers search for 'Thai soups' or type 'chiken tikka' with a typo.",
    mode: "full",
    benchmarks: [
      {
        title: "Category Search",
        metric: "NDCG@10",
        scores: { dish_embed: 0.828, openai: 0.797, bge_m3: 0.759, qwen3: 0.802, e5_large: 0.799 },
      },
      {
        title: "Typo-Tolerant Search",
        metric: "NDCG@10",
        scores: { dish_embed: 0.920, openai: 0.884, bge_m3: 0.902, qwen3: 0.892, e5_large: 0.907 },
      },
      {
        title: "Food Search",
        metric: "NDCG@10",
        scores: { dish_embed: 0.943, openai: 0.925, bge_m3: 0.929, qwen3: 0.935, e5_large: 0.939 },
      },
      {
        title: "Global Search",
        metric: "NDCG@10",
        scores: { dish_embed: 0.891, openai: 0.839, bge_m3: 0.886, qwen3: 0.875, e5_large: 0.860 },
      },
    ],
  },

  // ── Cross-Restaurant Item Matching ────────────────────────────────────
  // Collapsed: all non-Dish-Embed models share one `off_shelf` score.
  {
    title: "Cross-Restaurant Item Matching",
    note: "Matching 'Gyoza' to 'Pot Stickers', 'Crème Brûlée' to 'Burnt Cream Custard', 'Dal Makhani' to 'Black Lentil Curry' across thousands of restaurants. Powers price comparison, catalog consolidation, and menu analytics.",
    annotation: "OpenAI TE3-Large, BAAI BGE-M3, Qwen3-Embedding-0.6B, and Microsoft E5-Large-v2 all paired with BGE-Reranker-v2-M3. The reranker determines matching quality, so all embedding models produce identical scores.",
    mode: "collapsed",
    benchmarks: [
      {
        title: "Indian Cuisine Matching",
        metric: "F1",
        scores: { dish_embed: 0.916, off_shelf: 0.754 },
      },
      {
        title: "Cross-Language Matching",
        metric: "F1",
        scores: { dish_embed: 0.831, off_shelf: 0.258 },
      },
      {
        title: "Bakery & Dessert Matching",
        metric: "F1",
        scores: { dish_embed: 0.797, off_shelf: 0.655 },
      },
      {
        title: "Beverage Matching",
        metric: "F1",
        scores: { dish_embed: 0.747, off_shelf: 0.648 },
      },
    ],
  },

  // ── Robustness ────────────────────────────────────────────────────────
  // Every benchmark here sets its own mode, so the section-level "mixed"
  // value is never the one that selects a model list.
  {
    title: "Robustness",
    note: "Consistent performance across portion sizes, formatting differences, and platform-specific conventions.",
    mode: "mixed",
    benchmarks: [
      {
        title: "Portion Size Sensitivity",
        metric: "F1",
        mode: "collapsed",
        scores: { dish_embed: 0.877, off_shelf: 0.082 },
      },
      {
        title: "Noisy Menu Matching",
        metric: "F1",
        mode: "collapsed",
        scores: { dish_embed: 0.922, off_shelf: 0.914 },
      },
      {
        title: "Bilingual Menu Matching",
        metric: "F1",
        mode: "collapsed",
        scores: { dish_embed: 0.886, off_shelf: 0.879 },
      },
      {
        title: "Embedding Stability",
        metric: "Cosine Similarity",
        mode: "full",
        scores: { dish_embed: 1.000, openai: 0.000, bge_m3: 0.506, qwen3: 0.001, e5_large: 0.170 },
      },
    ],
  },
];
// Mount point that receives every generated section header and benchmark card.
const container = document.getElementById("charts");

/**
 * Escape a value for interpolation into an HTML template string.
 * Data such as "Bakery & Dessert Matching" contains markup-significant
 * characters, so it must not be handed to innerHTML verbatim.
 * @param {*} value - Anything printable; it is stringified first.
 * @returns {string} The text with & < > " ' replaced by character references.
 */
function escapeHtml(value) {
  const entities = { "&": "&amp;", "<": "&lt;", ">": "&gt;", '"': "&quot;", "'": "&#39;" };
  return String(value).replace(/[&<>"']/g, ch => entities[ch]);
}

/**
 * Select the model list a benchmark is charted with.
 * @param {string} mode - "reranker", "collapsed", or anything else.
 * @returns {Array<Object>} RERANKER_MODELS, COLLAPSED_MODELS, or FULL_MODELS
 *   (the fallback for every unrecognized mode).
 */
function modelsForMode(mode) {
  if (mode === "reranker") return RERANKER_MODELS;
  if (mode === "collapsed") return COLLAPSED_MODELS;
  return FULL_MODELS;
}

/**
 * Build the full-width heading row for one section: title, note, and the
 * optional annotation callout.
 * @param {Object} section - One entry of SECTIONS.
 * @returns {HTMLDivElement} A detached ".section-group" element.
 */
function buildSectionHeader(section) {
  const groupEl = document.createElement("div");
  groupEl.className = "section-group";
  const annotation = section.annotation
    ? `<div class="section-annotation">${escapeHtml(section.annotation)}</div>`
    : "";
  // h2 gives assistive technology a heading to navigate by; its size, weight
  // and margins come from .section-group-title and the page-wide reset.
  groupEl.innerHTML = `
<h2 class="section-group-title">${escapeHtml(section.title)}</h2>
<div class="section-group-note">${escapeHtml(section.note)}</div>
${annotation}
`;
  return groupEl;
}

/**
 * Build one benchmark card: a header line plus one bar per model that has a
 * score, ordered from highest to lowest.
 * @param {Object} bench - One entry of a section's `benchmarks` array.
 * @param {string} [sectionMode] - The section's mode, used when the benchmark
 *   does not set its own.
 * @returns {HTMLDivElement} A detached ".benchmark" element.
 */
function buildBenchmark(bench, sectionMode) {
  const benchMode = bench.mode || sectionMode || "full";
  const models = modelsForMode(benchMode);

  // With `filterZero` set, models that have no score or a score of 0 are dropped.
  const shownModels = bench.filterZero
    ? models.filter(m => bench.scores[m.key] != null && bench.scores[m.key] > 0)
    : models;

  // Math.max() of an empty list is -Infinity, so only compute it when there
  // is at least one score to compare.
  const activeScores = shownModels.map(m => bench.scores[m.key]).filter(s => s != null);
  const best = activeScores.length ? Math.max(...activeScores) : null;

  let html = `
<div class="bench-header">
<h3 class="bench-title">${escapeHtml(bench.title)}</h3>
<div class="bench-metric">${escapeHtml(bench.metric)}</div>
</div>`;

  // Highest score first; a missing score sorts as 0.
  const sorted = [...shownModels].sort(
    (a, b) => (bench.scores[b.key] || 0) - (bench.scores[a.key] || 0)
  );

  let addedRerankerLabel = false;
  sorted.forEach(model => {
    const score = bench.scores[model.key];
    if (score == null) return;

    const pct = (score * 100).toFixed(1);
    // Clamp so an out-of-range score can neither collapse nor overflow the track.
    const barWidth = Math.min(100, Math.max(0, score * 100));
    // Tolerance comparison so tied models are all marked as best.
    const isBest = best !== null && Math.abs(score - best) < 0.0001;
    const bestClass = isBest ? " best" : "";
    // Short bars have no room for the number, so it is drawn after the fill.
    const valueInside = barWidth > 25;

    // In reranker mode, a caption is inserted once, before the first bar that
    // is not flagged `isOurs`.
    if (benchMode === "reranker" && !model.isOurs && !addedRerankerLabel) {
      html += `<div class="reranker-label">with BGE-Reranker-v2-M3</div>`;
      addedRerankerLabel = true;
    }

    // Non-best bars get a "cc" alpha suffix appended to the hex color so the
    // best bar stands out at full opacity.
    const fillColor = `${model.color}${isBest ? "" : "cc"}`;

    html += `
<div class="bar-row${bestClass}">
<div class="bar-label">${escapeHtml(model.label)}</div>
<div class="bar-track">
<div class="bar-fill" style="width:${barWidth}%;background:${escapeHtml(fillColor)}">
${valueInside ? `<span class="bar-value">${pct}</span>` : ""}
</div>
${!valueInside ? `<span class="bar-value-outside" style="--bar-width:${barWidth}%">${pct}</span>` : ""}
</div>
</div>`;
  });

  const el = document.createElement("div");
  el.className = "benchmark";
  el.innerHTML = html;
  return el;
}

// Render: each section contributes its heading row followed by its cards, in
// the order they appear in SECTIONS.
if (container) {
  SECTIONS.forEach(section => {
    container.appendChild(buildSectionHeader(section));
    section.benchmarks.forEach(bench => {
      container.appendChild(buildBenchmark(bench, section.mode));
    });
  });
} else {
  console.error('Chart container "#charts" not found; benchmark charts were not rendered.');
}
</script>
</body>
</html>