Spaces:

adityapatni
/

dish-embed-benchmarks

Running

App Files Files Community

dish-embed-benchmarks / index.html

adityapatni

Add contact email to footer

43eea86 verified 5 days ago

raw

history blame contribute delete

16.2 kB

	<!DOCTYPE html>
	<html lang="en">
	<head>
	<meta charset="UTF-8">
	<meta name="viewport" content="width=device-width, initial-scale=1.0">
	<title>Dish-Embed Benchmark Results</title>
	<style>
	/* Colour tokens; --bg, --text and --muted are consumed through var() below. */
	:root {
	  --bg: #ffffff;
	  --text: #1a1a2e;
	  --muted: #6b7280;
	  --green: #16a34a;
	}

	/* Reset: border-box sizing everywhere, no default margins or padding. */
	* {
	  margin: 0;
	  padding: 0;
	  box-sizing: border-box;
	}

	/* Page shell: a horizontally centred column capped at 1100px. */
	body {
	  max-width: 1100px;
	  margin: 0 auto;
	  padding: 36px 32px 28px;
	  color: var(--text);
	  background: var(--bg);
	  font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", sans-serif;
	}

	/* Header */
	.header {
	  margin-bottom: 24px;
	}
	.header h1 {
	  margin-bottom: 5px;
	  font-size: 1.5rem;
	  font-weight: 700;
	  letter-spacing: -0.02em;
	}
	.header .subtitle {
	  color: var(--muted);
	  font-size: 0.85rem;
	  line-height: 1.4;
	}

	/* Legend */
	/* Row of colour keys. Items never break internally (white-space: nowrap),
	   so the row itself must be allowed to wrap; otherwise the legend overflows
	   the page whenever the viewport is narrower than all keys side by side. */
	.legend {
	  display: flex;
	  flex-wrap: wrap;
	  gap: 8px 14px; /* row gap is only visible once the legend has wrapped */
	  margin-bottom: 22px;
	  padding: 10px 14px;
	  background: #f9fafb;
	  border: 1px solid #e5e7eb;
	  border-radius: 6px;
	}
	/* One key: colour dot followed by the model name. */
	.legend-item {
	  display: flex;
	  align-items: center;
	  gap: 5px;
	  font-size: 0.68rem;
	  color: #374151;
	  white-space: nowrap;
	}
	/* Small secondary caption shown after a model name. */
	.legend-desc {
	  font-size: 0.58rem;
	  color: #9ca3af;
	}
	/* Colour swatch; its background is supplied inline in the markup. */
	.legend-dot {
	  width: 10px;
	  height: 10px;
	  border-radius: 2px;
	  flex-shrink: 0;
	}
	/* Thin vertical divider between legend groups. */
	.legend-sep {
	  width: 1px;
	  height: 18px;
	  background: #d1d5db;
	  align-self: center;
	}

	/* Section group headers */
	/* Each group header spans every column of the charts grid and is set off
	   from the charts above it by a hairline rule. */
	.section-group {
	  grid-column: 1 / -1;
	  margin-top: 10px;
	  padding-top: 16px;
	  border-top: 1px solid #e5e7eb;
	}
	/* The very first group has nothing above it, so drop the rule and spacing. */
	.section-group:first-child {
	  margin-top: 0;
	  padding-top: 0;
	  border-top: none;
	}
	.section-group-title {
	  margin-bottom: 3px;
	  color: var(--text);
	  font-size: 0.92rem;
	  font-weight: 700;
	}
	.section-group-note {
	  margin-bottom: 4px;
	  color: #9ca3af;
	  font-size: 0.72rem;
	  line-height: 1.4;
	}

	/* Two-column grid */
	.charts-grid {
	  display: grid;
	  grid-template-columns: repeat(2, 1fr);
	  row-gap: 20px;
	  column-gap: 28px;
	}

	/* Benchmark section */
	/* No declarations of its own; the class is assigned by the chart script. */
	.benchmark {}
	/* Title on the left, metric name on the right, aligned on the text baseline. */
	.bench-header {
	  display: flex;
	  align-items: baseline;
	  justify-content: space-between;
	  margin-bottom: 8px;
	}
	.bench-title {
	  font-size: 0.85rem;
	  font-weight: 600;
	}
	.bench-metric {
	  color: var(--muted);
	  font-size: 0.68rem;
	  font-weight: 500;
	}

	/* Bar rows */
	/* One row per model: fixed-width label, then a track that takes the rest. */
	.bar-row {
	  display: flex;
	  align-items: center;
	  margin-bottom: 4px;
	}
	.bar-label {
	  flex-shrink: 0;
	  width: 110px;
	  padding-right: 10px;
	  color: #6b7280;
	  font-size: 0.72rem;
	  text-align: right;
	}
	/* Positioning context for the fill and for values drawn outside the fill. */
	.bar-track {
	  position: relative;
	  flex: 1;
	  height: 22px;
	  overflow: hidden;
	  background: #f3f4f6;
	  border-radius: 3px;
	}
	/* Width and colour are set inline by the chart script. */
	.bar-fill {
	  position: relative;
	  height: 100%;
	  border-radius: 3px;
	}
	/* Value printed inside the fill, pinned to its right edge. */
	.bar-value {
	  position: absolute;
	  top: 50%;
	  right: 6px;
	  transform: translateY(-50%);
	  color: white;
	  font-size: 0.68rem;
	  font-weight: 700;
	  text-shadow: 0 1px 2px rgba(0,0,0,0.2);
	}
	/* Value printed just past the end of a short fill; --bar-width is supplied
	   inline on the element by the chart script. */
	.bar-value-outside {
	  position: absolute;
	  top: 50%;
	  left: calc(var(--bar-width) + 6px);
	  transform: translateY(-50%);
	  color: #374151;
	  font-size: 0.68rem;
	  font-weight: 600;
	}
	/* Emphasise the label of the top-scoring row. */
	.bar-row.best .bar-label {
	  color: var(--text);
	  font-weight: 700;
	}

	/* Glossary */
	.glossary {
	  margin-top: 22px;
	  padding-top: 14px;
	  border-top: 1px solid #e5e7eb;
	}
	.glossary-title {
	  margin-bottom: 8px;
	  color: var(--text);
	  font-size: 0.78rem;
	  font-weight: 700;
	}
	/* Two equal columns of definitions. */
	.glossary-grid {
	  display: grid;
	  grid-template-columns: repeat(2, 1fr);
	  row-gap: 4px;
	  column-gap: 28px;
	}
	.glossary-item {
	  color: #6b7280;
	  font-size: 0.68rem;
	  line-height: 1.5;
	}
	/* The term being defined. */
	.glossary-item strong {
	  margin-right: 4px;
	  color: #374151;
	}

	/* Section annotation */
	.section-annotation {
	  margin-bottom: 4px;
	  padding: 6px 12px;
	  color: #64748b;
	  background: #f1f5f9;
	  border-radius: 4px;
	  font-size: 0.68rem;
	  line-height: 1.4;
	}

	/* Footer */
	/* Two blocks pushed to opposite ends of the row. */
	.footer {
	  display: flex;
	  justify-content: space-between;
	  gap: 24px;
	  margin-top: 22px;
	  padding-top: 14px;
	  border-top: 1px solid #e5e7eb;
	  color: #9ca3af;
	  font-size: 0.68rem;
	  line-height: 1.5;
	}
	.footer-left {
	  flex: 1;
	}
	.footer-right {
	  text-align: right;
	  white-space: nowrap;
	}

	/* Mobile */
	/* Overrides for viewports up to 700px wide: single-column charts, tighter
	   spacing, smaller type, stacked footer. */
	@media (max-width: 700px) {
	  body { padding: 20px 14px 18px; }
	  .header h1 { font-size: 1.15rem; }
	  /* The legend must wrap here: its items are white-space: nowrap, so a
	     non-wrapping row overflows a narrow screen, and the 10px row gap below
	     would otherwise never take effect. */
	  .legend { flex-wrap: wrap; gap: 10px 14px; padding: 8px 12px; }
	  .legend-item { font-size: 0.7rem; }
	  .charts-grid {
	    grid-template-columns: 1fr;
	    gap: 18px;
	  }
	  .bar-label {
	    width: 72px;
	    font-size: 0.65rem;
	    padding-right: 6px;
	  }
	  .bar-track { height: 20px; }
	  .bar-value { font-size: 0.6rem; right: 4px; }
	  .bar-value-outside { font-size: 0.6rem; }
	  .bench-title { font-size: 0.78rem; }
	  .bench-metric { font-size: 0.62rem; }
	  .section-group-title { font-size: 0.82rem; }
	  .section-group-note { font-size: 0.65rem; }
	  .footer { flex-direction: column; gap: 8px; }
	  .footer-right { text-align: left; }
	}
	</style>
	</head>
	<body>

	<!-- Page banner. Uses the <header> landmark so assistive technology can
	     jump to it; the "header" class keeps the existing styles applied. -->
	<header class="header">
	  <h1>Dish-Embed: Food Embedding Benchmark Results</h1>
	  <p class="subtitle">Domain-specialized food embedding model vs general-purpose alternatives. All models evaluated at 384 dimensions on identical benchmark data.</p>
	</header>

	<!-- Colour key for the charts. Each dot colour matches a `color` value used
	     by the chart script further down the page. -->
	<div class="legend">
	  <div class="legend-item">
	    <div class="legend-dot" style="background:#16a34a"></div>
	    Dish-Embed
	  </div>
	  <div class="legend-item">
	    <div class="legend-dot" style="background:#ea580c"></div>
	    OpenAI TE3-Large
	  </div>
	  <div class="legend-item">
	    <div class="legend-dot" style="background:#6b7280"></div>
	    BAAI BGE-M3
	  </div>
	  <div class="legend-item">
	    <div class="legend-dot" style="background:#2563eb"></div>
	    Qwen3-Embedding-0.6B
	    <span class="legend-desc">#1 MTEB Multilingual</span>
	  </div>
	  <div class="legend-item">
	    <div class="legend-dot" style="background:#9333ea"></div>
	    Microsoft E5-Large-v2
	  </div>
	  <div class="legend-item">
	    <div class="legend-dot" style="background:#64748b"></div>
	    BGE-Reranker-v2-M3
	    <span class="legend-desc">Best public reranker</span>
	  </div>
	</div>

	<!-- Mount point for the charts. It is empty in the markup; the inline script
	     below looks it up by id and appends one .section-group header per section
	     followed by one .benchmark block per benchmark. -->
	<div id="charts" class="charts-grid"></div>

	<!-- Plain-language description of every benchmark shown in the charts.
	     Example strings in non-Latin scripts carry a lang (and, for Arabic, dir)
	     attribute so browsers pick suitable glyphs and screen readers can switch
	     pronunciation. -->
	<div class="glossary">
	  <h2 class="glossary-title">Benchmark Glossary</h2>
	  <div class="glossary-grid">
	    <div class="glossary-item"><strong>Indian Cuisine Matching</strong> Matching "Aloo Gobi" to "Potato Cauliflower Curry", "Dal Makhani" to "Black Lentil Curry" across restaurants.</div>
	    <div class="glossary-item"><strong>Cross-Language Matching</strong> Matching "<span lang="ja">ラーメン</span>" to "Ramen", "<span lang="ar" dir="rtl">خبز نان</span>" to "Naan Bread" across languages and scripts.</div>
	    <div class="glossary-item"><strong>Bakery &amp; Dessert Matching</strong> Matching "Pain au Chocolat" to "Chocolate Croissant", "Crème Brûlée" to "Caramelized Custard".</div>
	    <div class="glossary-item"><strong>Beverage Matching</strong> Matching "Iced Americano" to "Cold Black Coffee", "Masala Chai" to "Spiced Tea Latte" across naming conventions.</div>
	    <div class="glossary-item"><strong>Synonym Recognition</strong> Retrieving "Pad Kra Pao" from a query for "Thai Basil Stir-Fry", or "Gyoza" from "Pot Stickers".</div>
	    <div class="glossary-item"><strong>Cuisine Classification</strong> Classifying "Tom Yum Goong" as Thai, "Cacio e Pepe" as Italian from the dish name alone. 19 cuisine categories.</div>
	    <div class="glossary-item"><strong>Category Search</strong> Searching "Thai soups" or "grilled appetizers" and ranking relevant menu items.</div>
	    <div class="glossary-item"><strong>Typo-Tolerant Search</strong> Returning "Margherita Pizza" when a customer types "margarita piza".</div>
	    <div class="glossary-item"><strong>Food Search</strong> General menu search ranking across diverse food queries and item catalogs.</div>
	    <div class="glossary-item"><strong>Global Search</strong> Search across multilingual menus spanning 15+ cuisines worldwide.</div>
	    <div class="glossary-item"><strong>Portion Size Sensitivity</strong> Ignoring portion labels like "Regular", "Family Pack", "Serves 2", "250ml" when matching the same dish. Generic models treat size text as meaningful content.</div>
	    <div class="glossary-item"><strong>Noisy Menu Matching</strong> Matching "*BEST SELLER* Paneer Tikka - Chef's Special!!" to "Paneer Tikka" on another menu.</div>
	    <div class="glossary-item"><strong>Bilingual Menu Matching</strong> Matching "Falafel Wrap <span lang="ar" dir="rtl">فلافل راب</span>" to "Falafel Wrap" on menus that mix scripts.</div>
	    <!-- NOTE(review): 炒飯 is tagged as Traditional Chinese; confirm it is not meant as Japanese. -->
	    <div class="glossary-item"><strong>Embedding Stability</strong> Producing identical embeddings for "Fried Rice", "<span lang="zh-Hant">炒飯</span>", and "<span lang="ja">フライドライス</span>". 1.0 = perfectly consistent across scripts.</div>
	  </div>
	</div>

	<!-- Page footer: methodology note on the left, date and contact details on
	     the right. Uses the <footer> landmark and a machine-readable <time>; the
	     "footer" class keeps the existing styles applied. -->
	<footer class="footer">
	  <div class="footer-left">All competing models paired with BGE-Reranker-v2-M3, the strongest publicly available reranker.</div>
	  <div class="footer-right"><time datetime="2026-04">April 2026</time><br>Dish-Embed · embed.statode.com<br><a href="mailto:adityapatni.work@gmail.com" style="color:#9ca3af;text-decoration:none">adityapatni.work@gmail.com</a></div>
	</footer>

	<script>
	// Model rosters. Each entry pairs a `key` (looked up in a benchmark's
	// `scores` object) with the row label and bar colour used when drawing it.

	// Roster used when a benchmark's mode is neither "reranker" nor "collapsed".
	const FULL_MODELS = [
	  {
	    key: "dish_embed",
	    label: "Dish-Embed",
	    color: "#16a34a",
	  },
	  {
	    key: "openai",
	    label: "OpenAI TE3L",
	    color: "#ea580c",
	  },
	  {
	    key: "bge_m3",
	    label: "BGE-M3",
	    color: "#6b7280",
	  },
	  {
	    key: "qwen3",
	    label: "Qwen3-0.6B",
	    color: "#2563eb",
	  },
	  {
	    key: "e5_large",
	    label: "E5-Large-v2",
	    color: "#9333ea",
	  },
	];

	// Roster for "collapsed" benchmarks: Dish-Embed against a single combined row.
	const COLLAPSED_MODELS = [
	  {
	    key: "dish_embed",
	    label: "Dish-Embed",
	    color: "#16a34a",
	  },
	  {
	    key: "off_shelf",
	    label: "All others",
	    color: "#64748b",
	  },
	];

	// Roster for "reranker" benchmarks: every model except Dish-Embed shares one
	// colour, and `isOurs` marks the row that precedes the reranker label.
	const RERANKER_MODELS = [
	  {
	    key: "dish_embed",
	    label: "Dish-Embed",
	    color: "#16a34a",
	    isOurs: true,
	  },
	  {
	    key: "openai",
	    label: "OpenAI TE3L",
	    color: "#64748b",
	  },
	  {
	    key: "bge_m3",
	    label: "BGE-M3",
	    color: "#64748b",
	  },
	  {
	    key: "qwen3",
	    label: "Qwen3-0.6B",
	    color: "#64748b",
	  },
	  {
	    key: "e5_large",
	    label: "E5-Large-v2",
	    color: "#64748b",
	  },
	];

	// Chart data, in display order.
	//
	// Section fields:   title, note, optional annotation, mode, benchmarks[].
	// Benchmark fields: title, metric, optional mode (takes precedence over the
	//                   section's mode), and scores keyed by model `key`.
	// Scores are fractions of 1; the renderer multiplies them by 100 for display.
	const SECTIONS = [
	  // ---- Food Understanding ------------------------------------------------
	  {
	    title: "Food Understanding",
	    note: "Core food knowledge that powers synonym-aware search, cuisine tagging, and regional variant detection.",
	    mode: "full",
	    benchmarks: [
	      {
	        title: "Synonym Recognition",
	        metric: "Recall@5",
	        scores: { dish_embed: 0.808, openai: 0.749, bge_m3: 0.707, qwen3: 0.514, e5_large: 0.661 },
	      },
	      {
	        title: "Cuisine Classification (19 cuisines)",
	        metric: "Macro Accuracy",
	        scores: { dish_embed: 0.889, openai: 0.822, bge_m3: 0.762, qwen3: 0.439, e5_large: 0.298 },
	      },
	    ],
	  },

	  // ---- Menu Search -------------------------------------------------------
	  {
	    title: "Menu Search",
	    note: "Ranking relevant menu items when customers search for 'Thai soups' or type 'chiken tikka' with a typo.",
	    mode: "full",
	    benchmarks: [
	      {
	        title: "Category Search",
	        metric: "NDCG@10",
	        scores: { dish_embed: 0.828, openai: 0.797, bge_m3: 0.759, qwen3: 0.802, e5_large: 0.799 },
	      },
	      {
	        title: "Typo-Tolerant Search",
	        metric: "NDCG@10",
	        scores: { dish_embed: 0.920, openai: 0.884, bge_m3: 0.902, qwen3: 0.892, e5_large: 0.907 },
	      },
	      {
	        title: "Food Search",
	        metric: "NDCG@10",
	        scores: { dish_embed: 0.943, openai: 0.925, bge_m3: 0.929, qwen3: 0.935, e5_large: 0.939 },
	      },
	      {
	        title: "Global Search",
	        metric: "NDCG@10",
	        scores: { dish_embed: 0.891, openai: 0.839, bge_m3: 0.886, qwen3: 0.875, e5_large: 0.860 },
	      },
	    ],
	  },

	  // ---- Cross-Restaurant Item Matching (two rows per chart) ---------------
	  {
	    title: "Cross-Restaurant Item Matching",
	    note: "Matching 'Gyoza' to 'Pot Stickers', 'Crème Brûlée' to 'Burnt Cream Custard', 'Dal Makhani' to 'Black Lentil Curry' across thousands of restaurants. Powers price comparison, catalog consolidation, and menu analytics.",
	    annotation: "OpenAI TE3-Large, BAAI BGE-M3, Qwen3-Embedding-0.6B, and Microsoft E5-Large-v2 all paired with BGE-Reranker-v2-M3. The reranker determines matching quality, so all embedding models produce identical scores.",
	    mode: "collapsed",
	    benchmarks: [
	      {
	        title: "Indian Cuisine Matching",
	        metric: "F1",
	        scores: { dish_embed: 0.916, off_shelf: 0.754 },
	      },
	      {
	        title: "Cross-Language Matching",
	        metric: "F1",
	        scores: { dish_embed: 0.831, off_shelf: 0.258 },
	      },
	      {
	        title: "Bakery & Dessert Matching",
	        metric: "F1",
	        scores: { dish_embed: 0.797, off_shelf: 0.655 },
	      },
	      {
	        title: "Beverage Matching",
	        metric: "F1",
	        scores: { dish_embed: 0.747, off_shelf: 0.648 },
	      },
	    ],
	  },

	  // ---- Robustness (every benchmark names its own mode) -------------------
	  {
	    title: "Robustness",
	    note: "Consistent performance across portion sizes, formatting differences, and platform-specific conventions.",
	    mode: "mixed",
	    benchmarks: [
	      {
	        title: "Portion Size Sensitivity",
	        metric: "F1",
	        mode: "collapsed",
	        scores: { dish_embed: 0.877, off_shelf: 0.082 },
	      },
	      {
	        title: "Noisy Menu Matching",
	        metric: "F1",
	        mode: "collapsed",
	        scores: { dish_embed: 0.922, off_shelf: 0.914 },
	      },
	      {
	        title: "Bilingual Menu Matching",
	        metric: "F1",
	        mode: "collapsed",
	        scores: { dish_embed: 0.886, off_shelf: 0.879 },
	      },
	      {
	        title: "Embedding Stability",
	        metric: "Cosine Similarity",
	        mode: "full",
	        scores: { dish_embed: 1.000, openai: 0.000, bge_m3: 0.506, qwen3: 0.001, e5_large: 0.170 },
	      },
	    ],
	  },
	];

	// Grid element that receives every generated section header and chart.
	const container = document.getElementById("charts");

	/*
	 * Render pass. For each SECTIONS entry, append a group header (title, note,
	 * optional annotation) and then one chart per benchmark. Within a chart the
	 * models are drawn highest score first, and every row tied for the top score
	 * gets the "best" class.
	 *
	 * Titles, notes, labels and colours are interpolated into innerHTML without
	 * escaping. That is acceptable for the hard-coded data in this file, but the
	 * values would need escaping if they ever came from an external source.
	 */
	SECTIONS.forEach((section) => {
	  const groupEl = document.createElement("div");
	  groupEl.className = "section-group";
	  groupEl.innerHTML = `
	    <div class="section-group-title">${section.title}</div>
	    <div class="section-group-note">${section.note}</div>
	    ${section.annotation ? `<div class="section-annotation">${section.annotation}</div>` : ''}
	  `;
	  container.appendChild(groupEl);

	  section.benchmarks.forEach((bench) => {
	    // A benchmark's own mode wins over the section's. Any mode other than
	    // "reranker" or "collapsed" (including "mixed") uses the full roster.
	    const benchMode = bench.mode || section.mode || "full";
	    const models =
	      benchMode === "reranker" ? RERANKER_MODELS
	      : benchMode === "collapsed" ? COLLAPSED_MODELS
	      : FULL_MODELS;

	    // Keep only models that have a score for this benchmark; with
	    // bench.filterZero set, non-positive scores are dropped as well.
	    const scored = models.filter((m) => {
	      const s = bench.scores[m.key];
	      return s != null && (!bench.filterZero || s > 0);
	    });

	    // With no scored models this is -Infinity and no row is drawn.
	    const best = Math.max(...scored.map((m) => bench.scores[m.key]));

	    // Highest score first. Array.prototype.sort is stable, so equal scores
	    // keep their roster order.
	    const sorted = [...scored].sort((a, b) => bench.scores[b.key] - bench.scores[a.key]);

	    const el = document.createElement("div");
	    el.className = "benchmark";

	    let html = `
	      <div class="bench-header">
	        <div class="bench-title">${bench.title}</div>
	        <div class="bench-metric">${bench.metric}</div>
	      </div>`;

	    let addedRerankerLabel = false;
	    sorted.forEach((model) => {
	      const score = bench.scores[model.key];
	      const pct = (score * 100).toFixed(1);
	      // Scores are fractions of 1. Clamp so an out-of-range value can never
	      // yield a negative width or a fill wider than its track.
	      const barWidth = Math.min(100, Math.max(0, score * 100));
	      const isBest = Math.abs(score - best) < 0.0001;
	      const bestClass = isBest ? " best" : "";
	      // Short bars cannot hold the number, so it is drawn after the fill.
	      const valueInside = barWidth > 25;

	      // In reranker mode, insert the label once, before the first row that
	      // is not flagged isOurs.
	      if (benchMode === "reranker" && !model.isOurs && !addedRerankerLabel) {
	        html += `<div class="reranker-label">with BGE-Reranker-v2-M3</div>`;
	        addedRerankerLabel = true;
	      }

	      // Rows that are not the best get a "cc" alpha suffix on the hex colour.
	      html += `
	        <div class="bar-row${bestClass}">
	          <div class="bar-label">${model.label}</div>
	          <div class="bar-track">
	            <div class="bar-fill" style="width:${barWidth}%;background:${model.color}${isBest ? '' : 'cc'}">
	              ${valueInside ? `<span class="bar-value">${pct}</span>` : ''}
	            </div>
	            ${!valueInside ? `<span class="bar-value-outside" style="--bar-width:${barWidth}%">${pct}</span>` : ''}
	          </div>
	        </div>`;
	    });

	    el.innerHTML = html;
	    container.appendChild(el);
	  });
	});
	</script>

	</body>
	</html>