<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Dish-Embed Benchmark Results</title>
<style>
  /*
   * Stylesheet for the Dish-Embed benchmark report.
   * Layout: header, colour legend, a two-column grid of bar charts that the
   * page script generates, a glossary, and a footer.
   */

  /* Design tokens */
  :root {
    --bg: #ffffff;
    --text: #1a1a2e;
    --muted: #6b7280;
    --green: #16a34a;
  }

  /* Global reset: every spacing value below is set explicitly per component. */
  * { box-sizing: border-box; margin: 0; padding: 0; }

  body {
    font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", sans-serif;
    background: var(--bg);
    color: var(--text);
    max-width: 1100px;
    margin: 0 auto;
    padding: 36px 32px 28px;
  }

  /* Page header */
  .header { margin-bottom: 24px; }
  .header h1 {
    font-size: 1.5rem;
    font-weight: 700;
    letter-spacing: -0.02em;
    margin-bottom: 5px;
  }
  .header .subtitle {
    font-size: 0.85rem;
    color: var(--muted);
    line-height: 1.4;
  }

  /* Legend: a single row of colour swatches with model names. */
  .legend {
    display: flex;
    gap: 14px;
    flex-wrap: nowrap;
    margin-bottom: 22px;
    padding: 10px 14px;
    background: #f9fafb;
    border-radius: 6px;
    border: 1px solid #e5e7eb;
  }
  .legend-item {
    display: flex;
    align-items: center;
    gap: 5px;
    font-size: 0.68rem;
    color: #374151;
    white-space: nowrap;
  }
  .legend-desc {
    font-size: 0.58rem;
    color: #9ca3af;
  }
  .legend-dot {
    width: 10px;
    height: 10px;
    border-radius: 2px;
    flex-shrink: 0;
  }
  .legend-sep {
    width: 1px;
    height: 18px;
    background: #d1d5db;
    align-self: center;
  }

  /* Section heading groups: span every grid column, separated by a rule. */
  .section-group {
    grid-column: 1 / -1;
    margin-top: 10px;
    padding-top: 16px;
    border-top: 1px solid #e5e7eb;
  }
  .section-group:first-child {
    margin-top: 0;
    padding-top: 0;
    border-top: none;
  }
  .section-group-title {
    font-size: 0.92rem;
    font-weight: 700;
    color: var(--text);
    margin-bottom: 3px;
  }
  .section-group-note {
    font-size: 0.72rem;
    color: #9ca3af;
    line-height: 1.4;
    margin-bottom: 4px;
  }

  /* Chart grid: two benchmarks per row on wide screens. */
  .charts-grid {
    display: grid;
    grid-template-columns: 1fr 1fr;
    gap: 20px 28px;
  }

  /* One benchmark: title + metric on the first line, bars below. */
  .bench-header {
    display: flex;
    justify-content: space-between;
    align-items: baseline;
    margin-bottom: 8px;
  }
  .bench-title {
    font-size: 0.85rem;
    font-weight: 600;
  }
  .bench-metric {
    font-size: 0.68rem;
    color: var(--muted);
    font-weight: 500;
  }

  /* Bars: fixed-width label, then a track holding the coloured fill. */
  .bar-row {
    display: flex;
    align-items: center;
    margin-bottom: 4px;
  }
  .bar-label {
    width: 110px;
    font-size: 0.72rem;
    color: #6b7280;
    flex-shrink: 0;
    text-align: right;
    padding-right: 10px;
  }
  .bar-track {
    flex: 1;
    height: 22px;
    background: #f3f4f6;
    border-radius: 3px;
    position: relative;
    overflow: hidden;
  }
  .bar-fill {
    height: 100%;
    border-radius: 3px;
    position: relative;
  }
  /* Value printed inside the fill, right-aligned. */
  .bar-value {
    position: absolute;
    right: 6px;
    top: 50%;
    transform: translateY(-50%);
    font-size: 0.68rem;
    font-weight: 700;
    color: white;
    text-shadow: 0 1px 2px rgba(0,0,0,0.2);
  }
  /* Value printed just past the end of a short fill; --bar-width is set
     inline on the element by the page script. */
  .bar-value-outside {
    position: absolute;
    left: calc(var(--bar-width) + 6px);
    top: 50%;
    transform: translateY(-50%);
    font-size: 0.68rem;
    font-weight: 600;
    color: #374151;
  }
  .bar-row.best .bar-label {
    font-weight: 700;
    color: var(--text);
  }

  /* Glossary */
  .glossary {
    margin-top: 22px;
    padding-top: 14px;
    border-top: 1px solid #e5e7eb;
  }
  .glossary-title {
    font-size: 0.78rem;
    font-weight: 700;
    color: var(--text);
    margin-bottom: 8px;
  }
  .glossary-grid {
    display: grid;
    grid-template-columns: 1fr 1fr;
    gap: 4px 28px;
  }
  .glossary-item {
    font-size: 0.68rem;
    color: #6b7280;
    line-height: 1.5;
  }
  .glossary-item strong {
    color: #374151;
    margin-right: 4px;
  }

  /* Optional call-out shown under a section note. */
  .section-annotation {
    font-size: 0.68rem;
    color: #64748b;
    background: #f1f5f9;
    border-radius: 4px;
    padding: 6px 12px;
    margin-bottom: 4px;
    line-height: 1.4;
  }

  /* Footer */
  .footer {
    margin-top: 22px;
    padding-top: 14px;
    border-top: 1px solid #e5e7eb;
    font-size: 0.68rem;
    color: #9ca3af;
    line-height: 1.5;
    display: flex;
    justify-content: space-between;
    gap: 24px;
  }
  .footer-left { flex: 1; }
  .footer-right { text-align: right; white-space: nowrap; }

  /* Narrow screens: single-column layout with tighter type. */
  @media (max-width: 700px) {
    body { padding: 20px 14px 18px; }
    .header h1 { font-size: 1.15rem; }
    /* The legend must wrap here: its items are nowrap text and would
       otherwise overflow the viewport. The two-value gap is row / column. */
    .legend { flex-wrap: wrap; gap: 10px 14px; padding: 8px 12px; }
    .legend-item { font-size: 0.7rem; }
    .charts-grid {
      grid-template-columns: 1fr;
      gap: 18px;
    }
    /* Stack glossary entries in one column, matching the chart grid. */
    .glossary-grid { grid-template-columns: 1fr; }
    .bar-label {
      width: 72px;
      font-size: 0.65rem;
      padding-right: 6px;
    }
    .bar-track { height: 20px; }
    .bar-value { font-size: 0.6rem; right: 4px; }
    .bar-value-outside { font-size: 0.6rem; }
    .bench-title { font-size: 0.78rem; }
    .bench-metric { font-size: 0.62rem; }
    .section-group-title { font-size: 0.82rem; }
    .section-group-note { font-size: 0.65rem; }
    .footer { flex-direction: column; gap: 8px; }
    .footer-right { text-align: left; }
  }
</style>
</head>
<body>


<!-- Page header: report title and a one-line summary of the evaluation setup. -->
<header class="header">
  <h1>Dish-Embed: Food Embedding Benchmark Results</h1>
  <p class="subtitle">Domain-specialized food embedding model vs general-purpose alternatives. All models evaluated at 384 dimensions on identical benchmark data.</p>
</header>


<!-- Colour key for the chart bars. The swatches are decorative (hidden from
     assistive technology); the model name next to each one carries the meaning. -->
<ul class="legend">
  <li class="legend-item"><span class="legend-dot" style="background:#16a34a" aria-hidden="true"></span>Dish-Embed</li>
  <li class="legend-item"><span class="legend-dot" style="background:#ea580c" aria-hidden="true"></span>OpenAI TE3-Large</li>
  <li class="legend-item"><span class="legend-dot" style="background:#6b7280" aria-hidden="true"></span>BAAI BGE-M3</li>
  <li class="legend-item"><span class="legend-dot" style="background:#2563eb" aria-hidden="true"></span>Qwen3-Embedding-0.6B <span class="legend-desc">#1 MTEB Multilingual</span></li>
  <li class="legend-item"><span class="legend-dot" style="background:#9333ea" aria-hidden="true"></span>Microsoft E5-Large-v2</li>
  <li class="legend-item"><span class="legend-dot" style="background:#64748b" aria-hidden="true"></span>BGE-Reranker-v2-M3 <span class="legend-desc">Best public reranker</span></li>
</ul>


<!-- Chart grid. Empty in the markup: the page script looks this element up
     by id and appends the section headings and bar charts to it. -->
<main id="charts" class="charts-grid"></main>
<!-- Kept outside the grid so it never becomes the grid's first child. -->
<noscript><p class="section-group-note">The benchmark charts are generated with JavaScript. Enable JavaScript to view them.</p></noscript>


<!-- Glossary: one entry per benchmark, with a concrete example of the task.
     Non-Latin dish names carry a lang attribute (and dir for Arabic) so they
     are pronounced and laid out correctly. -->
<div class="glossary">
  <h2 class="glossary-title">Benchmark Glossary</h2>
  <div class="glossary-grid">
    <div class="glossary-item"><strong>Indian Cuisine Matching</strong> Matching "Aloo Gobi" to "Potato Cauliflower Curry", "Dal Makhani" to "Black Lentil Curry" across restaurants.</div>
    <div class="glossary-item"><strong>Cross-Language Matching</strong> Matching "<span lang="ja">ラーメン</span>" to "Ramen", "<span lang="ar" dir="rtl">خبز نان</span>" to "Naan Bread" across languages and scripts.</div>
    <div class="glossary-item"><strong>Bakery &amp; Dessert Matching</strong> Matching "Pain au Chocolat" to "Chocolate Croissant", "Crème Brûlée" to "Caramelized Custard".</div>
    <div class="glossary-item"><strong>Beverage Matching</strong> Matching "Iced Americano" to "Cold Black Coffee", "Masala Chai" to "Spiced Tea Latte" across naming conventions.</div>
    <div class="glossary-item"><strong>Synonym Recognition</strong> Retrieving "Pad Kra Pao" from a query for "Thai Basil Stir-Fry", or "Gyoza" from "Pot Stickers".</div>
    <div class="glossary-item"><strong>Cuisine Classification</strong> Classifying "Tom Yum Goong" as Thai, "Cacio e Pepe" as Italian from the dish name alone. 19 cuisine categories.</div>
    <div class="glossary-item"><strong>Category Search</strong> Searching "Thai soups" or "grilled appetizers" and ranking relevant menu items.</div>
    <div class="glossary-item"><strong>Typo-Tolerant Search</strong> Returning "Margherita Pizza" when a customer types "margarita piza".</div>
    <div class="glossary-item"><strong>Food Search</strong> General menu search ranking across diverse food queries and item catalogs.</div>
    <div class="glossary-item"><strong>Global Search</strong> Search across multilingual menus spanning 15+ cuisines worldwide.</div>
    <div class="glossary-item"><strong>Portion Size Sensitivity</strong> Ignoring portion labels like "Regular", "Family Pack", "Serves 2", "250ml" when matching the same dish. Generic models treat size text as meaningful content.</div>
    <div class="glossary-item"><strong>Noisy Menu Matching</strong> Matching "***BEST SELLER*** Paneer Tikka - Chef's Special!!" to "Paneer Tikka" on another menu.</div>
    <div class="glossary-item"><strong>Bilingual Menu Matching</strong> Matching "Falafel Wrap <span lang="ar" dir="rtl">فلافل راب</span>" to "Falafel Wrap" on menus that mix scripts.</div>
    <div class="glossary-item"><strong>Embedding Stability</strong> Producing identical embeddings for "Fried Rice", "<span lang="zh-Hant">炒飯</span>", and "<span lang="ja">フライドライス</span>". 1.0 = perfectly consistent across scripts.</div>
  </div>
</div>


<!-- Footer: methodology note on the left; date, product and contact on the right. -->
<footer class="footer">
  <div class="footer-left">All competing models paired with BGE-Reranker-v2-M3, the strongest publicly available reranker.</div>
  <div class="footer-right"><time datetime="2026-04">April 2026</time><br>Dish-Embed · embed.statode.com<br><a href="mailto:adityapatni.work@gmail.com" style="color:#9ca3af;text-decoration:none">adityapatni.work@gmail.com</a></div>
</footer>


<script>
/**
 * Models drawn in "full" mode: one bar per embedding model.
 * `key` indexes a benchmark's `scores` object, `label` is the text shown next
 * to the bar, and `color` is the bar fill. Colours must be 6-digit hex: the
 * renderer appends a two-digit alpha suffix for non-best bars.
 */
const FULL_MODELS = [
  { key: "dish_embed", label: "Dish-Embed", color: "#16a34a" },
  { key: "openai", label: "OpenAI TE3L", color: "#ea580c" },
  { key: "bge_m3", label: "BGE-M3", color: "#6b7280" },
  { key: "qwen3", label: "Qwen3-0.6B", color: "#2563eb" },
  { key: "e5_large", label: "E5-Large-v2", color: "#9333ea" },
];

/**
 * Models drawn in "collapsed" mode: Dish-Embed against a single "All others"
 * bar, whose value is read from the benchmark's `off_shelf` score.
 */
const COLLAPSED_MODELS = [
  { key: "dish_embed", label: "Dish-Embed", color: "#16a34a" },
  { key: "off_shelf", label: "All others", color: "#64748b" },
];

/**
 * Models drawn in "reranker" mode. Every model other than Dish-Embed shares
 * one neutral colour; `isOurs` marks the entry that is not preceded by the
 * "with BGE-Reranker-v2-M3" label the renderer inserts.
 */
const RERANKER_MODELS = [
  { key: "dish_embed", label: "Dish-Embed", color: "#16a34a", isOurs: true },
  { key: "openai", label: "OpenAI TE3L", color: "#64748b" },
  { key: "bge_m3", label: "BGE-M3", color: "#64748b" },
  { key: "qwen3", label: "Qwen3-0.6B", color: "#64748b" },
  { key: "e5_large", label: "E5-Large-v2", color: "#64748b" },
];

/**
 * Report content: the sections, in display order, with their benchmarks.
 *
 * Section fields:
 *   title, note  - heading and explanatory text shown above the charts
 *   annotation   - optional call-out shown under the note
 *   mode         - default display mode for the section's benchmarks
 *   benchmarks   - list of { title, metric, scores, mode? }
 *
 * A benchmark's own `mode` overrides the section's. The renderer recognises
 * "reranker" and "collapsed"; any other value (including "mixed") falls back
 * to the full model list. `scores` maps a model key to a value that the
 * renderer multiplies by 100 for display.
 */
const SECTIONS = [
  {
    title: "Food Understanding",
    note: "Core food knowledge that powers synonym-aware search, cuisine tagging, and regional variant detection.",
    mode: "full",
    benchmarks: [
      {
        title: "Synonym Recognition",
        metric: "Recall@5",
        scores: { dish_embed: 0.808, openai: 0.749, bge_m3: 0.707, qwen3: 0.514, e5_large: 0.661 }
      },
      {
        title: "Cuisine Classification (19 cuisines)",
        metric: "Macro Accuracy",
        scores: { dish_embed: 0.889, openai: 0.822, bge_m3: 0.762, qwen3: 0.439, e5_large: 0.298 }
      },
    ]
  },
  {
    title: "Menu Search",
    note: "Ranking relevant menu items when customers search for 'Thai soups' or type 'chiken tikka' with a typo.",
    mode: "full",
    benchmarks: [
      {
        title: "Category Search",
        metric: "NDCG@10",
        scores: { dish_embed: 0.828, openai: 0.797, bge_m3: 0.759, qwen3: 0.802, e5_large: 0.799 }
      },
      {
        title: "Typo-Tolerant Search",
        metric: "NDCG@10",
        scores: { dish_embed: 0.920, openai: 0.884, bge_m3: 0.902, qwen3: 0.892, e5_large: 0.907 }
      },
      {
        title: "Food Search",
        metric: "NDCG@10",
        scores: { dish_embed: 0.943, openai: 0.925, bge_m3: 0.929, qwen3: 0.935, e5_large: 0.939 }
      },
      {
        title: "Global Search",
        metric: "NDCG@10",
        scores: { dish_embed: 0.891, openai: 0.839, bge_m3: 0.886, qwen3: 0.875, e5_large: 0.860 }
      }
    ]
  },
  {
    title: "Cross-Restaurant Item Matching",
    note: "Matching 'Gyoza' to 'Pot Stickers', 'Crème Brûlée' to 'Burnt Cream Custard', 'Dal Makhani' to 'Black Lentil Curry' across thousands of restaurants. Powers price comparison, catalog consolidation, and menu analytics.",
    annotation: "OpenAI TE3-Large, BAAI BGE-M3, Qwen3-Embedding-0.6B, and Microsoft E5-Large-v2 all paired with BGE-Reranker-v2-M3. The reranker determines matching quality, so all embedding models produce identical scores.",
    mode: "collapsed",
    benchmarks: [
      {
        title: "Indian Cuisine Matching",
        metric: "F1",
        scores: { dish_embed: 0.916, off_shelf: 0.754 }
      },
      {
        title: "Cross-Language Matching",
        metric: "F1",
        scores: { dish_embed: 0.831, off_shelf: 0.258 }
      },
      {
        title: "Bakery & Dessert Matching",
        metric: "F1",
        scores: { dish_embed: 0.797, off_shelf: 0.655 }
      },
      {
        title: "Beverage Matching",
        metric: "F1",
        scores: { dish_embed: 0.747, off_shelf: 0.648 }
      },
    ]
  },
  {
    title: "Robustness",
    note: "Consistent performance across portion sizes, formatting differences, and platform-specific conventions.",
    // "mixed" is not a mode the renderer knows; each benchmark below sets its own.
    mode: "mixed",
    benchmarks: [
      {
        title: "Portion Size Sensitivity",
        metric: "F1",
        mode: "collapsed",
        scores: { dish_embed: 0.877, off_shelf: 0.082 }
      },
      {
        title: "Noisy Menu Matching",
        metric: "F1",
        mode: "collapsed",
        scores: { dish_embed: 0.922, off_shelf: 0.914 }
      },
      {
        title: "Bilingual Menu Matching",
        metric: "F1",
        mode: "collapsed",
        scores: { dish_embed: 0.886, off_shelf: 0.879 }
      },
      {
        title: "Embedding Stability",
        metric: "Cosine Similarity",
        mode: "full",
        scores: { dish_embed: 1.000, openai: 0.000, bge_m3: 0.506, qwen3: 0.001, e5_large: 0.170 }
      }
    ]
  }
];

/*
 * Chart rendering.
 *
 * Walks SECTIONS and appends to the #charts element, for every section, a
 * heading group followed by one bar chart per benchmark. All text taken from
 * the data is HTML-escaped before being placed in markup, so characters such
 * as "&" in a benchmark title cannot be misparsed.
 */

const container = document.getElementById("charts");

/** Replacement entities for characters that are significant in HTML markup. */
const HTML_ESCAPES = { "&": "&amp;", "<": "&lt;", ">": "&gt;", '"': "&quot;", "'": "&#39;" };

/**
 * Escapes a value for interpolation into an HTML string (text or a quoted
 * attribute value).
 * @param {*} value - converted with String() before escaping
 * @returns {string}
 */
function escapeHtml(value) {
  return String(value).replace(/[&<>"']/g, (ch) => HTML_ESCAPES[ch]);
}

/**
 * Returns the model list for a display mode. Anything other than "reranker"
 * or "collapsed" uses the full list.
 * @param {string} mode
 * @returns {Array<{key: string, label: string, color: string, isOurs?: boolean}>}
 */
function modelsForMode(mode) {
  if (mode === "reranker") return RERANKER_MODELS;
  if (mode === "collapsed") return COLLAPSED_MODELS;
  return FULL_MODELS;
}

/**
 * Builds the heading group (title, note, optional annotation) for a section.
 * @param {{title: string, note: string, annotation?: string}} section
 * @returns {HTMLElement}
 */
function renderSectionHeader(section) {
  const groupEl = document.createElement("div");
  groupEl.className = "section-group";
  groupEl.innerHTML = `
    <h2 class="section-group-title">${escapeHtml(section.title)}</h2>
    <p class="section-group-note">${escapeHtml(section.note)}</p>
    ${section.annotation ? `<p class="section-annotation">${escapeHtml(section.annotation)}</p>` : ""}
  `;
  return groupEl;
}

/**
 * Builds the markup for one bar.
 * @param {{label: string, color: string}} model
 * @param {number} score - value on a 0..1 scale
 * @param {number} best - highest score among the bars of this benchmark
 * @returns {string} HTML for a single .bar-row
 */
function renderBarRow(model, score, best) {
  const pct = (score * 100).toFixed(1);
  // Clamp so an out-of-range score can never yield an invalid or oversized width.
  const barWidth = Math.min(100, Math.max(0, score * 100));
  const isBest = Math.abs(score - best) < 0.0001;
  // Short bars cannot hold the number, so it is printed just past the fill.
  const valueInside = barWidth > 25;
  // Non-best bars get a "cc" alpha suffix, i.e. the same colour at ~80% opacity.
  const fill = escapeHtml(`${model.color}${isBest ? "" : "cc"}`);

  return `
    <div class="bar-row${isBest ? " best" : ""}">
      <div class="bar-label">${escapeHtml(model.label)}</div>
      <div class="bar-track">
        <div class="bar-fill" style="width:${barWidth}%;background:${fill}">
          ${valueInside ? `<span class="bar-value">${pct}</span>` : ""}
        </div>
        ${!valueInside ? `<span class="bar-value-outside" style="--bar-width:${barWidth}%">${pct}</span>` : ""}
      </div>
    </div>`;
}

/**
 * Builds the chart for one benchmark: a header line plus one bar per model
 * that has a score, ordered from highest to lowest.
 * @param {{title: string, metric: string, scores: Object, mode?: string, filterZero?: boolean}} bench
 * @param {string} [sectionMode] - used when the benchmark sets no mode itself
 * @returns {HTMLElement}
 */
function renderBenchmark(bench, sectionMode) {
  const mode = bench.mode || sectionMode || "full";
  const models = modelsForMode(mode);
  const shown = bench.filterZero
    ? models.filter((m) => bench.scores[m.key] != null && bench.scores[m.key] > 0)
    : models;
  const scores = shown.map((m) => bench.scores[m.key]).filter((s) => s != null);
  // With no scores this is -Infinity, which is harmless: no bar is drawn.
  const best = Math.max(...scores);
  const ranked = [...shown].sort((a, b) => (bench.scores[b.key] || 0) - (bench.scores[a.key] || 0));

  let html = `
    <div class="bench-header">
      <h3 class="bench-title">${escapeHtml(bench.title)}</h3>
      <div class="bench-metric">${escapeHtml(bench.metric)}</div>
    </div>`;

  let rerankerLabelAdded = false;
  ranked.forEach((model) => {
    const score = bench.scores[model.key];
    if (score == null) return;

    // In reranker mode, a single label precedes the first non-Dish-Embed bar.
    if (mode === "reranker" && !model.isOurs && !rerankerLabelAdded) {
      html += `<div class="reranker-label">with BGE-Reranker-v2-M3</div>`;
      rerankerLabelAdded = true;
    }

    html += renderBarRow(model, score, best);
  });

  const el = document.createElement("div");
  el.className = "benchmark";
  el.innerHTML = html;
  return el;
}

// Render only when the target exists, so a missing #charts element does not
// throw and abort the rest of the page script.
if (container) {
  SECTIONS.forEach((section) => {
    container.appendChild(renderSectionHeader(section));
    section.benchmarks.forEach((bench) => {
      container.appendChild(renderBenchmark(bench, section.mode));
    });
  });
}
</script>


</body>
</html>

