<!--
benchmarks / index.html
latimal's picture
refactor: tables format, dark theme, food-embed.latimal.com
04fb3cf verified
Raw
History Blame Contribute Delete
10.3 kB
-->
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<!-- The page only ships a dark palette; declaring it lets the browser draw
     its own UI (e.g. the table scrollbars) to match. -->
<meta name="color-scheme" content="dark">
<!-- Summary for search results and link previews; mirrors the page subtitle. -->
<meta name="description" content="How the full Latimal production pipeline compares to leading embedding models on food-domain tasks. All models at 384 dimensions.">
<title>Latimal Production Benchmarks</title>
<style>
/* ---- Reset ---- */
* {
  margin: 0;
  padding: 0;
  box-sizing: border-box;
}

/* ---- Page shell: dark background, centred column ---- */
body {
  max-width: 1200px;
  margin: 0 auto;
  padding: 40px 32px 32px;
  background: #09090b;
  color: #fafafa;
  font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", sans-serif;
  -webkit-font-smoothing: antialiased;
}

/* ---- Page header ---- */
.header {
  margin-bottom: 32px;
}
.header h1 {
  margin-bottom: 8px;
  font-size: 1.6rem;
  font-weight: 700;
  letter-spacing: -0.02em;
}
.header .subtitle {
  max-width: 680px;
  font-size: 0.88rem;
  line-height: 1.5;
  color: #a1a1aa;
}

/* ---- One .section per rendered table ---- */
.section {
  margin-bottom: 40px;
}
.section-title {
  margin-bottom: 4px;
  font-size: 1.15rem;
  font-weight: 700;
}
.section-caption {
  max-width: 640px;
  margin-bottom: 6px;
  font-size: 0.78rem;
  line-height: 1.5;
  color: #71717a;
}

/* Bullet list with the native marker replaced by a "·" pseudo-element. */
.section-bullets {
  margin-bottom: 12px;
  padding: 0;
  list-style: none;
}
.section-bullets li {
  position: relative;
  padding-left: 14px;
  font-size: 0.75rem;
  line-height: 1.5;
  color: #71717a;
}
.section-bullets li::before {
  content: "·";
  position: absolute;
  left: 2px;
  font-weight: 700;
  color: #52525b;
}

/* Note printed underneath a table. */
.section-annot {
  margin-top: 8px;
  font-size: 0.75rem;
  color: #a1a1aa;
}
/* Horizontal scroller around each table. It also draws the table's outer
   frame: browsers generally do not round the border of a table that uses
   border-collapse: collapse, so the 1px border and 8px radius live on this
   wrapper, whose non-visible overflow clips the table to the rounded box. */
.scroll-wrap {
  overflow-x: auto;
  -webkit-overflow-scrolling: touch;
  border: 1px solid #27272a;
  border-radius: 8px;
}

/* Collapsed borders are kept because the row separators are set on <tr>.
   tabular-nums keeps the score columns digit-aligned. */
table {
  width: 100%;
  border-collapse: collapse;
  font-size: 0.82rem;
  font-variant-numeric: tabular-nums;
}
/* ---- Header row ---- */
thead tr {
  border-bottom: 1px solid #27272a;
  background: #18181b;
}
th {
  padding: 10px 14px;
  font-weight: 600;
  text-align: right;
  vertical-align: bottom;
  white-space: nowrap;
}

/* First header cell stays pinned to the left edge while the table scrolls;
   its background repeats the header row colour so it is opaque. */
th.task-col {
  position: sticky;
  left: 0;
  z-index: 2;
  text-align: left;
  background: #18181b;
  border-right: 1px solid #27272a;
}

/* Two stacked labels inside a column header: company (.co) over model. */
th .co {
  display: block;
  font-size: 0.65rem;
  font-weight: 400;
  line-height: 1.3;
  color: #71717a;
}
th .model {
  display: block;
  line-height: 1.4;
  color: #e4e4e7;
}

/* Lead column header is tinted amber, including its model label. */
th.lead-col,
th.lead-col .model {
  color: #fbbf24;
}

/* ---- Body rows ---- */
tbody tr {
  border-bottom: 1px solid rgba(39,39,42,0.5);
}
tbody tr:last-child {
  border-bottom: none;
}

/* Average rows: heavier top rule and bold text. */
tbody tr.avg-row {
  border-top: 2px solid #3f3f46;
}
tbody tr.avg-row td,
tbody tr.avg-row th {
  font-weight: 700;
}

/* ---- Body cells ---- */
td {
  padding: 9px 14px;
  text-align: right;
  white-space: nowrap;
  color: #71717a;
}

/* Task-name cell, pinned like th.task-col; background repeats the page
   colour so it is opaque. */
td.task-cell {
  position: sticky;
  left: 0;
  z-index: 1;
  text-align: left;
  color: #d4d4d8;
  background: #09090b;
  border-right: 1px solid #27272a;
}
td.task-cell .sub {
  display: block;
  font-size: 0.68rem;
  color: #52525b;
}
.avg-row td.task-cell {
  color: #fafafa;
}

/* Score emphasis: .lead = lead column, .lead-win / .comp-win = row maximum
   in the lead column / in a competitor column. */
td.lead {
  color: #a1a1aa;
}
td.lead-win {
  font-weight: 600;
  color: #fbbf24;
}
td.comp-win {
  font-weight: 600;
  color: #e4e4e7;
}

/* Average-row variants of the same states. */
.avg-row td.lead {
  color: #d4d4d8;
}
.avg-row td.lead-win {
  font-weight: 700;
  color: #fbbf24;
}
.avg-row td:not(.task-cell):not(.lead):not(.lead-win):not(.comp-win) {
  color: #a1a1aa;
}
/* ---- Footer: two blocks pushed to opposite ends of a flex row ---- */
.footer {
  display: flex;
  justify-content: space-between;
  gap: 24px;
  margin-top: 32px;
  padding-top: 16px;
  border-top: 1px solid #27272a;
  font-size: 0.72rem;
  line-height: 1.6;
  color: #52525b;
}
.footer a {
  text-decoration: none;
  color: #71717a;
}
.footer a:hover {
  color: #a1a1aa;
}
.footer-right {
  text-align: right;
  white-space: nowrap;
}

/* ---- Viewports up to 700px wide: tighter spacing, stacked footer ---- */
@media (max-width: 700px) {
  body {
    padding: 20px 12px 20px;
  }
  .header h1 {
    font-size: 1.2rem;
  }
  th,
  td {
    padding: 7px 10px;
    font-size: 0.75rem;
  }
  th .co {
    font-size: 0.6rem;
  }
  .footer {
    flex-direction: column;
    gap: 8px;
  }
  .footer-right {
    text-align: left;
  }
  /* Task names may wrap here, capped at 36% of the viewport width. */
  td.task-cell .name {
    display: block;
    max-width: 36vw;
    white-space: normal;
  }
}
</style>
</head>
<body>
<!-- Page banner. Landmark elements replace generic divs; the classes are
     kept so the existing .header/.footer rules still apply. -->
<header class="header">
  <h1>Latimal Production Benchmarks</h1>
  <p class="subtitle">How the full Latimal production pipeline compares to leading embedding models on food-domain tasks. All models at 384 dimensions.</p>
</header>
<!-- Filled by the inline script below, which looks this element up by id. -->
<main id="tables"></main>
<footer class="footer">
  <div>All models compared at 384 dimensions. Competitors paired with <a href="https://huggingface.co/BAAI/bge-reranker-v2-m3">bge-reranker-v2-m3</a>. Latimal: production API, measured at the public API boundary, reproducible with an API key.</div>
  <div class="footer-right"><time datetime="2026-06">June 2026</time><br><a href="https://latimal.com">latimal.com</a></div>
</footer>
<script>
// Competitor columns, in display order. Each [company, model] pair becomes a
// { company, model } object; the renderer emits one header cell per entry.
const COMPETITORS = [
  ["OpenAI", "text-embedding-3-large"],
  ["Voyage AI", "Voyage 4 Large"],
  ["Cohere", "Embed v4"],
  ["Alibaba", "GTE-large"],
  ["Nomic AI", "Nomic v1.5"],
  ["BAAI", "BGE-M3"],
  ["Microsoft", "E5-large"],
].map(([company, model]) => ({ company, model }));
// Benchmark tables, rendered top to bottom in this order, one section each.
//
// Table fields, as consumed by the rendering loop below:
//   title       - section heading
//   caption     - paragraph under the heading
//   bullets     - optional list of notes shown under the caption
//   leadCompany - optional small label above leadModel in the lead column
//                 header; when omitted only leadModel is shown
//   leadModel   - label of the lead (first score) column
//   rows        - table rows, see below
//   annotation  - optional note printed under the table
//
// Row fields:
//   task   - row label
//   sub    - optional secondary label under the task name
//   scores - one number per column: index 0 is the lead column, the rest
//            follow the order of COMPETITORS; printed with three decimals,
//            and the highest value(s) in the row are highlighted
//   avg    - when true the row gets the "avg-row" class
const TABLES = [
{
title: "Overall",
caption: "Category averages across the three tables below.",
bullets: [
"Search: production API. Matching and classification: raw embeddings + cosine.",
"Average: unweighted mean of the three category scores.",
],
leadCompany: "Latimal", leadModel: "Food Embed v1",
rows: [
{ task: "Average", sub: "3 categories", scores: [0.819, 0.682, 0.659, 0.643, 0.614, 0.624, 0.609, 0.505], avg: true },
{ task: "Search", sub: "Production NDCG@10, 4 tasks", scores: [0.869, 0.455, 0.447, 0.451, 0.427, 0.422, 0.408, 0.411] },
{ task: "Matching", sub: "Mean F1, 7 tasks", scores: [0.851, 0.758, 0.741, 0.741, 0.699, 0.739, 0.718, 0.704] },
{ task: "Classification", sub: "Macro F1, 1 task", scores: [0.738, 0.833, 0.789, 0.737, 0.716, 0.71, 0.701, 0.399] },
],
},
{
title: "Matching",
caption: "Best F1, 7 tasks.",
bullets: [
"Same-dish detection across cuisines, scripts, and noise levels.",
"Raw embeddings + cosine, no reranking.",
],
leadCompany: "Latimal", leadModel: "Food Embed v1",
rows: [
{ task: "Average", sub: "7 tasks", scores: [0.851, 0.758, 0.741, 0.741, 0.699, 0.739, 0.718, 0.704], avg: true },
{ task: "Indian cuisine", scores: [0.817, 0.745, 0.718, 0.732, 0.705, 0.731, 0.711, 0.68] },
{ task: "Global cuisine", scores: [0.867, 0.828, 0.783, 0.829, 0.695, 0.732, 0.716, 0.716] },
{ task: "Beverages", scores: [0.746, 0.715, 0.719, 0.71, 0.71, 0.715, 0.706, 0.706] },
{ task: "Bakery & desserts", scores: [0.755, 0.735, 0.715, 0.691, 0.682, 0.684, 0.684, 0.688] },
{ task: "Portion size", scores: [0.972, 0.849, 0.791, 0.835, 0.725, 0.855, 0.821, 0.757] },
{ task: "Noisy menu", scores: [0.916, 0.685, 0.64, 0.667, 0.672, 0.75, 0.674, 0.648] },
{ task: "Cross-lingual", scores: [0.886, 0.748, 0.82, 0.721, 0.707, 0.707, 0.717, 0.731] },
],
},
{
title: "Search",
caption: "Production search, NDCG@10.",
bullets: [
"Latimal: production API, measured at the public API boundary. Reproducible with an API key.",
"Competitors: embedding + bge-reranker-v2-m3.",
],
// No leadCompany here, so the lead column header shows only "Latimal".
leadModel: "Latimal",
rows: [
{ task: "Average", sub: "4 tasks", scores: [0.869, 0.455, 0.447, 0.451, 0.427, 0.422, 0.408, 0.411], avg: true },
{ task: "Food search", sub: "NDCG@10", scores: [0.938, 0.59, 0.59, 0.589, 0.572, 0.564, 0.552, 0.554] },
{ task: "Concept search", sub: "NDCG@10", scores: [0.809, 0.405, 0.392, 0.391, 0.374, 0.357, 0.336, 0.328] },
{ task: "Diet & allergen search", sub: "NDCG@10", scores: [0.802, 0.172, 0.161, 0.165, 0.135, 0.132, 0.132, 0.136] },
{ task: "Noisy search", sub: "NDCG@10", scores: [0.925, 0.653, 0.644, 0.66, 0.628, 0.635, 0.614, 0.628] },
],
annotation: "Diet & allergen search: 4.7x the best competitor.",
},
{
// Single-row table with no bullets and no average row.
title: "Classification",
caption: "Macro F1, 1 task. Linear probe on frozen embeddings, 26 menu classes.",
leadCompany: "Latimal", leadModel: "Food Embed v1",
rows: [
{ task: "Cuisine classification", sub: "Macro F1", scores: [0.738, 0.833, 0.789, 0.737, 0.716, 0.71, 0.701, 0.399] },
],
},
];
// ---- Rendering --------------------------------------------------------------
// Builds one <section> per TABLES entry (heading, caption, optional bullets,
// score table, optional annotation) and appends it to the #tables element.

// Entity replacements for the characters the HTML parser treats specially.
const HTML_ESCAPES = { "&": "&amp;", "<": "&lt;", ">": "&gt;", '"': "&quot;", "'": "&#39;" };

// Returns `value` as a string that can be interpolated into markup as plain
// text. Several labels contain "&" (e.g. "Diet & allergen search"); escaping
// them means the output no longer depends on lenient parsing of bare
// ampersands, and any future label containing "<" cannot inject markup.
const escapeHtml = value => String(value).replace(/[&<>"']/g, ch => HTML_ESCAPES[ch]);

const container = document.getElementById("tables");

// Every row needs one score for the lead column plus one per competitor.
const expectedScoreCount = COMPETITORS.length + 1;

TABLES.forEach((t, tableIndex) => {
  const section = document.createElement("section");
  section.className = "section";

  // Document-unique id so the table can take its accessible name from the
  // section heading via aria-labelledby.
  const titleId = `section-title-${tableIndex}`;

  // The page already has an <h1>, so each table title is an <h2>.
  let html = `<h2 class="section-title" id="${titleId}">${escapeHtml(t.title)}</h2>`;
  if (t.caption) {
    html += `<p class="section-caption">${escapeHtml(t.caption)}</p>`;
  }
  if (t.bullets && t.bullets.length) {
    html += '<ul class="section-bullets">';
    t.bullets.forEach(b => { html += `<li>${escapeHtml(b)}</li>`; });
    html += '</ul>';
  }

  // Header row: task column, lead column, then one column per competitor.
  html += `<div class="scroll-wrap"><table aria-labelledby="${titleId}"><thead><tr>`;
  html += '<th class="task-col" scope="col"><span class="model">Task</span></th>';
  const leadLabel = t.leadCompany
    ? `<span class="co">${escapeHtml(t.leadCompany)}</span><span class="model">${escapeHtml(t.leadModel)}</span>`
    : `<span class="model">${escapeHtml(t.leadModel)}</span>`;
  html += `<th class="lead-col" scope="col">${leadLabel}</th>`;
  COMPETITORS.forEach(c => {
    html += `<th scope="col"><span class="co">${escapeHtml(c.company)}</span><span class="model">${escapeHtml(c.model)}</span></th>`;
  });
  html += '</tr></thead><tbody>';

  t.rows.forEach(row => {
    // The data is maintained by hand; surface a column-count mismatch in the
    // console instead of silently rendering a ragged row.
    if (row.scores.length !== expectedScoreCount) {
      console.warn(`Table "${t.title}", row "${row.task}": expected ${expectedScoreCount} scores, got ${row.scores.length}.`);
    }

    const best = Math.max(...row.scores);
    const rowCls = row.avg ? ' class="avg-row"' : '';
    html += `<tr${rowCls}>`;
    html += `<td class="task-cell"><span class="name">${escapeHtml(row.task)}</span>${row.sub ? `<span class="sub">${escapeHtml(row.sub)}</span>` : ''}</td>`;

    row.scores.forEach((score, i) => {
      // Tolerance compare so every score tied for the row maximum is marked.
      const isBest = Math.abs(score - best) < 1e-9;
      const isLead = i === 0; // scores[0] belongs to the lead column
      let cls = "";
      if (isBest) cls = isLead ? "lead-win" : "comp-win";
      else if (isLead) cls = "lead";
      html += `<td${cls ? ` class="${cls}"` : ''}>${score.toFixed(3)}</td>`;
    });
    html += '</tr>';
  });

  html += '</tbody></table></div>';
  if (t.annotation) {
    html += `<p class="section-annot">${escapeHtml(t.annotation)}</p>`;
  }

  section.innerHTML = html;
  container.appendChild(section);
});
</script>
</body>
</html>