{EVAL_TABS.find((t) => t.id === section)?.desc}

{error && } {/* ---- Similarity Distribution ---- */} {section === "distribution" && (

{distrib && (

{[ { label: "Mean", value: distrib.mean }, { label: "Std Dev", value: distrib.std }, { label: "Min", value: distrib.min }, { label: "Max", value: distrib.max }, ].map((m) => ( ))}

Histogram

v.toFixed(1)} /> [Number(value), "Count"]} labelFormatter={(v: unknown) => `Similarity: ${Number(v).toFixed(2)}`} /> {distrib.histogram.map((entry, i) => ( = 0.5 ? "var(--ok)" : entry.bin_start >= 0 ? "var(--accent)" : "var(--err)"} /> ))}

Percentiles

{Object.keys(distrib.percentiles).map((p) => ( ))} {Object.values(distrib.percentiles).map((v, i) => ( ))}

P{p}
{v.toFixed(4)}

)}

)} {/* ---- Disambiguation Evaluation ---- */} {section === "disambiguation" && (

{/* Keyword */}

Keyword setKeyword(e.target.value)} placeholder='e.g. "pizza"' />

{/* Candidate Meanings */}

Candidate Meanings — describe each possible meaning {meanings.map((m, i) => (

{i + 1}. updateMeaning(i, e.target.value)} placeholder={`Meaning ${i + 1} description...`} style={{ flex: 1 }} /> {meanings.length > 2 && ( )}

))}

{/* Ground Truth Examples */}

Labeled Examples — sentences using the keyword, with the correct meaning {gtRows.map((row, i) => ( ))}

Sentence	Correct Meaning
updateGtRow(i, "text", e.target.value)} placeholder="A sentence containing the keyword..." style={{ width: "100%", background: "var(--surface)", border: "1px solid var(--border)", borderRadius: 4, padding: "4px 8px", color: "var(--text)" }} />		{gtRows.length > 1 && ( )}

{disambig && disambig.map((m) => (

Results: "{m.keyword}" ({m.total_samples} samples)

Per-Meaning Scores

{Object.keys(m.per_meaning_f1).map((meaning) => ( ))}

Meaning	Precision	Recall	F1
{meaning}	{m.per_meaning_precision[meaning]?.toFixed(4) ?? "-"}	{m.per_meaning_recall[meaning]?.toFixed(4) ?? "-"}	{m.per_meaning_f1[meaning]?.toFixed(4) ?? "-"}

{m.confusion_matrix && ( <>

Confusion Matrix

{Object.keys(m.per_meaning_f1).map((meaning) => ( ))} {m.confusion_matrix.map((row, i) => ( {row.map((val, j) => ( ))} ))}

True \ Predicted	{meaning}
{Object.keys(m.per_meaning_f1)[i]}	0 ? "var(--err)" : "var(--text-dim)", }} > {val}

)}

))}

)} {/* ---- Retrieval Evaluation ---- */} {section === "retrieval" && (

Search Queries — enter queries and what text they should find {retRows.map((row, i) => ( ))}

Query	Expected Match (text snippet)
updateRetRow(i, "query", e.target.value)} placeholder="A search query..." style={{ width: "100%", background: "var(--surface)", border: "1px solid var(--border)", borderRadius: 4, padding: "4px 8px", color: "var(--text)" }} />	updateRetRow(i, "relevantText", e.target.value)} placeholder="Text that should match..." style={{ width: "100%", background: "var(--surface)", border: "1px solid var(--border)", borderRadius: 4, padding: "4px 8px", color: "var(--text)" }} />	{retRows.length > 1 && ( )}

{retrieval && (

{retrieval.map((m, i) => ( ))}

Query	MRR	P@1	P@3	P@5	Top Score
{m.query.length > 50 ? m.query.slice(0, 50) + "..." : m.query}	{m.mrr.toFixed(3)}	{m.precision_at_k["1"]?.toFixed(2) ?? "-"}	{m.precision_at_k["3"]?.toFixed(2) ?? "-"}	{m.precision_at_k["5"]?.toFixed(2) ?? "-"}	{m.top_score.toFixed(3)}

s + m.mrr, 0) / retrieval.length).toFixed(3)} label="Mean MRR" /> s + (m.precision_at_k["5"] ?? 0), 0) / retrieval.length).toFixed(3)} label="Mean P@5" /> s + m.top_score, 0) / retrieval.length).toFixed(3)} label="Mean Top Score" />

)}