// File: frontend/src/components/EvaluationDashboard.tsx
// Author: Besjon Cifliku
// Commit: db764ae — feat: initial project setup
import { useState } from "react";
import {
BarChart,
Bar,
XAxis,
YAxis,
CartesianGrid,
Tooltip,
ResponsiveContainer,
Cell,
} from "recharts";
import { api, getErrorMessage } from "../api";
import type { EvalSection, SimilarityDistribution, DisambiguationMetric, RetrievalMetric } from "../types";
import StatusMessage from "./StatusMessage";
import MetricCard from "./MetricCard";
// ---- Structured form types ----
/**
 * One labeled example ("ground truth" row) in the disambiguation form.
 */
interface GtRow {
  /** Sentence entered by the user for the keyword under test. */
  text: string;
  /** Short meaning label picked in the dropdown; "" while nothing is selected. */
  meaning: string;
}
/**
 * One row of the retrieval form: a search query plus the text it should find.
 */
interface RetrievalRow {
  /** Search query as typed by the user. */
  query: string;
  /** Snippet the query is expected to match; "" when no expectation was given. */
  relevantText: string;
}
// ---- Example data ----
/** Keyword filled into the disambiguation form by its "Load Example" button. */
const EXAMPLE_KEYWORD = "pizza";

/**
 * Candidate meaning descriptions loaded together with the example keyword.
 * The text before the first comma of each entry ("school", "food") is what the
 * example rows use as their meaning label.
 */
const EXAMPLE_MEANINGS = [
  "school, education, and academic activities like homework and tests",
  "food, Italian cuisine, restaurant, cooking, and eating",
];
/**
 * Labeled example sentences loaded by the disambiguation "Load Example" button.
 * Each `meaning` is one of the short labels derived from the example meanings.
 */
const EXAMPLE_GT: GtRow[] = [
  {
    text: "I love pizza so much, I go there every day",
    meaning: "school",
  },
  {
    text: "pizza gives me homework",
    meaning: "school",
  },
  {
    text: "she made the best margherita pizza in the city",
    meaning: "food",
  },
  {
    text: "pizza dough recipe used tipo 00 flour",
    meaning: "food",
  },
  {
    text: "The pizza test is going to be so hard",
    meaning: "school",
  },
  {
    text: "This pizza is amazing, the crust is perfectly crispy",
    meaning: "food",
  },
];
/** Query rows loaded by the retrieval "Load Example" button. */
const EXAMPLE_RETRIEVAL: RetrievalRow[] = [
  {
    query: "kids using secret code words for school",
    relevantText: "secret language",
  },
  {
    query: "Italian restaurant with wood-fired oven",
    relevantText: "pizza",
  },
];
// ---- Meaning label helpers ----
/**
 * Derives a short label for each meaning description.
 *
 * A label is the text before the first comma (the whole string when there is
 * no comma), trimmed, then cut to at most 20 characters. Labels are returned
 * in the same order as the input, so index `i` of the result belongs to
 * `meanings[i]`. Distinct descriptions can produce the same label.
 *
 * @param meanings Full meaning descriptions.
 * @returns One label per description.
 */
function getMeaningLabels(meanings: string[]): string[] {
  const maxLabelLength = 20;
  return meanings.map((description) => {
    const commaAt = description.indexOf(",");
    const head = commaAt === -1 ? description : description.slice(0, commaAt);
    // slice() leaves strings of 20 characters or fewer unchanged.
    return head.trim().slice(0, maxLabelLength);
  });
}
// ---- Tab config ----
/**
 * Sub-tabs of the evaluation dashboard, in display order.
 * `id` selects the panel, `label` is the button text, and `desc` is the
 * explanatory paragraph shown while that tab is active.
 */
const EVAL_TABS: Array<{ id: EvalSection; label: string; desc: string }> = [
  {
    id: "distribution",
    label: "Distribution",
    desc: "Analyze pairwise similarity distribution across your corpus. One-click — no setup needed.",
  },
  {
    id: "disambiguation",
    label: "Disambiguation",
    desc: "Test whether the engine can tell apart different meanings of the same word. Provide example sentences and label each with the intended meaning.",
  },
  {
    id: "retrieval",
    label: "Retrieval",
    desc: "Measure how well the engine finds relevant documents for a given query. Provide search queries and what text they should match.",
  },
];
export default function EvaluationDashboard() {
// Tab whose panel is currently shown.
const [section, setSection] = useState<EvalSection>("distribution");
// Result of the most recent run of each evaluation; null until it has been run.
const [distrib, setDistrib] = useState<SimilarityDistribution | null>(null);
const [disambig, setDisambig] = useState<DisambiguationMetric[] | null>(null);
const [retrieval, setRetrieval] = useState<RetrievalMetric[] | null>(null);
// Which request is in flight, or "" when idle. Typed as a union of the values
// the handlers actually set, so a misspelled identifier fails to compile
// instead of silently never matching.
const [loading, setLoading] = useState<"" | "distrib" | "disambig" | "retrieval">("");
// Message for the shared error banner; "" hides the banner.
const [error, setError] = useState("");
// Disambiguation structured form: starts with two blank meanings (the minimum
// accepted on submit) and a single blank example row.
const [keyword, setKeyword] = useState("");
const [meanings, setMeanings] = useState<string[]>(["", ""]);
const [gtRows, setGtRows] = useState<GtRow[]>([{ text: "", meaning: "" }]);
// Retrieval structured form: starts with a single blank query row.
const [retRows, setRetRows] = useState<RetrievalRow[]>([{ query: "", relevantText: "" }]);
// ---- Distribution ----
/**
 * Fetches the similarity distribution from the API and stores it for display.
 * The "distrib" loading flag is set for the duration of the request, and a
 * failure is reported through the shared error banner.
 */
async function fetchDistribution() {
  setLoading("distrib");
  setError("");
  try {
    const distribution = await api.getSimilarityDistribution();
    setDistrib(distribution);
  } catch (cause) {
    setError(getErrorMessage(cause));
  } finally {
    // Runs after success and failure alike, so the button is always re-enabled.
    setLoading("");
  }
}
// ---- Disambiguation ----
/**
 * Replaces the whole disambiguation form with the built-in example.
 * Copies are stored so later edits never touch the example constants.
 */
function loadDisambiguationExample() {
  const exampleMeanings = EXAMPLE_MEANINGS.slice();
  const exampleRows = EXAMPLE_GT.map((row) => Object.assign({}, row));
  setKeyword(EXAMPLE_KEYWORD);
  setMeanings(exampleMeanings);
  setGtRows(exampleRows);
}
/**
 * Sets the meaning description at index `i` to `val`.
 * Works on a copy so the array held in state is never mutated.
 */
function updateMeaning(i: number, val: string) {
  const draft = meanings.slice();
  draft[i] = val;
  setMeanings(draft);
}
/** Appends one blank meaning description to the form. */
function addMeaning() {
  const extended = meanings.concat("");
  setMeanings(extended);
}
/**
 * Removes the candidate meaning at index `i`; does nothing when only two
 * meanings are left, because two is the minimum the form accepts.
 *
 * Example rows that were labeled with the removed meaning are reset to
 * "unselected". The reset is skipped when another remaining meaning still
 * produces the same short label: those rows still point at a valid dropdown
 * option, so clearing them would discard a valid selection.
 */
function removeMeaning(i: number) {
  if (meanings.length <= 2) return;

  const remaining = meanings.filter((_, idx) => idx !== i);
  setMeanings(remaining);

  // Labels are positional, so the label at index i belongs to the removed meaning.
  const removedLabel = getMeaningLabels(meanings)[i];
  if (getMeaningLabels(remaining).includes(removedLabel)) return;

  setGtRows(gtRows.map((r) => (r.meaning === removedLabel ? { ...r, meaning: "" } : r)));
}
/**
 * Sets one field (`text` or `meaning`) of the labeled example at index `i`.
 * Both the array and the edited row are copied, so state is never mutated.
 */
function updateGtRow(i: number, field: keyof GtRow, val: string) {
  const draft = gtRows.slice();
  draft[i] = Object.assign({}, draft[i], { [field]: val });
  setGtRows(draft);
}
/** Appends one blank, unlabeled example row. */
function addGtRow() {
  const blankRow = { text: "", meaning: "" };
  setGtRows(gtRows.concat(blankRow));
}
/** Removes the example row at index `i`, always keeping at least one row. */
function removeGtRow(i: number) {
  if (gtRows.length > 1) {
    setGtRows(gtRows.filter((_row, idx) => idx !== i));
  }
}
/**
 * Validates the disambiguation form and, when it is complete, sends it to the
 * evaluation API and stores the returned metrics.
 *
 * Validation stops at the first problem and reports it in the error banner:
 * a keyword is required, then at least two non-blank meanings, then at least
 * two examples that have text and a label produced by a submitted meaning.
 * While the request runs the "disambig" loading flag is set; an API failure
 * is reported in the same banner.
 */
async function runDisambiguation() {
  const trimmedKeyword = keyword.trim();
  if (!trimmedKeyword) {
    setError("Enter a keyword.");
    return;
  }

  const validMeanings = meanings.filter((m) => m.trim());
  if (validMeanings.length < 2) {
    setError("Add at least 2 meanings.");
    return;
  }

  // A row keeps its label when the meaning it was picked from is later edited
  // or blanked, so a stored label can be stale. Only accept labels that the
  // submitted meanings still produce.
  const submittedLabels = new Set(getMeaningLabels(validMeanings));
  const validGt = gtRows.filter(
    (r) => r.text.trim() && r.meaning && submittedLabels.has(r.meaning),
  );
  if (validGt.length < 2) {
    setError("Add at least 2 labeled examples.");
    return;
  }

  setLoading("disambig");
  setError("");
  try {
    // true_meaning carries the short dropdown label, while candidate_meanings
    // carries the full descriptions.
    // NOTE(review): confirm the API matches true_meaning against these short
    // labels rather than against the full candidate strings.
    const ground_truth = validGt.map((r) => ({
      keyword: trimmedKeyword,
      text: r.text,
      true_meaning: r.meaning,
    }));
    const candidate_meanings: Record<string, string[]> = {
      [trimmedKeyword]: validMeanings,
    };
    const res = await api.evalDisambiguation({ ground_truth, candidate_meanings });
    setDisambig(res.metrics);
  } catch (e) {
    setError(getErrorMessage(e));
  } finally {
    // Runs after success and failure alike, so the button is always re-enabled.
    setLoading("");
  }
}
// ---- Retrieval ----
/**
 * Replaces the retrieval form with the built-in example rows.
 * Rows are copied so later edits never touch the example constant.
 */
function loadRetrievalExample() {
  const exampleRows = EXAMPLE_RETRIEVAL.map((row) => Object.assign({}, row));
  setRetRows(exampleRows);
}
/**
 * Sets one field (`query` or `relevantText`) of the retrieval row at index `i`.
 * Both the array and the edited row are copied, so state is never mutated.
 */
function updateRetRow(i: number, field: keyof RetrievalRow, val: string) {
  const draft = retRows.slice();
  draft[i] = Object.assign({}, draft[i], { [field]: val });
  setRetRows(draft);
}
/** Appends one blank query row. */
function addRetRow() {
  const blankRow = { query: "", relevantText: "" };
  setRetRows(retRows.concat(blankRow));
}
/** Removes the query row at index `i`, always keeping at least one row. */
function removeRetRow(i: number) {
  if (retRows.length > 1) {
    setRetRows(retRows.filter((_row, idx) => idx !== i));
  }
}
/**
 * Submits every row that has a non-blank query to the retrieval evaluation
 * API and stores the returned metrics.
 *
 * Reports an error and sends nothing when no row has a query. A blank expected
 * match is sent as an empty `relevant_texts` list. While the request runs the
 * "retrieval" loading flag is set; an API failure is reported in the error
 * banner.
 */
async function runRetrieval() {
  const filledRows = retRows.filter((row) => row.query.trim());
  if (filledRows.length === 0) {
    setError("Add at least one query.");
    return;
  }

  setLoading("retrieval");
  setError("");
  try {
    const queries = filledRows.map((row) => {
      const expected = row.relevantText.trim();
      return {
        query: row.query,
        relevant_texts: expected ? [expected] : [],
      };
    });
    const response = await api.evalRetrieval({ queries, k_values: [1, 3, 5, 10] });
    setRetrieval(response.metrics);
  } catch (cause) {
    setError(getErrorMessage(cause));
  } finally {
    // Runs after success and failure alike, so the button is always re-enabled.
    setLoading("");
  }
}
// ---- Meaning labels for dropdown ----
// Recomputed on every render from the current meanings, so the options of the
// meaning select in the examples table always follow the latest descriptions.
const meaningLabels = getMeaningLabels(meanings);
// Layout: sub-tab navigation, description of the active tab, shared error
// banner, then the panel that belongs to the active tab.
// Clicking a tab also clears the error left over from the previous tab.
return (
<div>
<nav className="subtabs mb-2">
{EVAL_TABS.map((t) => (
<button
key={t.id}
className={`subtab ${section === t.id ? "subtab-active" : ""}`}
onClick={() => { setSection(t.id); setError(""); }}
>
{t.label}
</button>
))}
</nav>
<p className="panel-desc">{EVAL_TABS.find((t) => t.id === section)?.desc}</p>
{error && <StatusMessage type="err" message={error} />}
{/* ---- Similarity Distribution ----
The button starts fetchDistribution and stays disabled while that request is in flight.
Results render only once a distribution has been loaded: four summary cards
(3 decimals), a histogram of count per bin_start, and a one-row percentile
table (4 decimals). */}
{section === "distribution" && (
<div className="panel">
<button className="btn btn-primary" onClick={fetchDistribution} disabled={loading === "distrib"}>
{loading === "distrib" ? "Computing..." : "Compute Distribution"}
</button>
{distrib && (
<div className="mt-2">
<div className="metric-grid mb-3">
{[
{ label: "Mean", value: distrib.mean },
{ label: "Std Dev", value: distrib.std },
{ label: "Min", value: distrib.min },
{ label: "Max", value: distrib.max },
].map((m) => (
<MetricCard key={m.label} value={m.value.toFixed(3)} label={m.label} />
))}
</div>
<h3>Histogram</h3>
<ResponsiveContainer width="100%" height={250}>
<BarChart data={distrib.histogram}>
<CartesianGrid strokeDasharray="3 3" stroke="var(--border)" />
<XAxis
dataKey="bin_start"
tick={{ fill: "var(--text-dim)", fontSize: 11 }}
tickFormatter={(v: number) => v.toFixed(1)}
/>
<YAxis tick={{ fill: "var(--text-dim)", fontSize: 11 }} />
<Tooltip
contentStyle={{
background: "var(--surface)",
border: "1px solid var(--border)",
borderRadius: 6,
color: "var(--text)",
}}
formatter={(value: unknown) => [Number(value), "Count"]}
labelFormatter={(v: unknown) => `Similarity: ${Number(v).toFixed(2)}`}
/>
<Bar dataKey="count" radius={[4, 4, 0, 0]}>
{distrib.histogram.map((entry, i) => (
// Bar color by bin start: 0.5 and above uses --ok, 0 up to 0.5 uses --accent, negative uses --err.
<Cell
key={i}
fill={entry.bin_start >= 0.5 ? "var(--ok)" : entry.bin_start >= 0 ? "var(--accent)" : "var(--err)"}
/>
))}
</Bar>
</BarChart>
</ResponsiveContainer>
<h3 className="mt-2">Percentiles</h3>
<table className="data-table">
<thead>
<tr>
{Object.keys(distrib.percentiles).map((p) => (
<th key={p}>P{p}</th>
))}
</tr>
</thead>
<tbody>
<tr>
{Object.values(distrib.percentiles).map((v, i) => (
// Object.values follows the same key order as Object.keys above, so each cell sits under its header.
<td key={i}>{v.toFixed(4)}</td>
))}
</tr>
</tbody>
</table>
</div>
)}
</div>
)}
{/* ---- Disambiguation Evaluation ----
Form order: example loader, keyword, candidate meanings, labeled examples, run button.
Below the form, one result section is rendered per metric entry stored by the last run. */}
{section === "disambiguation" && (
<div className="panel">
<div className="flex-row gap-2 mb-2">
<button className="btn btn-secondary" onClick={loadDisambiguationExample}>
Load Example
</button>
</div>
{/* Keyword: the word whose meanings are being told apart */}
<div className="form-group mb-2" style={{ maxWidth: 300 }}>
<label>Keyword</label>
<input
value={keyword}
onChange={(e) => setKeyword(e.target.value)}
placeholder='e.g. "pizza"'
/>
</div>
{/* Candidate Meanings: free-text descriptions; the remove button only appears while more than two exist */}
<div className="mb-2">
<label className="section-label">
Candidate Meanings
<span className="text-dim"> — describe each possible meaning</span>
</label>
{meanings.map((m, i) => (
<div key={i} className="flex-row gap-1 mb-1">
<span className="text-dim" style={{ minWidth: 24 }}>{i + 1}.</span>
<input
value={m}
onChange={(e) => updateMeaning(i, e.target.value)}
placeholder={`Meaning ${i + 1} description...`}
style={{ flex: 1 }}
/>
{meanings.length > 2 && (
<button className="btn btn-secondary" onClick={() => removeMeaning(i)}>
&times;
</button>
)}
</div>
))}
<button className="btn btn-secondary mt-1" onClick={addMeaning}>
+ Add Meaning
</button>
</div>
{/* Ground Truth Examples: each row pairs a sentence with a label chosen from meaningLabels;
the remove button only appears while more than one row exists */}
<div className="mb-2">
<label className="section-label">
Labeled Examples
<span className="text-dim"> — sentences using the keyword, with the correct meaning</span>
</label>
<table className="data-table">
<thead>
<tr>
<th style={{ width: "60%" }}>Sentence</th>
<th>Correct Meaning</th>
<th style={{ width: 40 }} />
</tr>
</thead>
<tbody>
{gtRows.map((row, i) => (
<tr key={i}>
<td>
<input
value={row.text}
onChange={(e) => updateGtRow(i, "text", e.target.value)}
placeholder="A sentence containing the keyword..."
style={{ width: "100%", background: "var(--surface)", border: "1px solid var(--border)", borderRadius: 4, padding: "4px 8px", color: "var(--text)" }}
/>
</td>
<td>
<select
value={row.meaning}
onChange={(e) => updateGtRow(i, "meaning", e.target.value)}
style={{ width: "100%", background: "var(--surface)", border: "1px solid var(--border)", borderRadius: 4, padding: "4px 8px", color: "var(--text)" }}
>
<option value="">Select...</option>
{meaningLabels.map((label, j) => (
<option key={j} value={label}>{label}</option>
))}
</select>
</td>
<td>
{gtRows.length > 1 && (
<button className="btn btn-secondary" onClick={() => removeGtRow(i)}>
&times;
</button>
)}
</td>
</tr>
))}
</tbody>
</table>
<button className="btn btn-secondary mt-1" onClick={addGtRow}>
+ Add Example
</button>
</div>
<button
className="btn btn-primary"
onClick={runDisambiguation}
disabled={loading === "disambig"}
>
{loading === "disambig" ? "Evaluating..." : "Run Evaluation"}
</button>
{disambig && disambig.map((m) => (
// Accuracy and weighted F1 are shown as percentages with one decimal;
// per-meaning scores are shown with four decimals, or a dash when missing.
<div key={m.keyword} className="mt-3">
<h3>Results: "{m.keyword}" ({m.total_samples} samples)</h3>
<div className="metric-grid mb-2">
<MetricCard value={`${(m.accuracy * 100).toFixed(1)}%`} label="Accuracy" />
<MetricCard value={`${(m.weighted_f1 * 100).toFixed(1)}%`} label="Weighted F1" />
</div>
<h3>Per-Meaning Scores</h3>
<table className="data-table">
<thead>
<tr>
<th>Meaning</th>
<th>Precision</th>
<th>Recall</th>
<th>F1</th>
</tr>
</thead>
<tbody>
{Object.keys(m.per_meaning_f1).map((meaning) => (
<tr key={meaning}>
<td>{meaning}</td>
<td>{m.per_meaning_precision[meaning]?.toFixed(4) ?? "-"}</td>
<td>{m.per_meaning_recall[meaning]?.toFixed(4) ?? "-"}</td>
<td style={{ fontWeight: 700 }}>{m.per_meaning_f1[meaning]?.toFixed(4) ?? "-"}</td>
</tr>
))}
</tbody>
</table>
{m.confusion_matrix && (
<>
<h3 className="mt-2">Confusion Matrix</h3>
<table className="data-table">
<thead>
<tr>
<th>True \ Predicted</th>
{Object.keys(m.per_meaning_f1).map((meaning) => (
<th key={meaning}>{meaning}</th>
))}
</tr>
</thead>
<tbody>
{m.confusion_matrix.map((row, i) => (
// NOTE(review): row and column labels come from the key order of per_meaning_f1;
// this assumes the API orders confusion_matrix the same way - confirm.
<tr key={i}>
<td style={{ fontWeight: 600 }}>{Object.keys(m.per_meaning_f1)[i]}</td>
{row.map((val, j) => (
// Diagonal cells (i === j) are bold and use --ok; non-zero cells off the diagonal use --err.
<td
key={j}
style={{
fontWeight: i === j ? 700 : 400,
color: i === j ? "var(--ok)" : val > 0 ? "var(--err)" : "var(--text-dim)",
}}
>
{val}
</td>
))}
</tr>
))}
</tbody>
</table>
</>
)}
</div>
))}
</div>
)}
{/* ---- Retrieval Evaluation ----
Form: one row per query with an expected-match snippet; the remove button only
appears while more than one row exists.
Results: a per-query table (MRR, P@1, P@3, P@5, top score) followed by averages
over all stored metrics.
NOTE(review): the averages divide by retrieval.length, so an empty metrics list
would render NaN - confirm the API never returns an empty list. */}
{section === "retrieval" && (
<div className="panel">
<div className="flex-row gap-2 mb-2">
<button className="btn btn-secondary" onClick={loadRetrievalExample}>
Load Example
</button>
</div>
<label className="section-label">
Search Queries
<span className="text-dim"> — enter queries and what text they should find</span>
</label>
<table className="data-table mb-2">
<thead>
<tr>
<th style={{ width: "50%" }}>Query</th>
<th>Expected Match (text snippet)</th>
<th style={{ width: 40 }} />
</tr>
</thead>
<tbody>
{retRows.map((row, i) => (
<tr key={i}>
<td>
<input
value={row.query}
onChange={(e) => updateRetRow(i, "query", e.target.value)}
placeholder="A search query..."
style={{ width: "100%", background: "var(--surface)", border: "1px solid var(--border)", borderRadius: 4, padding: "4px 8px", color: "var(--text)" }}
/>
</td>
<td>
<input
value={row.relevantText}
onChange={(e) => updateRetRow(i, "relevantText", e.target.value)}
placeholder="Text that should match..."
style={{ width: "100%", background: "var(--surface)", border: "1px solid var(--border)", borderRadius: 4, padding: "4px 8px", color: "var(--text)" }}
/>
</td>
<td>
{retRows.length > 1 && (
<button className="btn btn-secondary" onClick={() => removeRetRow(i)}>
&times;
</button>
)}
</td>
</tr>
))}
</tbody>
</table>
<div className="flex-row gap-2 mb-2">
<button className="btn btn-secondary" onClick={addRetRow}>
+ Add Query
</button>
<button
className="btn btn-primary"
onClick={runRetrieval}
disabled={loading === "retrieval"}
>
{loading === "retrieval" ? "Evaluating..." : "Run Evaluation"}
</button>
</div>
{retrieval && (
<div className="mt-2">
<table className="data-table">
<thead>
<tr>
<th>Query</th>
<th>MRR</th>
<th>P@1</th>
<th>P@3</th>
<th>P@5</th>
<th>Top Score</th>
</tr>
</thead>
<tbody>
{retrieval.map((m, i) => (
// Queries longer than 50 characters are cut and suffixed with an ellipsis.
// precision_at_k is read with string keys; a missing key renders as a dash.
<tr key={i}>
<td style={{ maxWidth: 300 }}>{m.query.length > 50 ? m.query.slice(0, 50) + "..." : m.query}</td>
<td>{m.mrr.toFixed(3)}</td>
<td>{m.precision_at_k["1"]?.toFixed(2) ?? "-"}</td>
<td>{m.precision_at_k["3"]?.toFixed(2) ?? "-"}</td>
<td>{m.precision_at_k["5"]?.toFixed(2) ?? "-"}</td>
<td>{m.top_score.toFixed(3)}</td>
</tr>
))}
</tbody>
</table>
<div className="metric-grid mt-3">
<MetricCard
value={(retrieval.reduce((s, m) => s + m.mrr, 0) / retrieval.length).toFixed(3)}
label="Mean MRR"
/>
<MetricCard
value={(retrieval.reduce((s, m) => s + (m.precision_at_k["5"] ?? 0), 0) / retrieval.length).toFixed(3)}
label="Mean P@5"
/>
<MetricCard
value={(retrieval.reduce((s, m) => s + m.top_score, 0) / retrieval.length).toFixed(3)}
label="Mean Top Score"
/>
</div>
</div>
)}
</div>
)}
</div>
);
}