Spaces:

caisdev
/

esfiles

Running

esfiles / frontend /src /components /EvaluationDashboard.tsx

Besjon Cifliku

feat: initial project setup

db764ae about 1 month ago

21.8 kB

	import { useState } from "react";
	import {
	BarChart,
	Bar,
	XAxis,
	YAxis,
	CartesianGrid,
	Tooltip,
	ResponsiveContainer,
	Cell,
	} from "recharts";
	import { api, getErrorMessage } from "../api";
	import type { EvalSection, SimilarityDistribution, DisambiguationMetric, RetrievalMetric } from "../types";
	import StatusMessage from "./StatusMessage";
	import MetricCard from "./MetricCard";

	// ---- Structured form types ----

	/** One labeled example row in the disambiguation form. */
	interface GtRow {
	/** Example sentence entered by the user. */
	text: string;
	/** Short meaning label picked in the dropdown; "" while the row is still unlabeled. */
	meaning: string;
	}

	/** One row of the retrieval form: a search query and the text it is expected to find. */
	interface RetrievalRow {
	/** Search query text. */
	query: string;
	/** Expected matching text snippet; may be left empty. */
	relevantText: string;
	}

	// ---- Example data ----

	// Keyword used by the "Load Example" button of the disambiguation form.
	const EXAMPLE_KEYWORD = "pizza";
	// Candidate meaning descriptions for the example keyword.
	// The text before the first comma of each entry becomes its short label.
	const EXAMPLE_MEANINGS = [
	"school, education, and academic activities like homework and tests",
	"food, Italian cuisine, restaurant, cooking, and eating",
	];
	// Labeled example sentences for the example keyword.
	// Each `meaning` must equal a short label derived from EXAMPLE_MEANINGS
	// ("school" / "food"), because those labels are the dropdown option values.
	const EXAMPLE_GT: GtRow[] = [
	{ text: "I love pizza so much, I go there every day", meaning: "school" },
	{ text: "pizza gives me homework", meaning: "school" },
	{ text: "she made the best margherita pizza in the city", meaning: "food" },
	{ text: "pizza dough recipe used tipo 00 flour", meaning: "food" },
	{ text: "The pizza test is going to be so hard", meaning: "school" },
	{ text: "This pizza is amazing, the crust is perfectly crispy", meaning: "food" },
	];

	// Example rows for the retrieval form: a query plus the text snippet it should match.
	const EXAMPLE_RETRIEVAL: RetrievalRow[] = [
	{ query: "kids using secret code words for school", relevantText: "secret language" },
	{ query: "Italian restaurant with wood-fired oven", relevantText: "pizza" },
	];

	// ---- Meaning label helpers ----

	/**
	 * Derives a short display label for every candidate meaning.
	 *
	 * A label is the text before the first comma, trimmed, then cut to at most
	 * 20 characters. The cut happens after trimming, so a truncated label can
	 * still end in a space.
	 *
	 * @param meanings Full meaning descriptions, in display order.
	 * @returns One label per input entry, in the same order.
	 */
	function getMeaningLabels(meanings: string[]): string[] {
	  const labels: string[] = [];
	  for (const description of meanings) {
	    // split() always yields at least one piece; the default only satisfies strict index checks.
	    const [head = ""] = description.split(",");
	    // slice() leaves strings of 20 characters or fewer untouched.
	    labels.push(head.trim().slice(0, 20));
	  }
	  return labels;
	}

	// ---- Tab config ----

	// Sub-tab definitions, in display order.
	// `id` selects the panel, `label` is the button text, and `desc` is the
	// description shown under the tab bar while that tab is active.
	const EVAL_TABS: { id: EvalSection; label: string; desc: string }[] = [
	{
	id: "distribution",
	label: "Distribution",
	desc: "Analyze pairwise similarity distribution across your corpus. One-click — no setup needed.",
	},
	{
	id: "disambiguation",
	label: "Disambiguation",
	desc: "Test whether the engine can tell apart different meanings of the same word. Provide example sentences and label each with the intended meaning.",
	},
	{
	id: "retrieval",
	label: "Retrieval",
	desc: "Measure how well the engine finds relevant documents for a given query. Provide search queries and what text they should match.",
	},
	];

	export default function EvaluationDashboard() {
	// Currently selected sub-tab.
	const [section, setSection] = useState<EvalSection>("distribution");
	// Latest result of each evaluation; null until that evaluation has been run.
	const [distrib, setDistrib] = useState<SimilarityDistribution | null>(null);
	const [disambig, setDisambig] = useState<DisambiguationMetric[] | null>(null);
	const [retrieval, setRetrieval] = useState<RetrievalMetric[] | null>(null);
	// Name of the request in flight ("distrib", "disambig" or "retrieval"); "" when idle.
	const [loading, setLoading] = useState("");
	// Message for the error banner; "" hides the banner.
	const [error, setError] = useState("");

	// Disambiguation structured form
	const [keyword, setKeyword] = useState("");
	// Starts with two blank meanings because the form never drops below two.
	const [meanings, setMeanings] = useState<string[]>(["", ""]);
	const [gtRows, setGtRows] = useState<GtRow[]>([{ text: "", meaning: "" }]);

	// Retrieval structured form
	const [retRows, setRetRows] = useState<RetrievalRow[]>([{ query: "", relevantText: "" }]);

	// ---- Distribution ----

	/**
	 * Requests the similarity distribution and stores it for the Distribution panel.
	 *
	 * Clears any previous error first, surfaces a failure through the error
	 * banner, and always resets the loading marker when the request settles.
	 */
	async function fetchDistribution() {
	  setError("");
	  setLoading("distrib");
	  try {
	    const result = await api.getSimilarityDistribution();
	    setDistrib(result);
	  } catch (err) {
	    setError(getErrorMessage(err));
	  } finally {
	    setLoading("");
	  }
	}

	// ---- Disambiguation ----

	/**
	 * Fills the disambiguation form with the bundled example data.
	 * The constants are copied so later form edits never mutate them.
	 */
	function loadDisambiguationExample() {
	  setGtRows(EXAMPLE_GT.map((row) => Object.assign({}, row)));
	  setMeanings(EXAMPLE_MEANINGS.slice());
	  setKeyword(EXAMPLE_KEYWORD);
	}

	/** Replaces the text of the candidate meaning at index `i` with `val`. */
	function updateMeaning(i: number, val: string) {
	  const updated = meanings.slice();
	  updated[i] = val;
	  setMeanings(updated);
	}

	/** Appends a blank candidate meaning to the form. */
	function addMeaning() {
	  setMeanings(meanings.concat(""));
	}

	/**
	 * Removes the candidate meaning at index `i`; the form never drops below two meanings.
	 *
	 * Labeled examples that pointed at the removed meaning's label are reset to
	 * unlabeled, but only when no remaining meaning still produces that label.
	 */
	function removeMeaning(i: number) {
	  if (meanings.length <= 2) return;

	  const removedLabel = getMeaningLabels(meanings)[i];
	  const remaining = meanings.filter((_, idx) => idx !== i);
	  setMeanings(remaining);

	  // Two meanings can share a label (same text before the first comma). Examples
	  // tagged with that label are still valid as long as one such meaning remains.
	  if (removedLabel === undefined || getMeaningLabels(remaining).includes(removedLabel)) return;

	  setGtRows(gtRows.map((r) => (r.meaning === removedLabel ? { ...r, meaning: "" } : r)));
	}

	/** Sets `field` of the labeled example at index `i` to `val`, without mutating the current rows. */
	function updateGtRow(i: number, field: keyof GtRow, val: string) {
	  const patched = { ...gtRows[i], [field]: val };
	  const updated = Array.from(gtRows);
	  updated[i] = patched;
	  setGtRows(updated);
	}

	/** Appends an empty, unlabeled example row. */
	function addGtRow() {
	  const blank: GtRow = { text: "", meaning: "" };
	  setGtRows(gtRows.concat(blank));
	}

	/** Deletes the example row at index `i`; the last remaining row is never removed. */
	function removeGtRow(i: number) {
	  if (gtRows.length > 1) {
	    setGtRows(gtRows.filter((_row, idx) => idx !== i));
	  }
	}

	/**
	 * Validates the disambiguation form and runs the evaluation.
	 *
	 * Requires a keyword, at least two non-blank meanings, and at least two
	 * examples that have text and a label matching one of the current meanings.
	 * Validation problems and request failures are shown in the error banner.
	 */
	async function runDisambiguation() {
	  const trimmedKeyword = keyword.trim();
	  if (!trimmedKeyword) { setError("Enter a keyword."); return; }
	  const validMeanings = meanings.filter((m) => m.trim());
	  if (validMeanings.length < 2) { setError("Add at least 2 meanings."); return; }

	  // A row keeps its old label if the meaning text is edited after labeling.
	  // Only rows whose label still belongs to a current meaning are usable.
	  const labels = getMeaningLabels(validMeanings);
	  const validGt = gtRows.filter(
	    (r) => r.text.trim() !== "" && r.meaning !== "" && labels.includes(r.meaning),
	  );
	  if (validGt.length < 2) { setError("Add at least 2 labeled examples."); return; }

	  setLoading("disambig");
	  setError("");
	  try {
	    const ground_truth = validGt.map((r) => ({
	      keyword: trimmedKeyword,
	      text: r.text,
	      true_meaning: r.meaning,
	    }));
	    const candidate_meanings: Record<string, string[]> = {
	      [trimmedKeyword]: validMeanings,
	    };
	    // NOTE(review): true_meaning is sent as the short dropdown label, while
	    // candidate_meanings carries the full descriptions. Confirm that the API
	    // matches the two the same way the labels are derived here.
	    const res = await api.evalDisambiguation({ ground_truth, candidate_meanings });
	    setDisambig(res.metrics);
	  } catch (e) {
	    setError(getErrorMessage(e));
	  } finally {
	    setLoading("");
	  }
	}

	// ---- Retrieval ----

	/** Fills the retrieval form with copies of the bundled example rows. */
	function loadRetrievalExample() {
	  const copies = EXAMPLE_RETRIEVAL.map((row) => Object.assign({}, row));
	  setRetRows(copies);
	}

	/** Sets `field` of the retrieval row at index `i` to `val`, without mutating the current rows. */
	function updateRetRow(i: number, field: keyof RetrievalRow, val: string) {
	  const patched = { ...retRows[i], [field]: val };
	  const updated = Array.from(retRows);
	  updated[i] = patched;
	  setRetRows(updated);
	}

	/** Appends an empty query row to the retrieval form. */
	function addRetRow() {
	  const blank: RetrievalRow = { query: "", relevantText: "" };
	  setRetRows(retRows.concat(blank));
	}

	/** Deletes the retrieval row at index `i`; the last remaining row is never removed. */
	function removeRetRow(i: number) {
	  if (retRows.length > 1) {
	    setRetRows(retRows.filter((_row, idx) => idx !== i));
	  }
	}

	/**
	 * Runs the retrieval evaluation for every row that has a non-blank query.
	 *
	 * A row's expected text is sent as a single-element list, or as an empty
	 * list when it was left blank. Failures are shown in the error banner.
	 */
	async function runRetrieval() {
	  const filledRows = retRows.filter(({ query }) => query.trim());
	  if (filledRows.length === 0) {
	    setError("Add at least one query.");
	    return;
	  }

	  setLoading("retrieval");
	  setError("");
	  try {
	    const queries = filledRows.map(({ query, relevantText }) => {
	      const snippet = relevantText.trim();
	      return { query, relevant_texts: snippet ? [snippet] : [] };
	    });
	    const { metrics } = await api.evalRetrieval({ queries, k_values: [1, 3, 5, 10] });
	    setRetrieval(metrics);
	  } catch (e) {
	    setError(getErrorMessage(e));
	  } finally {
	    setLoading("");
	  }
	}

	// ---- Meaning labels for dropdown ----
	// Recomputed on every render from the current meanings; each label is both
	// the text and the value of an option in the "Correct Meaning" dropdown.
	const meaningLabels = getMeaningLabels(meanings);

	return (
	<div>
	{/* Sub-tab bar: picking a tab switches the panel and clears the error banner. */}
	<nav className="subtabs mb-2">
	  {EVAL_TABS.map(({ id, label }) => {
	    const isActive = section === id;
	    return (
	      <button
	        key={id}
	        className={"subtab " + (isActive ? "subtab-active" : "")}
	        onClick={() => {
	          setSection(id);
	          setError("");
	        }}
	      >
	        {label}
	      </button>
	    );
	  })}
	</nav>

	{/* Description of the currently selected tab. */}
	<p className="panel-desc">{EVAL_TABS.find(({ id }) => id === section)?.desc}</p>

	{error && <StatusMessage type="err" message={error} />}

	{/* ---- Similarity Distribution ---- */}
	{section === "distribution" && (
	  <div className="panel">
	    {/* Disabled while its own request is in flight. */}
	    <button className="btn btn-primary" onClick={fetchDistribution} disabled={loading === "distrib"}>
	      {loading === "distrib" ? "Computing..." : "Compute Distribution"}
	    </button>

	    {distrib && (
	      <div className="mt-2">
	        {/* Summary statistics, three decimals each. */}
	        <div className="metric-grid mb-3">
	          {(
	            [
	              ["Mean", distrib.mean],
	              ["Std Dev", distrib.std],
	              ["Min", distrib.min],
	              ["Max", distrib.max],
	            ] as const
	          ).map(([label, value]) => (
	            <MetricCard key={label} value={value.toFixed(3)} label={label} />
	          ))}
	        </div>

	        <h3>Histogram</h3>
	        <ResponsiveContainer width="100%" height={250}>
	          <BarChart data={distrib.histogram}>
	            <CartesianGrid strokeDasharray="3 3" stroke="var(--border)" />
	            <XAxis
	              dataKey="bin_start"
	              tick={{ fill: "var(--text-dim)", fontSize: 11 }}
	              tickFormatter={(v: number) => v.toFixed(1)}
	            />
	            <YAxis tick={{ fill: "var(--text-dim)", fontSize: 11 }} />
	            <Tooltip
	              contentStyle={{
	                background: "var(--surface)",
	                border: "1px solid var(--border)",
	                borderRadius: 6,
	                color: "var(--text)",
	              }}
	              formatter={(value: unknown) => [Number(value), "Count"]}
	              labelFormatter={(v: unknown) => "Similarity: " + Number(v).toFixed(2)}
	            />
	            <Bar dataKey="count" radius={[4, 4, 0, 0]}>
	              {distrib.histogram.map(({ bin_start }, idx) => {
	                // Bins starting below 0 use the error colour, 0 up to 0.5 the accent, 0.5 and above the ok colour.
	                let fill = "var(--err)";
	                if (bin_start >= 0.5) fill = "var(--ok)";
	                else if (bin_start >= 0) fill = "var(--accent)";
	                return <Cell key={idx} fill={fill} />;
	              })}
	            </Bar>
	          </BarChart>
	        </ResponsiveContainer>

	        {/* One column per percentile: names in the header row, values below. */}
	        <h3 className="mt-2">Percentiles</h3>
	        <table className="data-table">
	          <thead>
	            <tr>
	              {Object.entries(distrib.percentiles).map(([name]) => (
	                <th key={name}>P{name}</th>
	              ))}
	            </tr>
	          </thead>
	          <tbody>
	            <tr>
	              {Object.entries(distrib.percentiles).map(([, amount], idx) => (
	                <td key={idx}>{amount.toFixed(4)}</td>
	              ))}
	            </tr>
	          </tbody>
	        </table>
	      </div>
	    )}
	  </div>
	)}

	{/* ---- Disambiguation Evaluation ---- */}
	{section === "disambiguation" && (
	  <div className="panel">
	    <div className="flex-row gap-2 mb-2">
	      <button className="btn btn-secondary" onClick={loadDisambiguationExample}>
	        Load Example
	      </button>
	    </div>

	    {/* Keyword */}
	    <div className="form-group mb-2" style={{ maxWidth: 300 }}>
	      <label>Keyword</label>
	      <input
	        value={keyword}
	        onChange={(e) => setKeyword(e.target.value)}
	        placeholder='e.g. "pizza"'
	      />
	    </div>

	    {/* Candidate Meanings: the remove button appears only while more than two exist. */}
	    <div className="mb-2">
	      <label className="section-label">
	        Candidate Meanings
	        <span className="text-dim"> — describe each possible meaning</span>
	      </label>
	      {meanings.map((m, i) => (
	        <div key={i} className="flex-row gap-1 mb-1">
	          <span className="text-dim" style={{ minWidth: 24 }}>{i + 1}.</span>
	          <input
	            value={m}
	            onChange={(e) => updateMeaning(i, e.target.value)}
	            placeholder={`Meaning ${i + 1} description...`}
	            style={{ flex: 1 }}
	          />
	          {meanings.length > 2 && (
	            <button className="btn btn-secondary" onClick={() => removeMeaning(i)}>
	              ×
	            </button>
	          )}
	        </div>
	      ))}
	      <button className="btn btn-secondary mt-1" onClick={addMeaning}>
	        + Add Meaning
	      </button>
	    </div>

	    {/* Ground Truth Examples */}
	    <div className="mb-2">
	      <label className="section-label">
	        Labeled Examples
	        <span className="text-dim"> — sentences using the keyword, with the correct meaning</span>
	      </label>
	      <table className="data-table">
	        <thead>
	          <tr>
	            <th style={{ width: "60%" }}>Sentence</th>
	            <th>Correct Meaning</th>
	            <th style={{ width: 40 }} />
	          </tr>
	        </thead>
	        <tbody>
	          {gtRows.map((row, i) => (
	            <tr key={i}>
	              <td>
	                <input
	                  value={row.text}
	                  onChange={(e) => updateGtRow(i, "text", e.target.value)}
	                  placeholder="A sentence containing the keyword..."
	                  style={{ width: "100%", background: "var(--surface)", border: "1px solid var(--border)", borderRadius: 4, padding: "4px 8px", color: "var(--text)" }}
	                />
	              </td>
	              <td>
	                <select
	                  value={row.meaning}
	                  onChange={(e) => updateGtRow(i, "meaning", e.target.value)}
	                  style={{ width: "100%", background: "var(--surface)", border: "1px solid var(--border)", borderRadius: 4, padding: "4px 8px", color: "var(--text)" }}
	                >
	                  <option value="">Select...</option>
	                  {meaningLabels.map((label, j) =>
	                    // Blank meaning inputs produce empty labels and repeated labels
	                    // produce identical options; render each real label only once.
	                    label !== "" && meaningLabels.indexOf(label) === j ? (
	                      <option key={j} value={label}>{label}</option>
	                    ) : null,
	                  )}
	                </select>
	              </td>
	              <td>
	                {gtRows.length > 1 && (
	                  <button className="btn btn-secondary" onClick={() => removeGtRow(i)}>
	                    ×
	                  </button>
	                )}
	              </td>
	            </tr>
	          ))}
	        </tbody>
	      </table>
	      <button className="btn btn-secondary mt-1" onClick={addGtRow}>
	        + Add Example
	      </button>
	    </div>

	    <button
	      className="btn btn-primary"
	      onClick={runDisambiguation}
	      disabled={loading === "disambig"}
	    >
	      {loading === "disambig" ? "Evaluating..." : "Run Evaluation"}
	    </button>

	    {/* One result section per metric entry returned by the evaluation. */}
	    {disambig && disambig.map((m) => (
	      <div key={m.keyword} className="mt-3">
	        <h3>Results: "{m.keyword}" ({m.total_samples} samples)</h3>
	        <div className="metric-grid mb-2">
	          <MetricCard value={`${(m.accuracy * 100).toFixed(1)}%`} label="Accuracy" />
	          <MetricCard value={`${(m.weighted_f1 * 100).toFixed(1)}%`} label="Weighted F1" />
	        </div>

	        <h3>Per-Meaning Scores</h3>
	        <table className="data-table">
	          <thead>
	            <tr>
	              <th>Meaning</th>
	              <th>Precision</th>
	              <th>Recall</th>
	              <th>F1</th>
	            </tr>
	          </thead>
	          <tbody>
	            {Object.keys(m.per_meaning_f1).map((meaning) => (
	              <tr key={meaning}>
	                <td>{meaning}</td>
	                <td>{m.per_meaning_precision[meaning]?.toFixed(4) ?? "-"}</td>
	                <td>{m.per_meaning_recall[meaning]?.toFixed(4) ?? "-"}</td>
	                <td style={{ fontWeight: 700 }}>{m.per_meaning_f1[meaning]?.toFixed(4) ?? "-"}</td>
	              </tr>
	            ))}
	          </tbody>
	        </table>

	        {/* NOTE(review): row and column headers assume the matrix is ordered like the keys of per_meaning_f1. Confirm against the API. */}
	        {m.confusion_matrix && (
	          <>
	            <h3 className="mt-2">Confusion Matrix</h3>
	            <table className="data-table">
	              <thead>
	                <tr>
	                  <th>True \ Predicted</th>
	                  {Object.keys(m.per_meaning_f1).map((meaning) => (
	                    <th key={meaning}>{meaning}</th>
	                  ))}
	                </tr>
	              </thead>
	              <tbody>
	                {m.confusion_matrix.map((row, i) => (
	                  <tr key={i}>
	                    <td style={{ fontWeight: 600 }}>{Object.keys(m.per_meaning_f1)[i]}</td>
	                    {row.map((val, j) => (
	                      <td
	                        key={j}
	                        style={{
	                          // Diagonal cells are highlighted; non-zero off-diagonal cells use the error colour.
	                          fontWeight: i === j ? 700 : 400,
	                          color: i === j ? "var(--ok)" : val > 0 ? "var(--err)" : "var(--text-dim)",
	                        }}
	                      >
	                        {val}
	                      </td>
	                    ))}
	                  </tr>
	                ))}
	              </tbody>
	            </table>
	          </>
	        )}
	      </div>
	    ))}
	  </div>
	)}

	{/* ---- Retrieval Evaluation ---- */}
	{section === "retrieval" && (
	  <div className="panel">
	    <div className="flex-row gap-2 mb-2">
	      <button className="btn btn-secondary" onClick={loadRetrievalExample}>
	        Load Example
	      </button>
	    </div>

	    <label className="section-label">
	      Search Queries
	      <span className="text-dim"> — enter queries and what text they should find</span>
	    </label>
	    <table className="data-table mb-2">
	      <thead>
	        <tr>
	          <th style={{ width: "50%" }}>Query</th>
	          <th>Expected Match (text snippet)</th>
	          <th style={{ width: 40 }} />
	        </tr>
	      </thead>
	      <tbody>
	        {retRows.map((row, i) => (
	          <tr key={i}>
	            <td>
	              <input
	                value={row.query}
	                onChange={(e) => updateRetRow(i, "query", e.target.value)}
	                placeholder="A search query..."
	                style={{ width: "100%", background: "var(--surface)", border: "1px solid var(--border)", borderRadius: 4, padding: "4px 8px", color: "var(--text)" }}
	              />
	            </td>
	            <td>
	              <input
	                value={row.relevantText}
	                onChange={(e) => updateRetRow(i, "relevantText", e.target.value)}
	                placeholder="Text that should match..."
	                style={{ width: "100%", background: "var(--surface)", border: "1px solid var(--border)", borderRadius: 4, padding: "4px 8px", color: "var(--text)" }}
	              />
	            </td>
	            <td>
	              {/* The last remaining row cannot be removed. */}
	              {retRows.length > 1 && (
	                <button className="btn btn-secondary" onClick={() => removeRetRow(i)}>
	                  ×
	                </button>
	              )}
	            </td>
	          </tr>
	        ))}
	      </tbody>
	    </table>
	    <div className="flex-row gap-2 mb-2">
	      <button className="btn btn-secondary" onClick={addRetRow}>
	        + Add Query
	      </button>
	      <button
	        className="btn btn-primary"
	        onClick={runRetrieval}
	        disabled={loading === "retrieval"}
	      >
	        {loading === "retrieval" ? "Evaluating..." : "Run Evaluation"}
	      </button>
	    </div>

	    {/* An empty result list is truthy; handle it separately so the means below never divide by zero. */}
	    {retrieval && retrieval.length === 0 && (
	      <p className="text-dim mt-2">No results returned.</p>
	    )}

	    {retrieval && retrieval.length > 0 && (
	      <div className="mt-2">
	        <table className="data-table">
	          <thead>
	            <tr>
	              <th>Query</th>
	              <th>MRR</th>
	              <th>P@1</th>
	              <th>P@3</th>
	              <th>P@5</th>
	              <th>Top Score</th>
	            </tr>
	          </thead>
	          <tbody>
	            {retrieval.map((m, i) => (
	              <tr key={i}>
	                {/* Long queries are cut to 50 characters for display. */}
	                <td style={{ maxWidth: 300 }}>{m.query.length > 50 ? m.query.slice(0, 50) + "..." : m.query}</td>
	                <td>{m.mrr.toFixed(3)}</td>
	                <td>{m.precision_at_k["1"]?.toFixed(2) ?? "-"}</td>
	                <td>{m.precision_at_k["3"]?.toFixed(2) ?? "-"}</td>
	                <td>{m.precision_at_k["5"]?.toFixed(2) ?? "-"}</td>
	                <td>{m.top_score.toFixed(3)}</td>
	              </tr>
	            ))}
	          </tbody>
	        </table>

	        {/* Averages over all returned queries; a missing P@5 counts as 0. */}
	        <div className="metric-grid mt-3">
	          <MetricCard
	            value={(retrieval.reduce((s, m) => s + m.mrr, 0) / retrieval.length).toFixed(3)}
	            label="Mean MRR"
	          />
	          <MetricCard
	            value={(retrieval.reduce((s, m) => s + (m.precision_at_k["5"] ?? 0), 0) / retrieval.length).toFixed(3)}
	            label="Mean P@5"
	          />
	          <MetricCard
	            value={(retrieval.reduce((s, m) => s + m.top_score, 0) / retrieval.length).toFixed(3)}
	            label="Mean Top Score"
	          />
	        </div>
	      </div>
	    )}
	  </div>
	)}
	</div>
	);
	}