| import { useState } from "react"; |
| import { |
| BarChart, |
| Bar, |
| XAxis, |
| YAxis, |
| CartesianGrid, |
| Tooltip, |
| ResponsiveContainer, |
| Cell, |
| } from "recharts"; |
| import { api, getErrorMessage } from "../api"; |
| import type { EvalSection, SimilarityDistribution, DisambiguationMetric, RetrievalMetric } from "../types"; |
| import StatusMessage from "./StatusMessage"; |
| import MetricCard from "./MetricCard"; |
|
|
| |
|
|
/** One labeled example sentence in the disambiguation form. */
interface GtRow {
  /** The example sentence as typed by the user. */
  text: string;
  /** The meaning label chosen for the sentence; `""` while nothing is selected. */
  meaning: string;
}
|
|
/** One row of the retrieval form: a search query and the text it should find. */
interface RetrievalRow {
  /** The search query as typed by the user. */
  query: string;
  /** Text snippet the query is expected to match; may be left empty. */
  relevantText: string;
}
|
|
| |
|
|
/** Keyword filled in by the disambiguation "Load Example" button. */
const EXAMPLE_KEYWORD = "pizza";

/** Candidate meaning descriptions for the example keyword. */
const EXAMPLE_MEANINGS = [
  "school, education, and academic activities like homework and tests",
  "food, Italian cuisine, restaurant, cooking, and eating",
];

/**
 * Labeled example sentences for the example keyword. Each `meaning` equals
 * the first comma-separated segment of one entry in EXAMPLE_MEANINGS.
 */
const EXAMPLE_GT: GtRow[] = [
  {
    text: "I love pizza so much, I go there every day",
    meaning: "school",
  },
  {
    text: "pizza gives me homework",
    meaning: "school",
  },
  {
    text: "she made the best margherita pizza in the city",
    meaning: "food",
  },
  {
    text: "pizza dough recipe used tipo 00 flour",
    meaning: "food",
  },
  {
    text: "The pizza test is going to be so hard",
    meaning: "school",
  },
  {
    text: "This pizza is amazing, the crust is perfectly crispy",
    meaning: "food",
  },
];
|
|
/** Rows filled in by the retrieval "Load Example" button. */
const EXAMPLE_RETRIEVAL: RetrievalRow[] = [
  {
    query: "kids using secret code words for school",
    relevantText: "secret language",
  },
  {
    query: "Italian restaurant with wood-fired oven",
    relevantText: "pizza",
  },
];
|
|
| |
|
|
/**
 * Derives a short label from each meaning description.
 *
 * The label is the text before the first comma, trimmed and capped at 20
 * characters. Truncation counts Unicode code points rather than UTF-16 code
 * units, so a label can never end in half of a surrogate pair (for example a
 * split emoji).
 *
 * @param meanings Meaning descriptions, one per candidate meaning.
 * @returns One label per input, in the same order; a blank description
 *   yields an empty label.
 */
function getMeaningLabels(meanings: string[]): string[] {
  return meanings.map((meaning) => {
    // split() always returns at least one element; the default only keeps the
    // expression well-typed under noUncheckedIndexedAccess.
    const [head = ""] = meaning.split(",");
    // Array.from iterates a string by code point, not by UTF-16 unit.
    return Array.from(head.trim()).slice(0, 20).join("");
  });
}
|
|
| |
|
|
/** Sub-tabs of the dashboard, in display order, with the description shown under the tab bar. */
const EVAL_TABS: Array<{ id: EvalSection; label: string; desc: string }> = [
  {
    id: "distribution",
    label: "Distribution",
    desc:
      "Analyze pairwise similarity distribution across your corpus. One-click — no setup needed.",
  },
  {
    id: "disambiguation",
    label: "Disambiguation",
    desc:
      "Test whether the engine can tell apart different meanings of the same word. Provide example sentences and label each with the intended meaning.",
  },
  {
    id: "retrieval",
    label: "Retrieval",
    desc:
      "Measure how well the engine finds relevant documents for a given query. Provide search queries and what text they should match.",
  },
];
|
|
| export default function EvaluationDashboard() { |
| const [section, setSection] = useState<EvalSection>("distribution"); |
| const [distrib, setDistrib] = useState<SimilarityDistribution | null>(null); |
| const [disambig, setDisambig] = useState<DisambiguationMetric[] | null>(null); |
| const [retrieval, setRetrieval] = useState<RetrievalMetric[] | null>(null); |
| const [loading, setLoading] = useState(""); |
| const [error, setError] = useState(""); |
|
|
| |
| const [keyword, setKeyword] = useState(""); |
| const [meanings, setMeanings] = useState<string[]>(["", ""]); |
| const [gtRows, setGtRows] = useState<GtRow[]>([{ text: "", meaning: "" }]); |
|
|
| |
| const [retRows, setRetRows] = useState<RetrievalRow[]>([{ query: "", relevantText: "" }]); |
|
|
| |
|
|
| async function fetchDistribution() { |
| setLoading("distrib"); |
| setError(""); |
| try { |
| setDistrib(await api.getSimilarityDistribution()); |
| } catch (err) { |
| setError(getErrorMessage(err)); |
| } finally { |
| setLoading(""); |
| } |
| } |
|
|
| |
|
|
| function loadDisambiguationExample() { |
| setKeyword(EXAMPLE_KEYWORD); |
| setMeanings([...EXAMPLE_MEANINGS]); |
| setGtRows(EXAMPLE_GT.map((r) => ({ ...r }))); |
| } |
|
|
| function updateMeaning(i: number, val: string) { |
| const next = [...meanings]; |
| next[i] = val; |
| setMeanings(next); |
| } |
|
|
| function addMeaning() { |
| setMeanings([...meanings, ""]); |
| } |
|
|
| function removeMeaning(i: number) { |
| if (meanings.length <= 2) return; |
| setMeanings(meanings.filter((_, idx) => idx !== i)); |
| |
| const labels = getMeaningLabels(meanings); |
| const removed = labels[i]; |
| setGtRows(gtRows.map((r) => (r.meaning === removed ? { ...r, meaning: "" } : r))); |
| } |
|
|
| function updateGtRow(i: number, field: keyof GtRow, val: string) { |
| const next = [...gtRows]; |
| next[i] = { ...next[i], [field]: val }; |
| setGtRows(next); |
| } |
|
|
| function addGtRow() { |
| setGtRows([...gtRows, { text: "", meaning: "" }]); |
| } |
|
|
| function removeGtRow(i: number) { |
| if (gtRows.length <= 1) return; |
| setGtRows(gtRows.filter((_, idx) => idx !== i)); |
| } |
|
|
| async function runDisambiguation() { |
| if (!keyword.trim()) { setError("Enter a keyword."); return; } |
| const validMeanings = meanings.filter((m) => m.trim()); |
| if (validMeanings.length < 2) { setError("Add at least 2 meanings."); return; } |
| const validGt = gtRows.filter((r) => r.text.trim() && r.meaning); |
| if (validGt.length < 2) { setError("Add at least 2 labeled examples."); return; } |
|
|
| setLoading("disambig"); |
| setError(""); |
| try { |
| const labels = getMeaningLabels(meanings); |
| const ground_truth = validGt.map((r) => ({ |
| keyword: keyword.trim(), |
| text: r.text, |
| true_meaning: r.meaning, |
| })); |
| const candidate_meanings: Record<string, string[]> = { |
| [keyword.trim()]: validMeanings, |
| }; |
| |
| |
| |
| const res = await api.evalDisambiguation({ ground_truth, candidate_meanings }); |
| setDisambig(res.metrics); |
| } catch (e) { |
| setError(getErrorMessage(e)); |
| } finally { |
| setLoading(""); |
| } |
| } |
|
|
| |
|
|
| function loadRetrievalExample() { |
| setRetRows(EXAMPLE_RETRIEVAL.map((r) => ({ ...r }))); |
| } |
|
|
| function updateRetRow(i: number, field: keyof RetrievalRow, val: string) { |
| const next = [...retRows]; |
| next[i] = { ...next[i], [field]: val }; |
| setRetRows(next); |
| } |
|
|
| function addRetRow() { |
| setRetRows([...retRows, { query: "", relevantText: "" }]); |
| } |
|
|
| function removeRetRow(i: number) { |
| if (retRows.length <= 1) return; |
| setRetRows(retRows.filter((_, idx) => idx !== i)); |
| } |
|
|
| async function runRetrieval() { |
| const valid = retRows.filter((r) => r.query.trim()); |
| if (valid.length === 0) { setError("Add at least one query."); return; } |
|
|
| setLoading("retrieval"); |
| setError(""); |
| try { |
| const queries = valid.map((r) => ({ |
| query: r.query, |
| relevant_texts: r.relevantText.trim() ? [r.relevantText.trim()] : [], |
| })); |
| const res = await api.evalRetrieval({ queries, k_values: [1, 3, 5, 10] }); |
| setRetrieval(res.metrics); |
| } catch (e) { |
| setError(getErrorMessage(e)); |
| } finally { |
| setLoading(""); |
| } |
| } |
|
|
| |
| const meaningLabels = getMeaningLabels(meanings); |
|
|
| return ( |
| <div> |
| <nav className="subtabs mb-2"> |
| {EVAL_TABS.map((t) => ( |
| <button |
| key={t.id} |
| className={`subtab ${section === t.id ? "subtab-active" : ""}`} |
| onClick={() => { setSection(t.id); setError(""); }} |
| > |
| {t.label} |
| </button> |
| ))} |
| </nav> |
| |
| <p className="panel-desc">{EVAL_TABS.find((t) => t.id === section)?.desc}</p> |
| |
| {error && <StatusMessage type="err" message={error} />} |
| |
| {/* ---- Similarity Distribution ---- */} |
| {section === "distribution" && ( |
| <div className="panel"> |
| <button className="btn btn-primary" onClick={fetchDistribution} disabled={loading === "distrib"}> |
| {loading === "distrib" ? "Computing..." : "Compute Distribution"} |
| </button> |
| |
| {distrib && ( |
| <div className="mt-2"> |
| <div className="metric-grid mb-3"> |
| {[ |
| { label: "Mean", value: distrib.mean }, |
| { label: "Std Dev", value: distrib.std }, |
| { label: "Min", value: distrib.min }, |
| { label: "Max", value: distrib.max }, |
| ].map((m) => ( |
| <MetricCard key={m.label} value={m.value.toFixed(3)} label={m.label} /> |
| ))} |
| </div> |
| |
| <h3>Histogram</h3> |
| <ResponsiveContainer width="100%" height={250}> |
| <BarChart data={distrib.histogram}> |
| <CartesianGrid strokeDasharray="3 3" stroke="var(--border)" /> |
| <XAxis |
| dataKey="bin_start" |
| tick={{ fill: "var(--text-dim)", fontSize: 11 }} |
| tickFormatter={(v: number) => v.toFixed(1)} |
| /> |
| <YAxis tick={{ fill: "var(--text-dim)", fontSize: 11 }} /> |
| <Tooltip |
| contentStyle={{ |
| background: "var(--surface)", |
| border: "1px solid var(--border)", |
| borderRadius: 6, |
| color: "var(--text)", |
| }} |
| formatter={(value: unknown) => [Number(value), "Count"]} |
| labelFormatter={(v: unknown) => `Similarity: ${Number(v).toFixed(2)}`} |
| /> |
| <Bar dataKey="count" radius={[4, 4, 0, 0]}> |
| {distrib.histogram.map((entry, i) => ( |
| <Cell |
| key={i} |
| fill={entry.bin_start >= 0.5 ? "var(--ok)" : entry.bin_start >= 0 ? "var(--accent)" : "var(--err)"} |
| /> |
| ))} |
| </Bar> |
| </BarChart> |
| </ResponsiveContainer> |
| |
| <h3 className="mt-2">Percentiles</h3> |
| <table className="data-table"> |
| <thead> |
| <tr> |
| {Object.keys(distrib.percentiles).map((p) => ( |
| <th key={p}>P{p}</th> |
| ))} |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| {Object.values(distrib.percentiles).map((v, i) => ( |
| <td key={i}>{v.toFixed(4)}</td> |
| ))} |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| )} |
| </div> |
| )} |
| |
| {/* ---- Disambiguation Evaluation ---- */} |
| {section === "disambiguation" && ( |
| <div className="panel"> |
| <div className="flex-row gap-2 mb-2"> |
| <button className="btn btn-secondary" onClick={loadDisambiguationExample}> |
| Load Example |
| </button> |
| </div> |
| |
| {/* Keyword */} |
| <div className="form-group mb-2" style={{ maxWidth: 300 }}> |
| <label>Keyword</label> |
| <input |
| value={keyword} |
| onChange={(e) => setKeyword(e.target.value)} |
| placeholder='e.g. "pizza"' |
| /> |
| </div> |
| |
| {/* Candidate Meanings */} |
| <div className="mb-2"> |
| <label className="section-label"> |
| Candidate Meanings |
| <span className="text-dim"> — describe each possible meaning</span> |
| </label> |
| {meanings.map((m, i) => ( |
| <div key={i} className="flex-row gap-1 mb-1"> |
| <span className="text-dim" style={{ minWidth: 24 }}>{i + 1}.</span> |
| <input |
| value={m} |
| onChange={(e) => updateMeaning(i, e.target.value)} |
| placeholder={`Meaning ${i + 1} description...`} |
| style={{ flex: 1 }} |
| /> |
| {meanings.length > 2 && ( |
| <button className="btn btn-secondary" onClick={() => removeMeaning(i)}> |
| × |
| </button> |
| )} |
| </div> |
| ))} |
| <button className="btn btn-secondary mt-1" onClick={addMeaning}> |
| + Add Meaning |
| </button> |
| </div> |
| |
| {/* Ground Truth Examples */} |
| <div className="mb-2"> |
| <label className="section-label"> |
| Labeled Examples |
| <span className="text-dim"> — sentences using the keyword, with the correct meaning</span> |
| </label> |
| <table className="data-table"> |
| <thead> |
| <tr> |
| <th style={{ width: "60%" }}>Sentence</th> |
| <th>Correct Meaning</th> |
| <th style={{ width: 40 }} /> |
| </tr> |
| </thead> |
| <tbody> |
| {gtRows.map((row, i) => ( |
| <tr key={i}> |
| <td> |
| <input |
| value={row.text} |
| onChange={(e) => updateGtRow(i, "text", e.target.value)} |
| placeholder="A sentence containing the keyword..." |
| style={{ width: "100%", background: "var(--surface)", border: "1px solid var(--border)", borderRadius: 4, padding: "4px 8px", color: "var(--text)" }} |
| /> |
| </td> |
| <td> |
| <select |
| value={row.meaning} |
| onChange={(e) => updateGtRow(i, "meaning", e.target.value)} |
| style={{ width: "100%", background: "var(--surface)", border: "1px solid var(--border)", borderRadius: 4, padding: "4px 8px", color: "var(--text)" }} |
| > |
| <option value="">Select...</option> |
| {meaningLabels.map((label, j) => ( |
| <option key={j} value={label}>{label}</option> |
| ))} |
| </select> |
| </td> |
| <td> |
| {gtRows.length > 1 && ( |
| <button className="btn btn-secondary" onClick={() => removeGtRow(i)}> |
| × |
| </button> |
| )} |
| </td> |
| </tr> |
| ))} |
| </tbody> |
| </table> |
| <button className="btn btn-secondary mt-1" onClick={addGtRow}> |
| + Add Example |
| </button> |
| </div> |
| |
| <button |
| className="btn btn-primary" |
| onClick={runDisambiguation} |
| disabled={loading === "disambig"} |
| > |
| {loading === "disambig" ? "Evaluating..." : "Run Evaluation"} |
| </button> |
| |
| {disambig && disambig.map((m) => ( |
| <div key={m.keyword} className="mt-3"> |
| <h3>Results: "{m.keyword}" ({m.total_samples} samples)</h3> |
| <div className="metric-grid mb-2"> |
| <MetricCard value={`${(m.accuracy * 100).toFixed(1)}%`} label="Accuracy" /> |
| <MetricCard value={`${(m.weighted_f1 * 100).toFixed(1)}%`} label="Weighted F1" /> |
| </div> |
| |
| <h3>Per-Meaning Scores</h3> |
| <table className="data-table"> |
| <thead> |
| <tr> |
| <th>Meaning</th> |
| <th>Precision</th> |
| <th>Recall</th> |
| <th>F1</th> |
| </tr> |
| </thead> |
| <tbody> |
| {Object.keys(m.per_meaning_f1).map((meaning) => ( |
| <tr key={meaning}> |
| <td>{meaning}</td> |
| <td>{m.per_meaning_precision[meaning]?.toFixed(4) ?? "-"}</td> |
| <td>{m.per_meaning_recall[meaning]?.toFixed(4) ?? "-"}</td> |
| <td style={{ fontWeight: 700 }}>{m.per_meaning_f1[meaning]?.toFixed(4) ?? "-"}</td> |
| </tr> |
| ))} |
| </tbody> |
| </table> |
| |
| {m.confusion_matrix && ( |
| <> |
| <h3 className="mt-2">Confusion Matrix</h3> |
| <table className="data-table"> |
| <thead> |
| <tr> |
| <th>True \ Predicted</th> |
| {Object.keys(m.per_meaning_f1).map((meaning) => ( |
| <th key={meaning}>{meaning}</th> |
| ))} |
| </tr> |
| </thead> |
| <tbody> |
| {m.confusion_matrix.map((row, i) => ( |
| <tr key={i}> |
| <td style={{ fontWeight: 600 }}>{Object.keys(m.per_meaning_f1)[i]}</td> |
| {row.map((val, j) => ( |
| <td |
| key={j} |
| style={{ |
| fontWeight: i === j ? 700 : 400, |
| color: i === j ? "var(--ok)" : val > 0 ? "var(--err)" : "var(--text-dim)", |
| }} |
| > |
| {val} |
| </td> |
| ))} |
| </tr> |
| ))} |
| </tbody> |
| </table> |
| </> |
| )} |
| </div> |
| ))} |
| </div> |
| )} |
|
|
| {} |
| {section === "retrieval" && ( |
| <div className="panel"> |
| <div className="flex-row gap-2 mb-2"> |
| <button className="btn btn-secondary" onClick={loadRetrievalExample}> |
| Load Example |
| </button> |
| </div> |
| |
| <label className="section-label"> |
| Search Queries |
| <span className="text-dim"> — enter queries and what text they should find</span> |
| </label> |
| <table className="data-table mb-2"> |
| <thead> |
| <tr> |
| <th style={{ width: "50%" }}>Query</th> |
| <th>Expected Match (text snippet)</th> |
| <th style={{ width: 40 }} /> |
| </tr> |
| </thead> |
| <tbody> |
| {retRows.map((row, i) => ( |
| <tr key={i}> |
| <td> |
| <input |
| value={row.query} |
| onChange={(e) => updateRetRow(i, "query", e.target.value)} |
| placeholder="A search query..." |
| style={{ width: "100%", background: "var(--surface)", border: "1px solid var(--border)", borderRadius: 4, padding: "4px 8px", color: "var(--text)" }} |
| /> |
| </td> |
| <td> |
| <input |
| value={row.relevantText} |
| onChange={(e) => updateRetRow(i, "relevantText", e.target.value)} |
| placeholder="Text that should match..." |
| style={{ width: "100%", background: "var(--surface)", border: "1px solid var(--border)", borderRadius: 4, padding: "4px 8px", color: "var(--text)" }} |
| /> |
| </td> |
| <td> |
| {retRows.length > 1 && ( |
| <button className="btn btn-secondary" onClick={() => removeRetRow(i)}> |
| × |
| </button> |
| )} |
| </td> |
| </tr> |
| ))} |
| </tbody> |
| </table> |
| <div className="flex-row gap-2 mb-2"> |
| <button className="btn btn-secondary" onClick={addRetRow}> |
| + Add Query |
| </button> |
| <button |
| className="btn btn-primary" |
| onClick={runRetrieval} |
| disabled={loading === "retrieval"} |
| > |
| {loading === "retrieval" ? "Evaluating..." : "Run Evaluation"} |
| </button> |
| </div> |
| |
| {retrieval && ( |
| <div className="mt-2"> |
| <table className="data-table"> |
| <thead> |
| <tr> |
| <th>Query</th> |
| <th>MRR</th> |
| <th>P@1</th> |
| <th>P@3</th> |
| <th>P@5</th> |
| <th>Top Score</th> |
| </tr> |
| </thead> |
| <tbody> |
| {retrieval.map((m, i) => ( |
| <tr key={i}> |
| <td style={{ maxWidth: 300 }}>{m.query.length > 50 ? m.query.slice(0, 50) + "..." : m.query}</td> |
| <td>{m.mrr.toFixed(3)}</td> |
| <td>{m.precision_at_k["1"]?.toFixed(2) ?? "-"}</td> |
| <td>{m.precision_at_k["3"]?.toFixed(2) ?? "-"}</td> |
| <td>{m.precision_at_k["5"]?.toFixed(2) ?? "-"}</td> |
| <td>{m.top_score.toFixed(3)}</td> |
| </tr> |
| ))} |
| </tbody> |
| </table> |
| |
| <div className="metric-grid mt-3"> |
| <MetricCard |
| value={(retrieval.reduce((s, m) => s + m.mrr, 0) / retrieval.length).toFixed(3)} |
| label="Mean MRR" |
| /> |
| <MetricCard |
| value={(retrieval.reduce((s, m) => s + (m.precision_at_k["5"] ?? 0), 0) / retrieval.length).toFixed(3)} |
| label="Mean P@5" |
| /> |
| <MetricCard |
| value={(retrieval.reduce((s, m) => s + m.top_score, 0) / retrieval.length).toFixed(3)} |
| label="Mean Top Score" |
| /> |
| </div> |
| </div> |
| )} |
| </div> |
| )} |
| </div> |
| ); |
| } |
|
|