File size: 9,747 Bytes
08fc97e
 
 
 
 
 
 
 
 
 
4b785d9
08fc97e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4b785d9
08fc97e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
// /eval dashboard: fetches /eval/data and renders the Chart.js panels.
//
// We rely on the browser already having auth'd via HTTP Basic for the
// /eval HTML page, so the /eval/data fetch below carries the same
// credentials automatically. No manual token handling needed.

// Fill colour for each retrieval source shown on the dashboard. Callers look
// entries up by source name and fall back to a slate tone for unknown names.
const SOURCE_COLORS = {
  mtsamples: "#22d3ee", // cyan-400
  pubmed: "#e879f9", // fuchsia-400
  icd11: "#34d399", // emerald-400
  icd12: "#fbbf24", // amber-400
};

// Neutral slate tones for text, legends, and grid lines (lightest to darkest).
const SLATE_300 = "#cbd5e1";
const SLATE_400 = "#94a3b8";
const SLATE_600 = "#475569";
const SLATE_800 = "#1e293b";

// Dark-theme overrides on Chart.js' global defaults: muted slate text, near-black
// borders, and the UI font stack.
Object.assign(Chart.defaults, {
  color: SLATE_400,
  borderColor: SLATE_800,
});
Chart.defaults.font.family = '-apple-system, "Inter", "Segoe UI", sans-serif';

// Display formatters for the headline cards. A null/undefined metric renders as
// an em dash; it is written as the \u2014 escape so the placeholder cannot be
// corrupted by a file-encoding mix-up.

// PCT: fraction (0..1 expected) -> whole-number percentage string, e.g. 0.256 -> "26%".
const PCT = (v) => (v == null ? "\u2014" : `${Math.round(v * 100)}%`);
// MS: millisecond count -> rounded string with unit, e.g. 12.4 -> "12 ms".
const MS = (v) => (v == null ? "\u2014" : `${Math.round(v)} ms`);

// Entry point: fetch the eval payload once and hand it to render(). Any failure
// (rejected fetch, non-OK status, unparsable JSON, or an exception thrown while
// rendering) is logged and shown in the #eval-error banner.
(async function init() {
  try {
    const res = await fetch("/eval/data", { credentials: "include" });
    if (!res.ok) throw new Error(`/eval/data returned ${res.status}`);
    const data = await res.json();
    render(data);
  } catch (err) {
    // Log first so the original failure is never lost, even if the banner
    // markup is missing from the page.
    console.error(err);

    const reason = err instanceof Error ? err.message : String(err);
    const msg = document.getElementById("eval-error-msg");
    if (msg) {
      msg.textContent =
        `Couldn't load eval data: ${reason}. ` +
        `Make sure eval/run_eval.py has been run at least once.`;
    }
    // Guarded: a missing banner must not turn the handler into a second error.
    document.getElementById("eval-error")?.classList.remove("hidden");
  }
})();

// Draws every dashboard panel from the /eval/data payload.
//   data.runs   - list of eval runs; the last entry is treated as the latest.
//   data.corpus - corpus statistics; an empty structure is used when absent.
// Per-run panels are drawn only when a latest run exists, and the run-history
// section is revealed only when there is more than one run.
function render(data) {
  const history = data.runs || [];
  const corpusStats = data.corpus || { docs: {}, chunks: {}, sections: [] };
  const newest = history.length ? history[history.length - 1] : null;

  renderAggregateCards(newest, history);

  if (newest) {
    for (const drawPanel of [renderPerQuery, renderSourceMix, renderLatency]) {
      drawPanel(newest);
    }
  }

  if (history.length > 1) {
    const historySection = document.getElementById("run-history-section");
    historySection.classList.remove("hidden");
    renderRunHistory(history);
  }

  for (const drawPanel of [renderCorpus, renderSections]) {
    drawPanel(corpusStats);
  }
}

// ─── Aggregate headline cards ──────────────────────────────────────────────
// Fills #agg-cards with one headline card per aggregate metric.
//   latest - most recent run, or null when no run exists yet.
//   runs   - every run; only its length is displayed.
// Metrics that are missing (no run, or a run without an `aggregate` object)
// render as an em dash instead of throwing.
function renderAggregateCards(latest, runs) {
  const host = document.getElementById("agg-cards");
  // A run without `aggregate` is treated like "no metrics" rather than crashing.
  const agg = latest?.aggregate ?? {};
  const cards = [
    { label: "queries", value: latest?.n_queries ?? "\u2014", color: "cyan" },
    { label: "runs", value: runs.length, color: "slate" },
    { label: "routing top-1", value: PCT(agg.source_routing_top1_rate), color: "cyan" },
    { label: "keyword recall", value: PCT(agg.mean_keyword_recall), color: "fuchsia" },
    { label: "citation valid", value: PCT(agg.mean_citation_validity), color: "emerald" },
    { label: "off-topic refusal", value: PCT(agg.off_topic_refusal_rate), color: "amber" },
    { label: "negation pass", value: PCT(agg.negation_pass_rate), color: "emerald" },
    { label: "mean total", value: MS(agg.mean_total_ms), color: "slate" },
  ];
  host.innerHTML = cards.map(c => `
    <div class="agg-card agg-${c.color}">
      <div class="text-2xl font-light text-slate-100">${c.value}</div>
      <div class="text-[10px] uppercase tracking-widest text-slate-500 mt-1">${c.label}</div>
    </div>
  `).join("");
}

// ─── Per-query source-recall + keyword-recall grouped bars ─────────────────
// Grouped bar chart on #chart-per-query: for every query of `run` that is not
// flagged off_topic, three bars (source recall@5, keyword recall, citation
// validity) scaled from fractions to 0-100. A missing metric is drawn as 0.
function renderPerQuery(run) {
  const onTopic = run.per_query.filter((row) => !row.off_topic);

  // [legend label, per-query field, fill colour] for each bar series.
  const seriesSpecs = [
    ["source-recall@5", "source_recall_top5", "rgba(34,211,238,0.7)"], // cyan
    ["keyword recall", "keyword_recall", "rgba(232,121,249,0.7)"], // fuchsia
    ["citation validity", "citation_validity", "rgba(52,211,153,0.7)"], // emerald
  ];
  const datasets = seriesSpecs.map(([label, field, backgroundColor]) => ({
    label,
    data: onTopic.map((row) => (row[field] ?? 0) * 100),
    backgroundColor,
    borderRadius: 3,
  }));

  const config = {
    type: "bar",
    data: {
      labels: onTopic.map((row) => row.id),
      datasets,
    },
    options: {
      responsive: true,
      maintainAspectRatio: false,
      plugins: { legend: { labels: { color: SLATE_300 } } },
      scales: {
        y: {
          beginAtZero: true,
          max: 100,
          ticks: { callback: (tick) => `${tick}%` },
        },
      },
    },
  };
  new Chart(document.getElementById("chart-per-query"), config);
}

// ─── Top-5 source mix per query (stacked horizontal bars) ──────────────────
// Stacked horizontal bar chart on #chart-source-mix: for every query of `run`
// that is not flagged off_topic, how many of its `sources_top5` entries came
// from each known source. A query without `sources_top5` counts as zero hits.
function renderSourceMix(run) {
  const onTopic = run.per_query.filter((row) => !row.off_topic);

  const datasets = [];
  for (const source of ["mtsamples", "pubmed", "icd11", "icd12"]) {
    const hitsPerQuery = onTopic.map((row) =>
      (row.sources_top5 || []).reduce(
        (hits, candidate) => (candidate === source ? hits + 1 : hits),
        0,
      ),
    );
    datasets.push({
      label: source,
      data: hitsPerQuery,
      backgroundColor: SOURCE_COLORS[source],
      borderRadius: 2,
      stack: "top5",
    });
  }

  const options = {
    responsive: true,
    maintainAspectRatio: false,
    indexAxis: "y",
    plugins: { legend: { labels: { color: SLATE_300 } } },
    scales: {
      // The x axis is pinned to 0..5 in whole steps.
      x: { stacked: true, min: 0, max: 5, ticks: { stepSize: 1 } },
      y: { stacked: true },
    },
  };

  new Chart(document.getElementById("chart-source-mix"), {
    type: "bar",
    data: { labels: onTopic.map((row) => row.id), datasets },
    options,
  });
}

// ─── Latency (retrieval vs generation) per query ───────────────────────────
// Stacked bar chart on #chart-latency: retrieval and generation time (ms) for
// every query of `run`, off-topic ones included. A missing timing is drawn as 0.
function renderLatency(run) {
  const queries = run.per_query;

  // Both series share stack "t" so they pile into a single bar per query.
  const timingSeries = (label, field, backgroundColor) => ({
    label,
    data: queries.map((row) => row[field] ?? 0),
    backgroundColor,
    stack: "t",
    borderRadius: 2,
  });

  const config = {
    type: "bar",
    data: {
      labels: queries.map((row) => row.id),
      datasets: [
        timingSeries("retrieval ms", "retrieval_ms", "rgba(34,211,238,0.8)"),
        timingSeries("generation ms", "generation_ms", "rgba(232,121,249,0.8)"),
      ],
    },
    options: {
      responsive: true,
      maintainAspectRatio: false,
      plugins: { legend: { labels: { color: SLATE_300 } } },
      scales: {
        x: {
          stacked: true,
          ticks: { maxRotation: 45, minRotation: 45 },
        },
        y: {
          stacked: true,
          beginAtZero: true,
          ticks: { callback: (tick) => `${tick} ms` },
        },
      },
    },
  };
  new Chart(document.getElementById("chart-latency"), config);
}

// ─── Aggregate metrics across all runs ─────────────────────────────────────
// Line chart on #chart-run-history: one line per aggregate metric, one point
// per run, scaled from fractions to 0-100.
//   runs - every eval run; `timestamp` labels the x axis, `aggregate` holds
//          the metrics.
// A metric that a run does not report stays null in the series. Multiplying
// null by 100 would coerce it to 0 and plot a fake 0 % score, so the scaling
// is applied only to present values.
function renderRunHistory(runs) {
  const labels = runs.map(r => (r.timestamp || "").replace("T", " ").replace("Z", ""));
  const series = [
    { key: "source_routing_top1_rate", label: "routing top-1", color: "rgba(34,211,238,0.9)" },
    { key: "mean_source_recall_top5",  label: "source recall@5", color: "rgba(232,121,249,0.9)" },
    { key: "mean_keyword_recall",      label: "keyword recall", color: "rgba(52,211,153,0.9)" },
    { key: "mean_citation_validity",   label: "citation valid", color: "rgba(251,191,36,0.9)" },
  ];
  // Fraction -> percentage, preserving "no value" as null.
  const toPercent = (value) => (value == null ? null : value * 100);
  new Chart(document.getElementById("chart-run-history"), {
    type: "line",
    data: {
      labels,
      datasets: series.map(s => ({
        label: s.label,
        data: runs.map(r => toPercent(r.aggregate?.[s.key])),
        borderColor: s.color,
        backgroundColor: s.color,
        tension: 0.3, pointRadius: 4,
      })),
    },
    options: {
      responsive: true, maintainAspectRatio: false,
      plugins: { legend: { labels: { color: SLATE_300 } } },
      scales: {
        y: { beginAtZero: true, max: 100,
             ticks: { callback: v => v + "%" } },
      },
    },
  });
}

// ─── Corpus stats: docs + chunks per source ────────────────────────────────
// Bar chart on #chart-corpus: document and chunk counts per source. The set of
// sources is taken from the keys of `corpus.chunks`; a source missing from
// `corpus.docs` shows 0 documents. Chunk bars reuse the source colour with a
// "55" hex alpha suffix appended.
function renderCorpus(corpus) {
  const names = Object.keys(corpus.chunks || {});
  const baseColors = names.map((name) => SOURCE_COLORS[name] || SLATE_400);
  const countsFrom = (table) => names.map((name) => table?.[name] ?? 0);

  const documentsSeries = {
    label: "documents",
    data: countsFrom(corpus.docs),
    backgroundColor: baseColors,
    borderRadius: 3,
    yAxisID: "y",
  };
  const chunksSeries = {
    label: "chunks",
    data: countsFrom(corpus.chunks),
    backgroundColor: baseColors.map((color) => `${color}55`),
    borderRadius: 3,
    yAxisID: "y",
  };

  new Chart(document.getElementById("chart-corpus"), {
    type: "bar",
    data: { labels: names, datasets: [documentsSeries, chunksSeries] },
    options: {
      responsive: true,
      maintainAspectRatio: false,
      plugins: { legend: { labels: { color: SLATE_300 } } },
      scales: { y: { beginAtZero: true } },
    },
  });
}

// ─── Top sections per source (horizontal bar) ──────────────────────────────
// Horizontal bar chart on #chart-sections: chunk count `n` for the first 15
// entries of `corpus.sections`, labelled "<source_type> / <section>" and
// coloured by source (slate for sources without a palette entry).
function renderSections(corpus) {
  const shown = (corpus.sections || []).slice(0, 15);

  const labels = [];
  const counts = [];
  const fills = [];
  for (const entry of shown) {
    labels.push(`${entry.source_type} / ${entry.section}`);
    counts.push(entry.n);
    fills.push(SOURCE_COLORS[entry.source_type] || SLATE_400);
  }

  new Chart(document.getElementById("chart-sections"), {
    type: "bar",
    data: {
      labels,
      datasets: [
        { label: "chunks", data: counts, backgroundColor: fills, borderRadius: 2 },
      ],
    },
    options: {
      responsive: true,
      maintainAspectRatio: false,
      indexAxis: "y",
      plugins: { legend: { display: false } },
      scales: { x: { beginAtZero: true } },
    },
  });
}