<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <!-- Responsive viewport: layout width follows the device, no initial zoom. -->
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>Evaluation Dashboard</title>
  <!-- Page stylesheet, referenced by absolute path under /static. -->
  <link rel="stylesheet" href="/static/style.css">
</head>
<body>
<!--
  Primary site navigation: brand link plus the Search and Dashboard pages.
  The brand icon is decorative, so it is hidden from assistive technology.
  NOTE(review): the icon arrived mis-decoded; a magnifying glass is assumed. Confirm the intended glyph.
  NOTE(review): the Dashboard link is marked as the current page on the assumption
  that this template is the one served at /dashboard. Confirm against the route.
-->
<nav aria-label="Primary">
  <a class="brand" href="/"><span aria-hidden="true">🔍</span> Semantic Search</a>
  <a href="/">Search</a>
  <a href="/dashboard" aria-current="page">Dashboard</a>
</nav>
<div class="container">
<!--
  Page header: dashboard title and a one-line description of the evaluated pipeline.
  The title is exposed as the level-1 heading through ARIA so the element type and
  its class-based styling stay exactly as they were.
-->
<div style="margin-top:1.5rem;">
  <div class="dash-title" role="heading" aria-level="1">Evaluation Dashboard</div>
  <div class="dash-sub">
    BEIR Benchmark — Full pipeline (Dense + BM25 + RRF + Cross-Encoder)
  </div>
</div>
{% if datasets %}
<!-- metric cards: one card per dataset, each showing five scores as a bar plus a 4-decimal value -->
<div class="dash-grid">
  {% for d in datasets %}
  <div class="metric-card">
    <h3>
      {# Decorative dataset icon: microscope for "scifact", hospital for any other dataset name. #}
      <span aria-hidden="true">{% if d.name == "scifact" %}🔬{% else %}🏥{% endif %}</span>
      {{ d.name | title }}
      <span style="font-size:0.76rem;color:#999;font-weight:400;">
        — {{ d.queries }} queries
      </span>
    </h3>
    {#
      One entry per metric row: (label, score, extra bar class).
      An empty class leaves the bar with its default "bar" styling only.
    #}
    {% for label, score, tone in [
      ("NDCG@10", d.ndcg, "green"),
      ("MRR", d.mrr, ""),
      ("MAP@100", d.map, "amber"),
      ("Recall@100", d.recall, ""),
      ("P@10", d.precision, "amber")
    ] %}
    {#
      A missing or empty score falls back to 0 instead of raising during the
      multiplication below, matching the 0 fallback used by the ablation table.
    #}
    {% set value = score | default(0, true) %}
    <div class="metric-row">
      <span class="metric-label">{{ label }}</span>
      <div class="bar-wrap">
        {# Bar width is the score scaled to a percentage, rounded to one decimal place. #}
        <div class="bar{% if tone %} {{ tone }}{% endif %}" style="width:{{ (value * 100) | round(1) }}%"></div>
      </div>
      <span class="metric-val">{{ "%.4f" | format(value) }}</span>
    </div>
    {% endfor %}
  </div>
  {% endfor %}
</div>
<!-- ablation tables: one table per dataset, one row per entry in that dataset's modes mapping -->
{# Column order is declared once so header cells and value cells cannot drift apart. #}
{% set ablation_metrics = ["NDCG@10", "MAP@100", "MRR", "Recall@100", "P@10"] %}
{% for d in datasets %}
{# The label carries a per-table id so the table below can take it as its accessible name. #}
<div class="section-label" id="ablation-label-{{ loop.index }}">
  Ablation Table — {{ d.name | title }}
</div>
<div class="table-card">
  <table aria-labelledby="ablation-label-{{ loop.index }}">
    <thead>
      <tr>
        <th scope="col">Mode</th>
        {% for metric in ablation_metrics %}
        <th scope="col">{{ metric }}</th>
        {% endfor %}
      </tr>
    </thead>
    <tbody>
      {% for mode_name, m in d.modes.items() %}
      {# The row whose mode is named "full" gets the "best" class. #}
      <tr{% if mode_name == "full" %} class="best"{% endif %}>
        <td>{{ mode_name }}</td>
        {% for metric in ablation_metrics %}
        {# A metric key absent from this mode's mapping is shown as 0.0000. #}
        <td>{{ "%.4f" | format(m.get(metric, 0)) }}</td>
        {% endfor %}
      </tr>
      {% endfor %}
    </tbody>
  </table>
</div>
{% endfor %}
{% else %}
{# Empty state: shown when the datasets value is empty or missing; points at the command that generates results. #}
<div class="no-results">
  <p>No evaluation results found.</p>
  <p style="margin-top:0.5rem;font-size:0.85rem;">
    Run:
    <code>python -m evaluation.run_eval --datasets scifact nfcorpus --mode all</code>
  </p>
</div>
{% endif %}
</div>
</body>
</html>