File size: 4,514 Bytes
bb04c5f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
<!DOCTYPE html>
<html lang="en">
<!-- Evaluation dashboard page. The body below renders metric cards and
     ablation tables from the "datasets" template variable, or a fallback
     hint when that variable is falsy. -->
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Evaluation Dashboard</title>
    <!-- Shared stylesheet; the class names used in the body are expected to be defined there. -->
    <link rel="stylesheet" href="/static/style.css">
</head>
<body>

<!-- Top navigation: brand link plus links to the "/" and "/dashboard" routes. -->
<nav>
    <a href="/" class="brand">🔍 Semantic Search</a>
    <a href="/">Search</a>
    <a href="/dashboard">Dashboard</a>
</nav>

<div class="container">

    <!-- Page heading and a one-line description of the evaluated pipeline. -->
    <div style="margin-top:1.5rem;">
        <div class="dash-title">Evaluation Dashboard</div>
        <div class="dash-sub">
            BEIR Benchmark — Full pipeline (Dense + BM25 + RRF + Cross-Encoder)
        </div>
    </div>

    {% if datasets %}

    <!-- metric cards: one card per entry in "datasets", one bar row per metric.
         Rows are generated from a single list of (label, score, bar classes);
         each score is multiplied by 100 to get the bar width in percent, so
         scores are assumed to be fractions between 0 and 1 (confirm against
         the evaluation output). -->
    <div class="dash-grid">
        {% for d in datasets %}
        <div class="metric-card">
            <h3>
                {% if d.name == "scifact" %}🔬{% else %}🏥{% endif %}
                {{ d.name | title }}
                <span style="font-size:0.76rem;color:#999;font-weight:400;">
                    — {{ d.queries }} queries
                </span>
            </h3>

            {% for label, score, bar_class in [
                ("NDCG@10",    d.ndcg,      "bar green"),
                ("MRR",        d.mrr,       "bar"),
                ("MAP@100",    d.map,       "bar amber"),
                ("Recall@100", d.recall,    "bar"),
                ("P@10",       d.precision, "bar amber")
            ] %}
            <div class="metric-row">
                <span class="metric-label">{{ label }}</span>
                <div class="bar-wrap">
                    <div class="{{ bar_class }}" style="width:{{ (score * 100) | round(1) }}%"></div>
                </div>
                <span class="metric-val">{{ "%.4f" | format(score) }}</span>
            </div>
            {% endfor %}
        </div>
        {% endfor %}
    </div>

    <!-- ablation tables: one table per entry in "datasets", one row per item
         of that dataset's "modes" mapping. Header cells and value cells are
         both generated from the same column list so their order cannot drift
         apart; a metric missing from a mode is shown as 0.0000. The row whose
         mode name is "full" gets the "best" class. -->
    {% set ablation_columns = ["NDCG@10", "MAP@100", "MRR", "Recall@100", "P@10"] %}
    {% for d in datasets %}
    <div class="section-label">
        Ablation Table — {{ d.name | title }}
    </div>
    <div class="table-card">
        <table>
            <thead>
                <tr>
                    <th>Mode</th>
                    {% for column in ablation_columns %}
                    <th>{{ column }}</th>
                    {% endfor %}
                </tr>
            </thead>
            <tbody>
                {% for mode_name, m in d.modes.items() %}
                <tr {% if mode_name == "full" %}class="best"{% endif %}>
                    <td>{{ mode_name }}</td>
                    {% for column in ablation_columns %}
                    <td>{{ "%.4f" | format(m.get(column, 0)) }}</td>
                    {% endfor %}
                </tr>
                {% endfor %}
            </tbody>
        </table>
    </div>
    {% endfor %}

    {% else %}
    <!-- Fallback shown when the "datasets" variable is falsy: tells the user
         which command to run to produce evaluation results. -->
    <div class="no-results">
        <p>No evaluation results found.</p>
        <p style="margin-top:0.5rem;font-size:0.85rem;">
            Run:
            <code>python -m evaluation.run_eval --datasets scifact nfcorpus --mode all</code>
        </p>
    </div>
    {% endif %}

</div>
</body>
</html>