File size: 3,202 Bytes
d0abef8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
import json
import requests
import numpy as np


API_URL = "http://localhost:8000/search"

# =====================================================
# Utility: MRR
# =====================================================
def compute_mrr(all_ranks):
    """Mean Reciprocal Rank over a list of 1-based hit ranks.

    Ranks for queries with no hit are simply absent from *all_ranks*;
    an empty list therefore yields 0.0.
    """
    if not all_ranks:
        return 0.0
    # Vectorize: elementwise reciprocal, then average.
    reciprocal = 1.0 / np.asarray(all_ranks, dtype=float)
    return float(np.mean(reciprocal))


# =====================================================
# Utility: NDCG@K
# =====================================================
def compute_ndcg(results, k):
    """NDCG@k for a binary relevance list.

    Parameters
    ----------
    results : list[int]
        Binary relevance (1 = relevant, 0 = not) of the retrieved docs,
        in rank order, e.g. ``[0, 1, 0, ...]``.
    k : int
        Cutoff rank.

    Returns
    -------
    float
        DCG@k divided by the ideal DCG. The ideal DCG places every
        relevant document (up to k of them) at the top ranks, so the
        score is always in [0, 1] even with multiple relevant docs.
        Returns 0 when nothing in the top-k is relevant.
    """
    topk = results[:k]

    dcg = 0.0
    for rank, rel in enumerate(topk, start=1):
        if rel == 1:
            dcg += 1 / np.log2(rank + 1)

    # Ideal DCG: all relevant docs stacked at ranks 1..n_rel.
    # (Previously hard-coded to a single relevant doc, which let NDCG
    # exceed 1 when several retrieved docs were relevant.)
    n_rel = min(sum(1 for rel in topk if rel == 1), k)
    idcg = sum(1 / np.log2(r + 1) for r in range(1, n_rel + 1))

    return dcg / idcg if idcg != 0 else 0


# =====================================================
# MAIN EVALUATION FUNCTION
# =====================================================
def run_evaluation(query_file="generated_queries.json", top_k=10):
    """Evaluate the search API against a set of generated queries.

    Parameters
    ----------
    query_file : str
        Path to a JSON list of ``{"query": ..., "doc_id": ...}`` items.
        Each query has exactly one relevant document (``doc_id`` + ".txt").
    top_k : int
        Number of results requested per query (default 10).

    Returns
    -------
    dict
        Summary with accuracy (%), MRR, mean NDCG@top_k, counts, and a
        per-query detail list.

    Notes
    -----
    Queries whose API call fails (non-200) are skipped: they are excluded
    from accuracy/MRR/NDCG averages but still counted in
    ``total_queries`` and ``incorrect_count``.
    """
    with open(query_file, encoding="utf-8") as f:
        queries = json.load(f)

    correct = []
    ranks = []
    ndcg_scores = []
    detailed = []

    for item in queries:
        query = item["query"]
        expected = item["doc_id"] + ".txt"

        # ----------------------------
        # CALL API (bounded wait so one hung request can't stall the run)
        # ----------------------------
        resp = requests.post(
            API_URL,
            json={"query": query, "top_k": top_k},
            timeout=30,
        )
        if resp.status_code != 200:
            continue

        results = resp.json().get("results", [])
        retrieved = [r["filename"] for r in results]

        # Binary relevance in rank order, for NDCG.
        relevance = [1 if fn == expected else 0 for fn in retrieved]

        # ----------------------------
        # ACCURACY (hit anywhere in top-k)
        # ----------------------------
        hit = expected in retrieved
        correct.append(1 if hit else 0)

        # ----------------------------
        # RANK for MRR (1-based; misses contribute nothing to MRR)
        # ----------------------------
        if hit:
            rank_position = retrieved.index(expected) + 1
            ranks.append(rank_position)
        else:
            rank_position = None

        # ----------------------------
        # NDCG
        # ----------------------------
        ndcg_scores.append(compute_ndcg(relevance, top_k))

        # ----------------------------
        # Save detail
        # ----------------------------
        detailed.append({
            "query": query,
            "expected": expected,
            "retrieved": retrieved,
            "rank": rank_position,
            "is_correct": hit
        })

    # =====================================================
    # FINAL METRICS
    # Guard empty lists: np.mean([]) would emit a RuntimeWarning and
    # return NaN if every request failed or the query file was empty.
    # =====================================================
    accuracy = round(float(np.mean(correct)) * 100, 2) if correct else 0.0
    mrr = round(compute_mrr(ranks), 4)
    mean_ndcg = round(float(np.mean(ndcg_scores)), 4) if ndcg_scores else 0.0

    summary = {
        "accuracy": accuracy,
        "mrr": mrr,
        "ndcg": mean_ndcg,
        "total_queries": len(queries),
        "correct_count": sum(correct),
        "incorrect_count": len(queries) - sum(correct),
        "details": detailed
    }

    return summary