Spaces:
Sleeping
Sleeping
File size: 2,507 Bytes
1522dee | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 | import json
import argparse
import sys
def normalize_std(std_string):
    """Return a canonical form of a standard's name for comparison.

    The value is converted to ``str``, every space character (``" "``) is
    dropped, and the result is lowercased, so that e.g. ``"IS 456 : 2000"``
    and ``"is456:2000"`` compare equal.
    """
    text = str(std_string)
    # Strip spaces before lowercasing; only the plain space is removed,
    # other whitespace (tabs, newlines) is kept as-is.
    without_spaces = "".join(text.split(" "))
    return without_spaces.lower()
def _as_standard_list(value):
    """Coerce a JSON field to a list of standards.

    ``None`` (missing key or JSON ``null``) becomes ``[]``; a bare scalar such
    as a single string is wrapped so it is not iterated character by character.
    """
    if value is None:
        return []
    if isinstance(value, (list, tuple)):
        return list(value)
    return [value]


def _latency_seconds(value):
    """Return the latency as a float; ``None`` or non-numeric values count as 0.0."""
    try:
        return float(value)
    except (TypeError, ValueError):
        return 0.0


def _reciprocal_rank(retrieved, expected, cutoff):
    """Return 1/rank of the first expected entry in the top ``cutoff``, else 0.0."""
    for rank, std in enumerate(retrieved[:cutoff], start=1):
        if std in expected:
            return 1.0 / rank
    return 0.0


def evaluate_results(results_file: str) -> None:
    """Score a results JSON file and print Hit Rate @3, MRR @5 and average latency.

    The file must contain a JSON array of objects. Each object may provide
    ``expected_standards``, ``retrieved_standards`` (ranked, best first) and
    ``latency_seconds``; missing or ``null`` fields count as empty / 0.0.
    Standards are compared after ``normalize_std`` is applied to both sides.

    Args:
        results_file: Path to the JSON file to evaluate.

    Raises:
        SystemExit: With status 1 when the file cannot be read or parsed, or
            when its content is not a JSON array of objects.
    """
    try:
        with open(results_file, "r", encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, ValueError) as e:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        print(f"Error reading results file: {e}", file=sys.stderr)
        sys.exit(1)

    # Reject malformed payloads up front instead of failing with an
    # AttributeError halfway through the scoring loop.
    if not isinstance(data, list) or not all(isinstance(item, dict) for item in data):
        print(
            "Error reading results file: expected a JSON array of objects",
            file=sys.stderr,
        )
        sys.exit(1)

    total_queries = len(data)
    if total_queries == 0:
        print("No queries found in the result file.")
        return

    hits_at_3 = 0
    mrr_sum_at_5 = 0.0
    total_latency = 0.0

    for item in data:
        # Normalize both sides so formatting differences do not cause misses.
        expected = {
            normalize_std(std)
            for std in _as_standard_list(item.get("expected_standards"))
        }
        retrieved = [
            normalize_std(std)
            for std in _as_standard_list(item.get("retrieved_standards"))
        ]
        total_latency += _latency_seconds(item.get("latency_seconds"))

        # 1. Hit Rate @3: at least one expected standard within the top 3.
        if not expected.isdisjoint(retrieved[:3]):
            hits_at_3 += 1

        # 2. MRR @5: reciprocal rank of the first correct standard in the top 5.
        mrr_sum_at_5 += _reciprocal_rank(retrieved, expected, cutoff=5)

    # Calculate final metrics
    hit_rate_3 = (hits_at_3 / total_queries) * 100
    mrr_5 = mrr_sum_at_5 / total_queries
    avg_latency = total_latency / total_queries

    print("=" * 40)
    print(" BIS HACKATHON EVALUATION RESULTS")
    print("=" * 40)
    print(f"Total Queries Evaluated : {total_queries}")
    print(f"Hit Rate @3 : {hit_rate_3:.2f}% \t(Target: >80%)")
    print(f"MRR @5 : {mrr_5:.4f} \t(Target: >0.7)")
    print(f"Avg Latency : {avg_latency:.2f} sec \t(Target: <5 seconds)")
    print("=" * 40)
if __name__ == "__main__":
    # Command-line entry point: a single mandatory --results option naming
    # the JSON file to score.
    cli = argparse.ArgumentParser(
        description="Evaluate RAG Pipeline Results for BIS Hackathon"
    )
    cli.add_argument(
        "--results",
        required=True,
        type=str,
        help="Path to the participant's output JSON file",
    )
    results_path = cli.parse_args().results
    evaluate_results(results_path)
|