Spaces:
Sleeping
Sleeping
| import json | |
| import argparse | |
| import sys | |
def normalize_std(std_string):
    """Return a canonical form of a standard name for fair matching.

    The value is converted to ``str``, every whitespace character is removed
    (spaces, tabs, newlines, non-breaking spaces, ...) and the result is
    lowercased, so that e.g. ``"IS 269 : 2015"`` and ``"is269:2015\\n"``
    compare equal.

    Args:
        std_string: The standard name; non-string values are stringified.

    Returns:
        The whitespace-free, lowercase name.
    """
    # str.split() with no arguments splits on any run of whitespace, so
    # re-joining the pieces drops all of it, not just the ASCII space.
    return "".join(str(std_string).split()).lower()
def _as_list(value):
    """Coerce a JSON field into a list of entries.

    ``None`` becomes an empty list and any other non-list value (for example
    a bare string) becomes a one-element list, so a single standard is never
    iterated character by character.
    """
    if value is None:
        return []
    if isinstance(value, (list, tuple)):
        return list(value)
    return [value]


def evaluate_results(results_file):
    """Print Hit Rate @3, MRR @5 and average latency for a results file.

    The file must contain a JSON array of objects. Each object may carry
    ``expected_standards``, ``retrieved_standards`` and ``latency_seconds``;
    missing or ``null`` values count as empty / zero. Standard names are
    compared after ``normalize_std``.

    Args:
        results_file: Path to the JSON file to evaluate.

    Raises:
        SystemExit: With status 1 when the file cannot be read or parsed, or
            when its content is not a JSON array of objects. The reason is
            written to stderr.
    """
    try:
        with open(results_file, "r", encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, ValueError) as e:
        # OSError covers missing/unreadable files; ValueError covers both
        # json.JSONDecodeError and UnicodeDecodeError.
        print(f"Error reading results file: {e}", file=sys.stderr)
        sys.exit(1)

    if not isinstance(data, list):
        print(
            "Error reading results file: expected a JSON array of query "
            f"results, got {type(data).__name__}",
            file=sys.stderr,
        )
        sys.exit(1)

    total_queries = len(data)
    if total_queries == 0:
        print("No queries found in the result file.")
        return

    hits_at_3 = 0
    mrr_sum_at_5 = 0.0
    total_latency = 0.0

    for index, item in enumerate(data):
        if not isinstance(item, dict):
            print(
                f"Error reading results file: entry {index} is not a JSON object",
                file=sys.stderr,
            )
            sys.exit(1)

        # Normalize expected and retrieved standards.
        expected = {
            normalize_std(std) for std in _as_list(item.get("expected_standards"))
        }
        retrieved = [
            normalize_std(std) for std in _as_list(item.get("retrieved_standards"))
        ]

        # A null latency is treated like a missing one.
        total_latency += item.get("latency_seconds") or 0.0

        # Rank (1-based) of the first correct standard within the top 5.
        first_rank = next(
            (
                rank
                for rank, std in enumerate(retrieved[:5], start=1)
                if std in expected
            ),
            None,
        )
        if first_rank is not None:
            # MRR @5: reciprocal rank of the first correct standard.
            mrr_sum_at_5 += 1.0 / first_rank
            # Hit Rate @3: some expected standard is in the top 3 exactly
            # when the first correct one is ranked 3 or better.
            if first_rank <= 3:
                hits_at_3 += 1

    # Calculate final metrics.
    hit_rate_3 = (hits_at_3 / total_queries) * 100
    mrr_5 = mrr_sum_at_5 / total_queries
    avg_latency = total_latency / total_queries

    print("=" * 40)
    print(" BIS HACKATHON EVALUATION RESULTS")
    print("=" * 40)
    print(f"Total Queries Evaluated : {total_queries}")
    print(f"Hit Rate @3 : {hit_rate_3:.2f}% \t(Target: >80%)")
    print(f"MRR @5 : {mrr_5:.4f} \t(Target: >0.7)")
    print(f"Avg Latency : {avg_latency:.2f} sec \t(Target: <5 seconds)")
    print("=" * 40)
if __name__ == "__main__":
    # Command-line entry point: a single mandatory --results option naming
    # the JSON file that is handed to evaluate_results.
    cli = argparse.ArgumentParser(
        description="Evaluate RAG Pipeline Results for BIS Hackathon"
    )
    results_option = {
        "type": str,
        "required": True,
        "help": "Path to the participant's output JSON file",
    }
    cli.add_argument("--results", **results_option)
    evaluate_results(cli.parse_args().results)