Spaces:

softblackhole
/

rag-evaluation-system

Sleeping

rag-evaluation-system / tests /test_eval.py

soft.engineer

init project

e71fabd 4 months ago

2.9 kB

	import pytest
	import tempfile
	import os
	from core.eval import RAGEvaluator
	from core.retrieval import RAGManager
	from core.index import VectorStore
	from core.utils import Chunk, generate_id

	class TestRAGEvaluator:
	    """Unit tests for the private scoring helpers of ``RAGEvaluator``.

	    Each test receives pytest's built-in ``tmp_path`` fixture and uses it as
	    the persistence directory, so pytest manages the directory's lifetime
	    instead of every test leaking a fresh ``tempfile.mkdtemp()`` directory.
	    """

	    @staticmethod
	    def _make_evaluator(tmp_path):
	        """Return a ``RAGEvaluator`` whose ``RAGManager`` persists under *tmp_path*."""
	        # str(): the persist directory was originally passed as a plain
	        # string path, so keep handing the manager a string.
	        rag_manager = RAGManager(persist_directory=str(tmp_path))
	        return RAGEvaluator(rag_manager)

	    def test_calculate_hit_at_k_exact_match(self, tmp_path):
	        """Hit@1 is 1.0 when the retrieved text equals the ground truth."""
	        evaluator = self._make_evaluator(tmp_path)

	        retrieved = ["This is a test document about emergency procedures"]
	        ground_truth = ["This is a test document about emergency procedures"]
	        hit = evaluator._calculate_hit_at_k(retrieved, ground_truth, k=1)

	        assert hit == 1.0

	    def test_calculate_hit_at_k_no_match(self, tmp_path):
	        """Hit@1 is 0.0 when the retrieved text does not match the ground truth."""
	        evaluator = self._make_evaluator(tmp_path)

	        retrieved = ["Unrelated document"]
	        ground_truth = ["Expected document"]
	        hit = evaluator._calculate_hit_at_k(retrieved, ground_truth, k=1)

	        assert hit == 0.0

	    def test_calculate_mrr(self, tmp_path):
	        """MRR is returned as a float within [0.0, 1.0]."""
	        evaluator = self._make_evaluator(tmp_path)

	        retrieved = ["Wrong doc", "Correct doc"]
	        ground_truth = ["Correct doc"]

	        # Nothing is mocked here: the evaluator's real matching logic runs
	        # (presumably embedding-based, per the original author's note), so
	        # the exact reciprocal rank is not pinned -- only type and range.
	        mrr = evaluator._calculate_mrr(retrieved, ground_truth)
	        assert isinstance(mrr, float)
	        assert 0.0 <= mrr <= 1.0

	    def test_documents_match(self, tmp_path):
	        """Identical documents match; results are always boolean-like."""
	        evaluator = self._make_evaluator(tmp_path)

	        doc1 = "This is a test document about emergency procedures"
	        doc2 = "This is a test document about emergency procedures"
	        doc3 = "Completely different content about something else"

	        match_same = evaluator._documents_match(doc1, doc2)
	        # The result may be a bool or numpy.bool_; either compares equal to
	        # True or False, while a raw similarity score would not.
	        assert match_same in (True, False)

	        # Identical documents must be reported as a match.
	        assert match_same

	        # Whether different documents match depends on the threshold, so only
	        # the boolean-like contract is checked. (The previous check compared
	        # bool(x) against True or False and therefore could never fail.)
	        match_diff = evaluator._documents_match(doc1, doc3)
	        assert match_diff in (True, False)