import json

from src.retrieval.retriever import Retriever
from src.retrieval.reranker import HybridReranker
from src.evaluation.metrics import (
    precision_at_k as retrieval_precision_at_k,
    recall_at_k,
    mean_reciprocal_rank,
)
from src.ingestion.document_loader import load_documents_from_dir
from src.ingestion.preprocessor import preprocess_documents
from src.ingestion.text_splitter import split_text
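
# For orientation only: the metric helpers are imported from
# src.evaluation.metrics, whose implementations are not shown in this file.
# A minimal sketch of the semantics this script assumes (not executed):
#
#   def precision_at_k(retrieved_idxs, relevant_idxs, k):
#       return len(set(retrieved_idxs[:k]) & set(relevant_idxs)) / k
#
#   def recall_at_k(retrieved_idxs, relevant_idxs, k):
#       hits = len(set(retrieved_idxs[:k]) & set(relevant_idxs))
#       return hits / len(relevant_idxs) if relevant_idxs else 0.0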


def run_evaluation(
    benchmark_path: str = "tests/benchmark.json",
    k: int = 3,
    top_k_dense: int = 10,
    top_k_final: int = 3,
    sparse_alpha: float = 0.5,
):
    """Run the retrieval benchmark and report Precision@k, Recall@k, and MRR."""
    with open(benchmark_path, encoding="utf-8") as f:
        benchmarks = json.load(f)
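
    # Each benchmark entry is expected to carry a query string plus the
    # indices of the chunks that should be retrieved for it. Shape assumed
    # from the keys read below, e.g.:
    #   {"query": "How are refunds processed?", "relevant_idxs": [12, 47]}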

    # Rebuild the chunked corpus with the same splitter settings used at
    # indexing time so rerank indices line up with the stored embeddings.
    docs = load_documents_from_dir("data/raw")
    clean_docs = preprocess_documents(docs)
    chunks = split_text(clean_docs, chunk_size=300, chunk_overlap=50)
    texts = [chunk["content"] for chunk in chunks]

    retriever = Retriever("data/embeddings/batch_000.npy")
    reranker = HybridReranker(
        retriever=retriever,
        chunk_texts=texts,
        reranker_model="cross-encoder/ms-marco-MiniLM-L-12-v2",
        sparse_alpha=sparse_alpha,
    )
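
    # Assumption about HybridReranker's blending (not confirmed by this file):
    # sparse_alpha likely weights lexical vs. dense scores before the
    # cross-encoder pass, roughly
    #   hybrid = sparse_alpha * sparse_score + (1 - sparse_alpha) * dense_score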

    all_retrieved = []
    all_relevant = []

    print(f"Starting reranker evaluation: Precision@{k}, Recall@{k}, MRR")
    print(f"dense top_k: {top_k_dense}, final top_k: {top_k_final}, sparse_alpha: {sparse_alpha}\n")

    for i, entry in enumerate(benchmarks, 1):
        query = entry["query"]
        relevant_idxs = entry.get("relevant_idxs", [])

        # Dense retrieval over top_k_dense candidates, then rerank down to
        # top_k_final.
        idxs, scores = reranker.retrieve_and_rerank(
            query,
            top_k_dense=top_k_dense,
            top_k_final=top_k_final,
        )

        p = retrieval_precision_at_k(retrieved_idxs=idxs, relevant_idxs=relevant_idxs, k=k)
        r = recall_at_k(retrieved_idxs=idxs, relevant_idxs=relevant_idxs, k=k)
        all_retrieved.append(idxs)
        all_relevant.append(relevant_idxs)

        print(f"{i}. Query: {query}")
        print(f"   Precision@{k}: {p:.2f}, Recall@{k}: {r:.2f}")
        print(f"   Retrieved idxs: {idxs}")
        print(f"   Rerank scores: {[f'{s:.4f}' for s in scores]}\n")
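
    # MRR is taken here to be the mean over queries of 1 / rank of the first
    # relevant hit, with 0 when nothing relevant is retrieved (assumed
    # semantics of mean_reciprocal_rank):
    #   MRR = (1 / |Q|) * sum over q of 1 / first_relevant_rank(q)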
    mrr = mean_reciprocal_rank(retrieved_lists=all_retrieved, relevant_idxs_list=all_relevant)
    print(f"Mean reciprocal rank (MRR): {mrr:.2f}\n")


if __name__ == "__main__":
    run_evaluation()
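
# Usage note (the script's filename is not given in the source; evaluate.py is
# assumed here). Run from the repository root so the relative data/ and tests/
# paths resolve:
#   python evaluate.py
# Parameters can also be overridden programmatically, e.g.
#   run_evaluation(k=5, top_k_dense=20, sparse_alpha=0.7)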