import json

from src.retrieval.retriever import Retriever
from src.retrieval.reranker import HybridReranker
from src.evaluation.metrics import (
    precision_at_k as retrieval_precision_at_k,
    recall_at_k,
    mean_reciprocal_rank,
)
from src.ingestion.document_loader import load_documents_from_dir
from src.ingestion.preprocessor import preprocess_documents
from src.ingestion.text_splitter import split_text
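
# For orientation only: the metric helpers are imported from
# src.evaluation.metrics, whose implementations are not shown in this file.
# A minimal sketch of the semantics this script assumes (not executed):
#
#   def precision_at_k(retrieved_idxs, relevant_idxs, k):
#       return len(set(retrieved_idxs[:k]) & set(relevant_idxs)) / k
#
#   def recall_at_k(retrieved_idxs, relevant_idxs, k):
#       hits = len(set(retrieved_idxs[:k]) & set(relevant_idxs))
#       return hits / len(relevant_idxs) if relevant_idxs else 0.0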


def run_evaluation(
    benchmark_path: str = "tests/benchmark.json",
    k: int = 3,
    top_k_dense: int = 10,
    top_k_final: int = 3,
    sparse_alpha: float = 0.5,
):
    """Run the retrieval benchmark and report Precision@k, Recall@k, and MRR."""
    with open(benchmark_path, encoding="utf-8") as f:
        benchmarks = json.load(f)
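
    # Each benchmark entry is expected to carry a query string plus the
    # indices of the chunks that should be retrieved for it. Shape assumed
    # from the keys read below, e.g.:
    #   {"query": "How are refunds processed?", "relevant_idxs": [12, 47]}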

    # Rebuild the chunked corpus with the same splitter settings used at
    # indexing time so rerank indices line up with the stored embeddings.
    docs = load_documents_from_dir("data/raw")
    clean_docs = preprocess_documents(docs)
    chunks = split_text(clean_docs, chunk_size=300, chunk_overlap=50)
    texts = [chunk["content"] for chunk in chunks]

    retriever = Retriever("data/embeddings/batch_000.npy")
    reranker = HybridReranker(
        retriever=retriever,
        chunk_texts=texts,
        reranker_model="cross-encoder/ms-marco-MiniLM-L-12-v2",
        sparse_alpha=sparse_alpha,
    )
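
    # Assumption about HybridReranker's blending (not confirmed by this file):
    # sparse_alpha likely weights lexical vs. dense scores before the
    # cross-encoder pass, roughly
    #   hybrid = sparse_alpha * sparse_score + (1 - sparse_alpha) * dense_score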

    all_retrieved = []
    all_relevant = []

    print(f"Starting reranker evaluation: Precision@{k}, Recall@{k}, MRR")
    print(f"dense top_k: {top_k_dense}, final top_k: {top_k_final}, sparse_alpha: {sparse_alpha}\n")

    for i, entry in enumerate(benchmarks, 1):
        query = entry["query"]
        relevant_idxs = entry.get("relevant_idxs", [])

        # Dense retrieval over top_k_dense candidates, then rerank down to
        # top_k_final.
        idxs, scores = reranker.retrieve_and_rerank(
            query,
            top_k_dense=top_k_dense,
            top_k_final=top_k_final,
        )

        p = retrieval_precision_at_k(retrieved_idxs=idxs, relevant_idxs=relevant_idxs, k=k)
        r = recall_at_k(retrieved_idxs=idxs, relevant_idxs=relevant_idxs, k=k)
        all_retrieved.append(idxs)
        all_relevant.append(relevant_idxs)

        print(f"{i}. Query: {query}")
        print(f"   Precision@{k}: {p:.2f}, Recall@{k}: {r:.2f}")
        print(f"   Retrieved idxs: {idxs}")
        print(f"   Rerank scores: {[f'{s:.4f}' for s in scores]}\n")
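
    # MRR is taken here to be the mean over queries of 1 / rank of the first
    # relevant hit, with 0 when nothing relevant is retrieved (assumed
    # semantics of mean_reciprocal_rank):
    #   MRR = (1 / |Q|) * sum over q of 1 / first_relevant_rank(q)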
    mrr = mean_reciprocal_rank(retrieved_lists=all_retrieved, relevant_idxs_list=all_relevant)
    print(f"Mean reciprocal rank (MRR): {mrr:.2f}\n")


if __name__ == "__main__":
    run_evaluation()
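
# Usage note (the script's filename is not given in the source; evaluate.py is
# assumed here). Run from the repository root so the relative data/ and tests/
# paths resolve:
#   python evaluate.py
# Parameters can also be overridden programmatically, e.g.
#   run_evaluation(k=5, top_k_dense=20, sparse_alpha=0.7)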