# evaluation/query_runner.py

from searcher.search_engine import SearchEngine


class QueryRunner:
    """
    Runs all evaluation queries through your SearchEngine and collects
    the ranked result lists for scoring.

    The results are formatted exactly as the Evaluator expects:
        {query_id: [(doc_id, score), ...]}, ranked best-first.
    """

    def __init__(self, config_path: str = "config.yaml"):
        self.engine = SearchEngine(config_path)
    def _extract_doc_id(self, filepath: str) -> str:
        """
        Strip the dataset prefix from a fake filepath so it matches qrels doc_ids.

        Examples:
            "scifact://12345"   -> "12345"
            "nfcorpus://MED-10" -> "MED-10"
            "/real/file.pdf"    -> "/real/file.pdf"  (real files unchanged)

        This is critical: without stripping, doc_ids like "nfcorpus://MED-10"
        will never match qrels keys like "MED-10" and all scores will be 0.0.
        """
        if "://" in filepath:
            return filepath.split("://", 1)[1]
        return filepath
    def run(
        self,
        queries: dict,
        top_k: int = 100,
        mode: str = "full",
    ) -> dict:
        """
        Run all queries and return ranked results.

        Args:
            queries: {query_id: query_text}
            top_k:   number of results per query (use 100 for eval)
            mode:    pipeline variant to test:
                "dense"  -- dense retrieval only
                "sparse" -- BM25 only
                "hybrid" -- dense + BM25 + RRF (no reranker)
                "full"   -- complete pipeline with reranker

        Returns:
            dict: {query_id: [(doc_id, rank_score), ...]}
        """
        results = {}
        total = len(queries)

        for i, (query_id, query_text) in enumerate(queries.items(), 1):
            if i % 50 == 0:
                print(f"  Running query {i}/{total}...")

            try:
                if mode == "dense":
                    raw = self.engine.dense_retriever.retrieve(query_text, top_k=top_k)
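                    # NOTE: dense_score is assumed here to be a distance (smaller = better),
                    # so it is negated below to give every mode a higher-is-better score
                    # for the shared dedup/sort step.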
                    ranked = [
                        (self._extract_doc_id(r["filepath"]), -r["dense_score"])
                        for r in raw
                    ]
                elif mode == "sparse":
                    raw = self.engine.sparse_retriever.retrieve(query_text, top_k=top_k)
                    ranked = [
                        (self._extract_doc_id(r["filepath"]), r["sparse_score"])
                        for r in raw
                    ]
                elif mode == "hybrid":
                    dense_raw = self.engine.dense_retriever.retrieve(query_text, top_k=top_k)
                    sparse_raw = self.engine.sparse_retriever.retrieve(query_text, top_k=top_k)
                    fused = self.engine.fusion_ranker.fuse(dense_raw, sparse_raw, top_k=top_k)
                    ranked = [
                        (self._extract_doc_id(r["filepath"]), r["rrf_score"])
                        for r in fused
                    ]
                else:  # full pipeline
                    output = self.engine.search(query_text, top_k=top_k)
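                    # Prefer the reranker's score when present; fall back to the RRF
                    # score for results the reranker did not score, and finally to 0
                    # so unscored entries sort to the bottom.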
                    ranked = [
                        (
                            self._extract_doc_id(r["filepath"]),
                            r.get("rerank_score", r.get("rrf_score", 0)),
                        )
                        for r in output["results"]
                    ]

                # Deduplicate by doc_id:
                # multiple chunks from the same doc -> keep only the best score.
                seen = {}
                for doc_id, score in ranked:
                    if doc_id not in seen or score > seen[doc_id]:
                        seen[doc_id] = score

                results[query_id] = sorted(
                    seen.items(),
                    key=lambda x: x[1],
                    reverse=True,
                )
            except Exception as e:
                print(f"  Error on query {query_id}: {e}")
                results[query_id] = []

        return results


if __name__ == "__main__":
    from evaluation.dataset_loader import DatasetLoader

    loader = DatasetLoader("data/scifact")
    queries = loader.load_queries()

    runner = QueryRunner()
    results = runner.run(queries, top_k=10, mode="full")

    sample_qid = list(results.keys())[0]
    print(f"\nQuery {sample_qid} top results:")
    for doc_id, score in results[sample_qid][:5]:
        print(f"  doc {doc_id} score={score:.4f}")
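
    # Quick ablation sketch (an illustrative addition, not part of the original
    # script): rerun the same queries through every pipeline variant exposed by
    # QueryRunner.run() and report how many queries returned at least one result.
    for variant in ("dense", "sparse", "hybrid", "full"):
        variant_results = runner.run(queries, top_k=10, mode=variant)
        answered = sum(1 for ranked in variant_results.values() if ranked)
        print(f"{variant:>6}: {answered}/{len(queries)} queries returned results")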