# evaluation/query_runner.py

from searcher.search_engine import SearchEngine


class QueryRunner:
    """
    Runs all evaluation queries through your SearchEngine and collects
    the ranked result lists for scoring.

    The results are formatted exactly as the Evaluator expects:
        {query_id: [(doc_id, score), ...]}  ranked best-first
    """

    def __init__(self, config_path: str = "config.yaml"):
        self.engine = SearchEngine(config_path)

    def _extract_doc_id(self, filepath: str) -> str:
        """
        Strip the dataset prefix from a fake filepath so it matches
        qrels doc_ids.

        Examples:
            "scifact://12345"    → "12345"
            "nfcorpus://MED-10"  → "MED-10"
            "/real/file.pdf"     → "/real/file.pdf"  (real files unchanged)

        This is critical — without stripping, doc_ids like
        "nfcorpus://MED-10" will never match qrels keys like "MED-10"
        and all scores will be 0.0.
        """
        if "://" in filepath:
            return filepath.split("://", 1)[1]
        return filepath

    def run(
        self,
        queries: dict,
        top_k: int = 100,
        mode: str = "full",
    ) -> dict:
        """
        Run all queries and return ranked results.

        Args:
            queries — {query_id: query_text}
            top_k   — number of results per query (use 100 for eval)
            mode    — pipeline variant to test:
                "dense"  → dense retrieval only
                "sparse" → BM25 only
                "hybrid" → dense + BM25 + RRF (no reranker)
                "full"   → complete pipeline with reranker

        Returns:
            dict — {query_id: [(doc_id, rank_score), ...]}
        """
        results = {}
        total = len(queries)

        for i, (query_id, query_text) in enumerate(queries.items(), 1):
            if i % 50 == 0:
                print(f"  Running query {i}/{total}...")

            try:
                if mode == "dense":
                    raw = self.engine.dense_retriever.retrieve(query_text, top_k=top_k)
                    # Negate so higher is better; assumes dense_score is a
                    # distance (lower = closer), as with L2-style indexes.
                    ranked = [
                        (self._extract_doc_id(r["filepath"]), -r["dense_score"])
                        for r in raw
                    ]
                elif mode == "sparse":
                    raw = self.engine.sparse_retriever.retrieve(query_text, top_k=top_k)
                    ranked = [
                        (self._extract_doc_id(r["filepath"]), r["sparse_score"])
                        for r in raw
                    ]
                elif mode == "hybrid":
                    dense_raw = self.engine.dense_retriever.retrieve(query_text, top_k=top_k)
                    sparse_raw = self.engine.sparse_retriever.retrieve(query_text, top_k=top_k)
                    fused = self.engine.fusion_ranker.fuse(dense_raw, sparse_raw, top_k=top_k)
                    ranked = [
                        (self._extract_doc_id(r["filepath"]), r["rrf_score"])
                        for r in fused
                    ]
                else:  # full pipeline
                    output = self.engine.search(query_text, top_k=top_k)
                    ranked = [
                        (
                            self._extract_doc_id(r["filepath"]),
                            r.get("rerank_score", r.get("rrf_score", 0.0)),
                        )
                        for r in output["results"]
                    ]

                # Deduplicate by doc_id:
                # multiple chunks from the same doc → keep only the best score.
                seen = {}
                for doc_id, score in ranked:
                    if doc_id not in seen or score > seen[doc_id]:
                        seen[doc_id] = score

                results[query_id] = sorted(
                    seen.items(), key=lambda x: x[1], reverse=True
                )
            except Exception as e:
                print(f"  Error on query {query_id}: {e}")
                results[query_id] = []

        return results


if __name__ == "__main__":
    from evaluation.dataset_loader import DatasetLoader

    loader = DatasetLoader("data/scifact")
    queries = loader.load_queries()

    runner = QueryRunner()
    results = runner.run(queries, top_k=10, mode="full")

    sample_qid = next(iter(results))
    print(f"\nQuery {sample_qid} top results:")
    for doc_id, score in results[sample_qid][:5]:
        print(f"  doc {doc_id} score={score:.4f}")
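
    # Hedged sketch, not part of the original module: compare the four
    # pipeline variants documented in run() on the same query set, as a
    # quick sanity check that each stage returns results. Assumes only the
    # names defined above (runner, queries); timing uses the stdlib.
    import time

    for variant in ("dense", "sparse", "hybrid", "full"):
        start = time.perf_counter()
        variant_results = runner.run(queries, top_k=10, mode=variant)
        elapsed = time.perf_counter() - start
        answered = sum(1 for ranked in variant_results.values() if ranked)
        print(
            f"  {variant:>6}: {answered}/{len(queries)} queries returned "
            f"results in {elapsed:.1f}s"
        )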