import json
import tempfile
from pathlib import Path

import numpy as np
import pytest

from evaluation.config import GeneratorConfig, PipelineConfig, RetrieverConfig
from evaluation.pipeline import RAGPipeline


class _DummyGenerator:
    """Stand-in generator that always returns a fixed answer.

    It is patched in place of ``HFGenerator`` so the test never touches a
    real HF pipeline.
    """

    def __init__(self, *args, **kwargs) -> None:
        # Accept and discard any constructor arguments: this class replaces
        # ``HFGenerator`` itself, so it may be instantiated with whatever the
        # real class is given. Without this, such a call raises TypeError.
        pass

    def generate(self, question: str, contexts: list[str], **kwargs) -> str:
        """Return the constant ``"DUMMY_ANSWER"``, ignoring all inputs."""
        return "DUMMY_ANSWER"

    def __repr__(self) -> str:
        return "DummyGenerator"


@pytest.fixture
def tmp_doc_store(tmp_path_factory):
    """Write a three-document JSONL store to a temp dir and return its path.

    Each line of ``docs.jsonl`` is one JSON object with ``id`` and ``text``
    keys.
    """
    docs = [
        {"id": 0, "text": "Retrieval Augmented Generation combines retrieval and generation."},
        {"id": 1, "text": "BM25 is a strong baseline."},
        {"id": 2, "text": "FAISS enables efficient similarity search."},
    ]
    doc_path = tmp_path_factory.mktemp("docs") / "docs.jsonl"
    # Explicit encoding keeps the fixture independent of the platform default.
    with doc_path.open("w", encoding="utf-8") as f:
        for row in docs:
            f.write(json.dumps(row) + "\n")
    return doc_path


def test_pipeline_with_dense(tmp_doc_store, monkeypatch, tmp_path):
    """Run ``RAGPipeline`` with the ``dense`` retriever and a stubbed generator.

    Checks that ``run_queries`` returns a non-empty list whose entries all
    contain an ``"answer"`` key.
    """
    import evaluation.generators.hf_generator as hf_module

    # NOTE(review): this patch only takes effect if the pipeline resolves
    # ``HFGenerator`` through the module attribute after this point — confirm
    # against ``evaluation.pipeline``.
    monkeypatch.setattr(hf_module, "HFGenerator", _DummyGenerator)

    cfg = PipelineConfig(
        retriever=RetrieverConfig(
            name="dense",
            top_k=2,
            faiss_index=tmp_path / "dense.idx",
            doc_store=tmp_doc_store,
            device="cpu",
            model_name="dummy/ignored",
        ),
        generator=GeneratorConfig(model_name="dummy"),
    )
    pipeline = RAGPipeline(cfg)

    results = pipeline.run_queries([{"question": "Q?", "id": 0}])

    assert isinstance(results, list)
    # ``all(...)`` is vacuously true on an empty list, so require at least one
    # result before checking the per-result shape.
    assert results, "pipeline returned no results for a non-empty query list"
    assert all("answer" in r for r in results)