# RAG_Eval/tests/test_pipeline_end_to_end.py
# Rom89823974978's picture
# Updated codebase
# 12409b1
import json
import tempfile
import pytest
from pathlib import Path
import numpy as np
from evaluation.config import GeneratorConfig, PipelineConfig, RetrieverConfig
from evaluation.pipeline import RAGPipeline
class _DummyGenerator:
    """Test double that always returns a fixed answer, ignoring HF pipeline.

    This class is installed in place of ``HFGenerator`` with
    ``monkeypatch.setattr``. Constructor arguments are accepted and
    discarded so that it can be instantiated with whatever arguments the
    replaced class would receive, instead of raising ``TypeError``.
    """

    def __init__(self, *args, **kwargs) -> None:
        """Accept and ignore any positional or keyword arguments."""

    def generate(self, question: str, contexts: list[str], **kwargs) -> str:
        """Return the constant ``"DUMMY_ANSWER"`` regardless of the inputs."""
        return "DUMMY_ANSWER"

    def __repr__(self) -> str:
        """Return a short fixed label for readable test output."""
        return "DummyGenerator"
@pytest.fixture
def tmp_doc_store(tmp_path_factory):
    """Create a small JSONL document store and return the path to it.

    Three documents are written to ``docs.jsonl`` inside a fresh ``docs``
    temporary directory, one JSON object (``id`` and ``text`` keys) per line.
    """
    texts = (
        "Retrieval Augmented Generation combines retrieval and generation.",
        "BM25 is a strong baseline.",
        "FAISS enables efficient similarity search.",
    )
    # Ids are the zero-based positions of the texts above.
    corpus = [{"id": idx, "text": text} for idx, text in enumerate(texts)]

    target = tmp_path_factory.mktemp("docs") / "docs.jsonl"
    # Every record is newline-terminated, including the last one.
    payload = "".join(f"{json.dumps(entry)}\n" for entry in corpus)
    with target.open("w") as handle:
        handle.write(payload)
    return target
def test_pipeline_with_dense(tmp_doc_store, monkeypatch, tmp_path):
    """Run the pipeline end to end with the dense retriever and a stub generator.

    Builds a ``PipelineConfig`` pointing at the temporary document store,
    runs a single query, and checks that a non-empty list of results is
    returned and that every result carries an ``"answer"`` key.
    """
    import evaluation.generators.hf_generator as hf_module

    # Replace the real generator class with the stub for this test only.
    # NOTE(review): this rebinds the name on the hf_generator module; if the
    # pipeline module imported HFGenerator by name at import time, its own
    # reference would be unaffected -- confirm against evaluation.pipeline.
    monkeypatch.setattr(hf_module, "HFGenerator", _DummyGenerator)

    cfg = PipelineConfig(
        retriever=RetrieverConfig(
            name="dense",
            top_k=2,
            faiss_index=tmp_path / "dense.idx",
            doc_store=tmp_doc_store,
            device="cpu",
            model_name="dummy/ignored",
        ),
        generator=GeneratorConfig(model_name="dummy"),
    )

    pipeline = RAGPipeline(cfg)
    results = pipeline.run_queries([{"question": "Q?", "id": 0}])

    assert isinstance(results, list)
    # all() is True for an empty iterable, so require at least one result
    # before checking the per-result shape; otherwise [] would pass silently.
    assert results, "pipeline returned no results for a non-empty query list"
    assert all("answer" in r for r in results)