File size: 1,662 Bytes
fc20fed
 
 
cdf4160
 
fc20fed
cdf4160
fc20fed
cdf4160
 
 
 
fc20fed
 
 
 
 
 
 
cdf4160
fc20fed
 
 
 
 
 
 
 
 
 
 
 
 
cdf4160
 
 
fc20fed
cdf4160
fc20fed
cdf4160
 
 
 
 
 
 
 
12409b1
cdf4160
 
 
 
fc20fed
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
import json
import tempfile
import pytest
from pathlib import Path

import numpy as np

from evaluation.config import GeneratorConfig, PipelineConfig, RetrieverConfig
from evaluation.pipeline import RAGPipeline


class _DummyGenerator:
    """Always returns a fixed answer, ignoring HF pipeline."""

    def generate(self, question: str, contexts: list[str], **kwargs) -> str:
        return "DUMMY_ANSWER"

    def __repr__(self):
        return "DummyGenerator"


@pytest.fixture
def tmp_doc_store(tmp_path_factory):
    """Create a three-document JSONL corpus in a temp directory and return its path.

    Each line of ``docs.jsonl`` is one JSON object with an integer ``id``
    (0, 1, 2) and a ``text`` field.
    """
    texts = (
        "Retrieval Augmented Generation combines retrieval and generation.",
        "BM25 is a strong baseline.",
        "FAISS enables efficient similarity search.",
    )
    records = [{"id": idx, "text": text} for idx, text in enumerate(texts)]
    store_file = tmp_path_factory.mktemp("docs") / "docs.jsonl"
    # One JSON object per line, each terminated by a newline.
    store_file.write_text("".join(f"{json.dumps(rec)}\n" for rec in records))
    return store_file


def test_pipeline_with_dense(tmp_doc_store, monkeypatch, tmp_path):
    """Smoke-test ``RAGPipeline`` configured with the ``"dense"`` retriever.

    ``HFGenerator`` in ``evaluation.generators.hf_generator`` is replaced by
    ``_DummyGenerator`` before the pipeline is built. A single query is then
    run and the shape of the result is checked: a non-empty list whose items
    all contain an ``"answer"`` key.

    NOTE(review): the patch only takes effect if the pipeline looks up
    ``HFGenerator`` through the ``hf_generator`` module at construction time;
    that cannot be confirmed from this file.
    """
    import evaluation.generators.hf_generator as hf_module

    # Swap in the stub; monkeypatch undoes this automatically after the test.
    monkeypatch.setattr(hf_module, "HFGenerator", _DummyGenerator)

    cfg = PipelineConfig(
        retriever=RetrieverConfig(
            name="dense",
            top_k=2,
            faiss_index=tmp_path / "dense.idx",
            doc_store=tmp_doc_store,
            device="cpu",
            model_name="dummy/ignored",
        ),
        generator=GeneratorConfig(model_name="dummy"),
    )
    pipeline = RAGPipeline(cfg)

    results = pipeline.run_queries([{"question": "Q?", "id": 0}])
    assert isinstance(results, list)
    # all() is vacuously true for an empty list, so without this check the
    # test would pass even if the pipeline produced no output for the query.
    assert results, "pipeline returned no results for a non-empty query list"
    assert all("answer" in r for r in results)