File size: 11,820 Bytes
731a241
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
"""Integration tests for RAG integration.

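These tests require OPENAI_API_KEY and may make real API calls; each test
skips itself when ``settings.openai_api_key`` is not set.
Every test class is marked with @pytest.mark.integration so that these tests
can be deselected in unit test runs.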
"""

import pytest

from src.services.llamaindex_rag import get_rag_service
from src.tools.rag_tool import create_rag_tool
from src.tools.search_handler import SearchHandler
from src.tools.tool_executor import execute_agent_task
from src.utils.config import settings
from src.utils.models import AgentTask, Citation, Evidence


@pytest.mark.integration
class TestRAGServiceIntegration:
    """Integration tests for LlamaIndexRAGService.

    Each test obtains a service for its own collection name through
    ``get_rag_service`` and clears that collection in a ``finally`` block, so
    ingested test data is removed even when an assertion fails.
    """

    @pytest.mark.asyncio
    async def test_rag_service_ingest_and_retrieve(self):
        """RAG service should ingest and retrieve evidence."""
        if not settings.openai_api_key:
            pytest.skip("OPENAI_API_KEY required for RAG integration tests")

        # Create RAG service
        rag_service = get_rag_service(collection_name="test_integration")

        # Create sample evidence
        evidence_list = [
            Evidence(
                content="Metformin is a first-line treatment for type 2 diabetes. It works by reducing glucose production in the liver and improving insulin sensitivity.",
                citation=Citation(
                    source="pubmed",
                    title="Metformin Mechanism of Action",
                    url="https://pubmed.ncbi.nlm.nih.gov/12345678/",
                    date="2024-01-15",
                    authors=["Smith J", "Johnson M"],
                ),
                relevance=0.9,
            ),
            Evidence(
                content="Recent studies suggest metformin may have neuroprotective effects in Alzheimer's disease models.",
                citation=Citation(
                    source="pubmed",
                    title="Metformin and Neuroprotection",
                    url="https://pubmed.ncbi.nlm.nih.gov/12345679/",
                    date="2024-02-20",
                    authors=["Brown K", "Davis L"],
                ),
                relevance=0.85,
            ),
        ]

        try:
            # Ingest evidence
            rag_service.ingest_evidence(evidence_list)

            # Retrieve evidence
            results = rag_service.retrieve("metformin diabetes", top_k=2)

            # Assert: check the result shape first so that a missing key is
            # reported as an assertion failure rather than a KeyError raised
            # by the content check below.
            assert len(results) > 0
            assert all("text" in r for r in results)
            assert all("metadata" in r for r in results)
            assert any("metformin" in r["text"].lower() for r in results)
        finally:
            # Cleanup must run on failure too; otherwise the ingested
            # documents stay in the collection for subsequent runs.
            rag_service.clear_collection()

    @pytest.mark.asyncio
    async def test_rag_service_query(self):
        """RAG service should synthesize responses from ingested evidence."""
        if not settings.openai_api_key:
            pytest.skip("OPENAI_API_KEY required for RAG integration tests")

        rag_service = get_rag_service(collection_name="test_query")

        evidence_list = [
            Evidence(
                content="Python is a high-level programming language known for its simplicity and readability.",
                citation=Citation(
                    source="pubmed",
                    title="Python Programming",
                    url="https://example.com/python",
                    date="2024",
                    authors=["Author"],
                ),
            )
        ]

        try:
            # Ingest evidence
            rag_service.ingest_evidence(evidence_list)

            # Query
            response = rag_service.query("What is Python?", top_k=1)

            assert isinstance(response, str)
            assert len(response) > 0
            assert "python" in response.lower()
        finally:
            # Cleanup runs regardless of the test outcome.
            rag_service.clear_collection()


@pytest.mark.integration
class TestRAGToolIntegration:
    """Integration tests for RAGTool.

    The tool under test is built with ``create_rag_tool`` around a service
    obtained from ``get_rag_service``.
    """

    @pytest.mark.asyncio
    async def test_rag_tool_search(self):
        """RAGTool should search RAG service and return Evidence objects."""
        if not settings.openai_api_key:
            pytest.skip("OPENAI_API_KEY required for RAG integration tests")

        # Create RAG service and prepare the evidence to ingest
        rag_service = get_rag_service(collection_name="test_rag_tool")
        evidence_list = [
            Evidence(
                content="Machine learning is a subset of artificial intelligence.",
                citation=Citation(
                    source="pubmed",
                    title="ML Basics",
                    url="https://example.com/ml",
                    date="2024",
                    authors=["ML Expert"],
                ),
            )
        ]

        try:
            rag_service.ingest_evidence(evidence_list)

            # Create RAG tool
            tool = create_rag_tool(rag_service=rag_service)

            # Search
            results = await tool.search("machine learning", max_results=5)

            # Assert
            assert len(results) > 0
            assert all(isinstance(e, Evidence) for e in results)
            # The evidence was ingested with source "pubmed"; the tool is
            # expected to report results with source "rag".
            assert results[0].citation.source == "rag"
            top_content = results[0].content.lower()
            assert (
                "machine learning" in top_content
                or "artificial intelligence" in top_content
            )
        finally:
            # Cleanup must run on failure too; otherwise the ingested
            # documents stay in the collection for subsequent runs.
            rag_service.clear_collection()

    @pytest.mark.asyncio
    async def test_rag_tool_empty_collection(self):
        """RAGTool should return empty list when collection is empty."""
        if not settings.openai_api_key:
            pytest.skip("OPENAI_API_KEY required for RAG integration tests")

        rag_service = get_rag_service(collection_name="test_empty")
        rag_service.clear_collection()  # Ensure empty

        tool = create_rag_tool(rag_service=rag_service)
        results = await tool.search("any query")

        # Nothing was ingested, so there is nothing to clean up afterwards.
        assert results == []


@pytest.mark.integration
class TestRAGAgentIntegration:
    """Integration tests for RAGAgent in tool executor."""

    @pytest.mark.asyncio
    async def test_rag_agent_execution(self):
        """RAGAgent should execute and return ToolAgentOutput.

        Ingests one piece of evidence, dispatches an ``AgentTask`` addressed
        to ``"RAGAgent"`` through ``execute_agent_task`` and checks that the
        result carries relevant output text and at least one source.
        """
        if not settings.openai_api_key:
            pytest.skip("OPENAI_API_KEY required for RAG integration tests")

        # Setup: prepare evidence for the RAG collection
        rag_service = get_rag_service(collection_name="test_rag_agent")
        evidence_list = [
            Evidence(
                content="Deep learning uses neural networks with multiple layers.",
                citation=Citation(
                    source="pubmed",
                    title="Deep Learning",
                    url="https://example.com/dl",
                    date="2024",
                    authors=["DL Researcher"],
                ),
            )
        ]

        try:
            rag_service.ingest_evidence(evidence_list)

            # Execute RAGAgent task
            # NOTE(review): neither rag_service nor the "test_rag_agent"
            # collection name is passed to execute_agent_task; presumably the
            # agent resolves the same collection internally -- confirm.
            task = AgentTask(
                agent="RAGAgent",
                query="deep learning",
                gap="Need information about deep learning",
            )

            result = await execute_agent_task(task)

            # Assert
            assert result.output
            output_text = result.output.lower()
            assert "deep learning" in output_text or "neural network" in output_text
            assert len(result.sources) > 0
        finally:
            # Cleanup must run on failure too; otherwise the ingested
            # documents stay in the collection for subsequent runs.
            rag_service.clear_collection()


@pytest.mark.integration
class TestRAGSearchHandlerIntegration:
    """Integration tests for RAG in SearchHandler.

    Covers two handler options: ``include_rag`` (RAG participates as a search
    source) and ``auto_ingest_to_rag`` (evidence returned by other tools is
    stored in RAG).
    """

    @pytest.mark.asyncio
    async def test_search_handler_with_rag(self):
        """SearchHandler should work with RAG tool included."""
        if not settings.openai_api_key:
            pytest.skip("OPENAI_API_KEY required for RAG integration tests")

        # Setup: Create RAG service and prepare some evidence
        rag_service = get_rag_service(collection_name="test_search_handler")
        evidence_list = [
            Evidence(
                content="Test evidence for search handler integration.",
                citation=Citation(
                    source="pubmed",
                    title="Test Evidence",
                    url="https://example.com/test",
                    date="2024",
                    authors=["Tester"],
                ),
            )
        ]

        try:
            rag_service.ingest_evidence(evidence_list)

            # Create SearchHandler with RAG
            # NOTE(review): unlike test_search_handler_auto_ingest, this
            # handler is not handed rag_service; confirm that include_rag=True
            # makes it search the "test_search_handler" collection.
            handler = SearchHandler(
                tools=[],  # No other tools
                include_rag=True,
                auto_ingest_to_rag=False,  # Don't auto-ingest (already has data)
            )

            # Execute search
            result = await handler.execute("test evidence", max_results_per_tool=5)

            # Assert
            assert result.total_found > 0
            assert "rag" in result.sources_searched
            assert any(e.citation.source == "rag" for e in result.evidence)
        finally:
            # Cleanup must run on failure too; otherwise the ingested
            # documents stay in the collection for subsequent runs.
            rag_service.clear_collection()

    @pytest.mark.asyncio
    async def test_search_handler_auto_ingest(self):
        """SearchHandler should auto-ingest evidence into RAG."""
        if not settings.openai_api_key:
            pytest.skip("OPENAI_API_KEY required for RAG integration tests")

        # Create empty RAG service
        rag_service = get_rag_service(collection_name="test_auto_ingest")
        rag_service.clear_collection()

        # Create mock tool that returns evidence
        from unittest.mock import AsyncMock

        try:
            mock_tool = AsyncMock()
            mock_tool.name = "pubmed"
            mock_tool.search = AsyncMock(
                return_value=[
                    Evidence(
                        content="Evidence to be ingested",
                        citation=Citation(
                            source="pubmed",
                            title="Test",
                            url="https://example.com",
                            date="2024",
                            authors=[],
                        ),
                    )
                ]
            )

            # Create handler with auto-ingest enabled
            handler = SearchHandler(
                tools=[mock_tool],
                include_rag=False,  # Don't include RAG as search tool
                auto_ingest_to_rag=True,
            )
            # Inject the test's RAG service through the handler's private
            # attribute so that auto-ingest writes to "test_auto_ingest".
            handler._rag_service = rag_service

            # Execute search
            await handler.execute("test query")

            # Verify evidence was ingested
            rag_results = rag_service.retrieve("Evidence to be ingested", top_k=1)
            assert len(rag_results) > 0
        finally:
            # Cleanup runs regardless of the test outcome.
            rag_service.clear_collection()


@pytest.mark.integration
class TestRAGHybridSearchIntegration:
    """Integration tests for hybrid search (RAG + database)."""

    @pytest.mark.asyncio
    async def test_hybrid_search_rag_and_pubmed(self):
        """SearchHandler should support RAG + PubMed hybrid search.

        Only handler construction is verified: the test asserts that both the
        "pubmed" and the "rag" tools are registered on the handler and does
        not execute a search.
        """
        if not settings.openai_api_key:
            pytest.skip("OPENAI_API_KEY required for RAG integration tests")

        # Setup: prepare evidence for the RAG collection
        rag_service = get_rag_service(collection_name="test_hybrid")
        evidence_list = [
            Evidence(
                content="Previously collected evidence about metformin.",
                citation=Citation(
                    source="pubmed",
                    title="Previous Research",
                    url="https://example.com/prev",
                    date="2024",
                    authors=[],
                ),
            )
        ]

        try:
            rag_service.ingest_evidence(evidence_list)

            # Note: This test would require real PubMed API access
            # For now, we'll just test that the handler can be created with both tools
            from src.tools.pubmed import PubMedTool

            handler = SearchHandler(
                tools=[PubMedTool()],
                include_rag=True,
                auto_ingest_to_rag=True,
            )

            # Verify handler has both tools
            tool_names = [t.name for t in handler.tools]
            assert "pubmed" in tool_names
            assert "rag" in tool_names
        finally:
            # Cleanup must run on failure too (including a failing import or
            # constructor above); otherwise the ingested documents stay in the
            # collection for subsequent runs.
            rag_service.clear_collection()