File size: 8,010 Bytes
fd28242
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fa696e8
fd28242
 
 
fa696e8
fd28242
 
 
fa696e8
fd28242
 
 
fa696e8
 
fd28242
 
 
 
 
 
 
 
 
 
 
2c5db87
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fd28242
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fa696e8
fd28242
 
 
 
 
 
 
 
fa696e8
fd28242
 
 
 
 
fa696e8
fd28242
 
 
 
 
 
fa696e8
 
 
fd28242
 
 
 
fa696e8
fd28242
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fa696e8
fd28242
 
 
fa696e8
fd28242
fa696e8
 
fd28242
2c5db87
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fd28242
 
 
 
 
 
 
 
 
fa696e8
fd28242
 
 
 
 
fa696e8
fd28242
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
"""Unit tests for OpenAlex tool."""

from unittest.mock import AsyncMock, MagicMock

import pytest

from src.tools.openalex import OpenAlexTool
from src.utils.models import Evidence

# Canned OpenAlex search payload holding a single result.
# This result carries no "ids" object.
SAMPLE_OPENALEX_RESPONSE = {
    "results": [
        {
            "id": "https://openalex.org/W12345",
            "doi": "https://doi.org/10.1234/test",
            "display_name": "Sildenafil in ED Treatment",
            "publication_year": 2024,
            "cited_by_count": 150,
            # Inverted index: each word maps to the list of positions it occupies.
            "abstract_inverted_index": {
                word: [position]
                for position, word in enumerate(
                    "Sildenafil shows promise in ED treatment".split()
                )
            },
            "concepts": [
                {"display_name": concept, "score": score, "level": level}
                for concept, score, level in (
                    ("Sildenafil", 0.95, 2),
                    ("Erectile Dysfunction", 0.88, 1),
                )
            ],
            "authorships": [
                {"author": {"display_name": author}}
                for author in ("John Smith", "Jane Doe")
            ],
            "open_access": {"is_oa": True, "oa_url": "https://example.com/oa"},
            "best_oa_location": {"pdf_url": "https://example.com/paper.pdf"},
        }
    ]
}

# Canned payload whose single result includes an "ids" object with a PMID
# (used for deduplication testing).
SAMPLE_OPENALEX_WITH_PMID = {
    "results": [
        {
            "id": "https://openalex.org/W98765",
            "doi": "https://doi.org/10.1038/nature12345",
            "display_name": "Paper with PMID for deduplication",
            "publication_year": 2023,
            "cited_by_count": 50,
            # Inverted index: each word maps to the list of positions it occupies.
            "abstract_inverted_index": {
                word: [position] for position, word in enumerate(("Test", "abstract"))
            },
            "concepts": [],
            "authorships": [],
            "open_access": {"is_oa": False},
            "best_oa_location": None,
            # CRITICAL: the PMID is given as a full PubMed URL inside "ids";
            # it is what cross-source deduplication keys on.
            "ids": {
                "openalex": "https://openalex.org/W98765",
                "doi": "https://doi.org/10.1038/nature12345",
                "pmid": "https://pubmed.ncbi.nlm.nih.gov/29456894",
            },
        }
    ]
}


@pytest.mark.unit
class TestOpenAlexTool:
    """Unit tests for OpenAlexTool.

    Tests that take the ``mock_client`` fixture run with ``httpx.AsyncClient``
    patched by a mock whose ``get`` returns a canned payload.
    """

    @pytest.fixture
    def tool(self) -> OpenAlexTool:
        """Return a fresh OpenAlexTool built with its default arguments."""
        return OpenAlexTool()

    @pytest.fixture
    def mock_client(self, mocker) -> AsyncMock:
        """Create a standardized mock client with context manager support.

        ``httpx.AsyncClient`` is patched to return the mock. Its ``get``
        resolves to a response whose ``json()`` returns
        ``SAMPLE_OPENALEX_RESPONSE``; tests that need another payload override
        ``mock_client.get.return_value.json.return_value``.
        """
        client = AsyncMock()
        # Entering the async context manager must hand back this same mock so
        # that calls made inside the ``async with`` body are recorded on it.
        client.__aenter__.return_value = client
        client.__aexit__.return_value = None

        # Standard response mock. A plain MagicMock keeps json() and
        # raise_for_status() synchronous, while client.get itself is awaitable.
        resp = MagicMock()
        resp.json.return_value = SAMPLE_OPENALEX_RESPONSE
        resp.raise_for_status.return_value = None
        client.get.return_value = resp

        mocker.patch("httpx.AsyncClient", return_value=client)
        return client

    def test_tool_name(self, tool: OpenAlexTool) -> None:
        """Tool name should be 'openalex'."""
        assert tool.name == "openalex"

    @pytest.mark.asyncio
    async def test_search_returns_evidence(self, tool: OpenAlexTool, mock_client) -> None:
        """Search should return Evidence objects."""
        results = await tool.search("sildenafil ED", max_results=5)

        assert len(results) == 1
        assert isinstance(results[0], Evidence)
        assert results[0].citation.source == "openalex"

    @pytest.mark.asyncio
    async def test_search_includes_citation_count(self, tool: OpenAlexTool, mock_client) -> None:
        """Evidence metadata should include cited_by_count."""
        results = await tool.search("sildenafil ED", max_results=5)
        assert results[0].metadata["cited_by_count"] == 150

    @pytest.mark.asyncio
    async def test_search_calculates_relevance(self, tool: OpenAlexTool, mock_client) -> None:
        """Evidence relevance should be based on citations (capped at 1.0)."""
        results = await tool.search("sildenafil ED", max_results=5)
        # 150 citations / 100 = 1.5 -> capped at 1.0
        assert results[0].relevance == 1.0

    @pytest.mark.asyncio
    async def test_search_includes_concepts(self, tool: OpenAlexTool, mock_client) -> None:
        """Evidence metadata should include concepts."""
        results = await tool.search("sildenafil ED", max_results=5)
        assert "Sildenafil" in results[0].metadata["concepts"]
        assert "Erectile Dysfunction" in results[0].metadata["concepts"]

    @pytest.mark.asyncio
    async def test_search_includes_open_access_info(self, tool: OpenAlexTool, mock_client) -> None:
        """Evidence metadata should include open access info."""
        results = await tool.search("sildenafil ED", max_results=5)
        assert results[0].metadata["is_open_access"] is True
        assert results[0].metadata["pdf_url"] == "https://example.com/paper.pdf"

    def test_reconstruct_abstract(self, tool: OpenAlexTool) -> None:
        """Abstract reconstruction from inverted index."""
        inverted_index = {
            "Hello": [0],
            "world": [1],
            "this": [2],
            "is": [3],
            "a": [4],
            "test": [5],
        }
        result = tool._reconstruct_abstract(inverted_index)
        assert result == "Hello world this is a test"

    def test_reconstruct_abstract_empty(self, tool: OpenAlexTool) -> None:
        """Handle None or empty inverted index."""
        assert tool._reconstruct_abstract(None) == ""
        assert tool._reconstruct_abstract({}) == ""

    @pytest.mark.asyncio
    async def test_search_empty_results(self, tool: OpenAlexTool, mock_client) -> None:
        """Handle empty results gracefully."""
        mock_client.get.return_value.json.return_value = {"results": []}

        results = await tool.search("xyznonexistent123", max_results=5)

        assert results == []

    @pytest.mark.asyncio
    async def test_search_params(self, tool: OpenAlexTool, mock_client) -> None:
        """Search should forward the query text and result limit as request params.

        NOTE(review): citation sort order and polite-pool settings are not
        asserted here; add checks once their expected param values are confirmed.
        """
        mock_client.get.return_value.json.return_value = {"results": []}

        await tool.search("sildenafil ED treatment", max_results=3)

        # call_args.kwargs holds the keyword arguments of the most recent
        # get() call; the query parameters are read from its "params" entry.
        params = mock_client.get.call_args.kwargs["params"]
        assert "sildenafil" in params["search"]
        assert params["per_page"] == 3

    @pytest.mark.asyncio
    async def test_extracts_pmid_from_ids_object(self, tool: OpenAlexTool, mock_client) -> None:
        """PMID should be extracted from ids.pmid for cross-source deduplication."""
        mock_client.get.return_value.json.return_value = SAMPLE_OPENALEX_WITH_PMID

        results = await tool.search("test", max_results=1)

        assert len(results) == 1
        # PMID should be extracted from URL and stored as numeric string
        assert results[0].metadata["pmid"] == "29456894"

    @pytest.mark.asyncio
    async def test_pmid_is_none_when_not_present(self, tool: OpenAlexTool, mock_client) -> None:
        """PMID should be None when ids.pmid is not in response."""
        # SAMPLE_OPENALEX_RESPONSE has no ids.pmid field
        results = await tool.search("sildenafil ED", max_results=1)

        assert len(results) == 1
        assert results[0].metadata["pmid"] is None


@pytest.mark.integration
class TestOpenAlexIntegration:
    """Integration tests that query the real OpenAlex API."""

    @pytest.mark.asyncio
    async def test_real_api_returns_results(self) -> None:
        """A live search should yield results with citations, abstract and concepts."""
        live_tool = OpenAlexTool()
        hits = await live_tool.search("sildenafil ED treatment", max_results=3)

        # At least one result must come back before inspecting the first one.
        assert len(hits) > 0
        top_hit = hits[0]

        # Citation count is present and non-negative.
        assert top_hit.metadata["cited_by_count"] >= 0
        # Content carries more than 20 characters of abstract text.
        assert len(top_hit.content) > 20
        # At least one concept is attached.
        assert len(top_hit.metadata["concepts"]) > 0