File size: 8,010 Bytes
fd28242 fa696e8 fd28242 fa696e8 fd28242 fa696e8 fd28242 fa696e8 fd28242 2c5db87 fd28242 fa696e8 fd28242 fa696e8 fd28242 fa696e8 fd28242 fa696e8 fd28242 fa696e8 fd28242 fa696e8 fd28242 fa696e8 fd28242 fa696e8 fd28242 2c5db87 fd28242 fa696e8 fd28242 fa696e8 fd28242 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 |
"""Unit tests for OpenAlex tool."""
from unittest.mock import AsyncMock, MagicMock
import pytest
from src.tools.openalex import OpenAlexTool
from src.utils.models import Evidence
# Canned OpenAlex "works" payload holding a single open-access work.
# It carries no "ids" object, so it also serves as the "no PMID" case.
SAMPLE_OPENALEX_RESPONSE = {
    "results": [
        {
            "id": "https://openalex.org/W12345",
            "doi": "https://doi.org/10.1234/test",
            "display_name": "Sildenafil in ED Treatment",
            "publication_year": 2024,
            "cited_by_count": 150,
            # Inverted index (word -> list of positions) for the sentence below.
            "abstract_inverted_index": {
                word: [position]
                for position, word in enumerate(
                    "Sildenafil shows promise in ED treatment".split()
                )
            },
            "concepts": [
                {"display_name": "Sildenafil", "score": 0.95, "level": 2},
                {"display_name": "Erectile Dysfunction", "score": 0.88, "level": 1},
            ],
            "authorships": [
                {"author": {"display_name": author_name}}
                for author_name in ("John Smith", "Jane Doe")
            ],
            "open_access": {"is_oa": True, "oa_url": "https://example.com/oa"},
            "best_oa_location": {"pdf_url": "https://example.com/paper.pdf"},
        },
    ],
}
# Canned OpenAlex payload whose single work carries an "ids" object with a
# PubMed URL; used by the PMID-extraction (cross-source deduplication) test.
SAMPLE_OPENALEX_WITH_PMID = {
    "results": [
        {
            "id": "https://openalex.org/W98765",
            "doi": "https://doi.org/10.1038/nature12345",
            "display_name": "Paper with PMID for deduplication",
            "publication_year": 2023,
            "cited_by_count": 50,
            # Inverted index (word -> list of positions) for "Test abstract".
            "abstract_inverted_index": {
                word: [position]
                for position, word in enumerate(("Test", "abstract"))
            },
            "concepts": [],
            "authorships": [],
            "open_access": {"is_oa": False},
            "best_oa_location": None,
            # CRITICAL: the PMID is supplied as a full PubMed URL inside "ids".
            "ids": {
                "openalex": "https://openalex.org/W98765",
                "doi": "https://doi.org/10.1038/nature12345",
                "pmid": "https://pubmed.ncbi.nlm.nih.gov/29456894",
            },
        },
    ],
}
@pytest.mark.unit
class TestOpenAlexTool:
    """Unit tests for OpenAlexTool, run against a mocked HTTP client."""

    @pytest.fixture
    def tool(self) -> OpenAlexTool:
        """Provide a fresh OpenAlexTool for each test."""
        return OpenAlexTool()

    @pytest.fixture
    def mock_client(self, mocker):
        """Patch ``httpx.AsyncClient`` with a mock usable as an async context manager.

        The mock's ``get`` returns a response whose ``json()`` yields
        ``SAMPLE_OPENALEX_RESPONSE``; tests may override that per case via
        ``mock_client.get.return_value.json.return_value``.
        """
        # Default canned response.
        response = MagicMock()
        response.json.return_value = SAMPLE_OPENALEX_RESPONSE
        response.raise_for_status.return_value = None

        # Entering the context manager hands back the very same mock.
        http_client = AsyncMock()
        http_client.__aenter__.return_value = http_client
        http_client.__aexit__.return_value = None
        http_client.get.return_value = response

        mocker.patch("httpx.AsyncClient", return_value=http_client)
        return http_client

    def test_tool_name(self, tool: OpenAlexTool) -> None:
        """The tool identifies itself as 'openalex'."""
        assert tool.name == "openalex"

    @pytest.mark.asyncio
    async def test_search_returns_evidence(self, tool: OpenAlexTool, mock_client) -> None:
        """A search yields Evidence objects attributed to the openalex source."""
        evidence_list = await tool.search("sildenafil ED", max_results=5)

        assert len(evidence_list) == 1
        first = evidence_list[0]
        assert isinstance(first, Evidence)
        assert first.citation.source == "openalex"

    @pytest.mark.asyncio
    async def test_search_includes_citation_count(self, tool: OpenAlexTool, mock_client) -> None:
        """The work's cited_by_count is carried into the Evidence metadata."""
        evidence_list = await tool.search("sildenafil ED", max_results=5)

        metadata = evidence_list[0].metadata
        assert metadata["cited_by_count"] == 150

    @pytest.mark.asyncio
    async def test_search_calculates_relevance(self, tool: OpenAlexTool, mock_client) -> None:
        """Relevance derived from citations must not exceed 1.0."""
        evidence_list = await tool.search("sildenafil ED", max_results=5)

        # 150 citations / 100 = 1.5 -> capped at 1.0
        assert evidence_list[0].relevance == 1.0

    @pytest.mark.asyncio
    async def test_search_includes_concepts(self, tool: OpenAlexTool, mock_client) -> None:
        """Concept display names are exposed through the Evidence metadata."""
        evidence_list = await tool.search("sildenafil ED", max_results=5)

        concepts = evidence_list[0].metadata["concepts"]
        assert "Sildenafil" in concepts
        assert "Erectile Dysfunction" in concepts

    @pytest.mark.asyncio
    async def test_search_includes_open_access_info(self, tool: OpenAlexTool, mock_client) -> None:
        """Open-access flag and PDF URL are exposed through the Evidence metadata."""
        evidence_list = await tool.search("sildenafil ED", max_results=5)

        metadata = evidence_list[0].metadata
        assert metadata["is_open_access"] is True
        assert metadata["pdf_url"] == "https://example.com/paper.pdf"

    def test_reconstruct_abstract(self, tool: OpenAlexTool) -> None:
        """An inverted index is turned back into the original sentence."""
        sentence = "Hello world this is a test"
        inverted_index = {
            word: [position] for position, word in enumerate(sentence.split())
        }

        assert tool._reconstruct_abstract(inverted_index) == sentence

    def test_reconstruct_abstract_empty(self, tool: OpenAlexTool) -> None:
        """A missing (None) or empty inverted index gives an empty string."""
        for empty_index in (None, {}):
            assert tool._reconstruct_abstract(empty_index) == ""

    @pytest.mark.asyncio
    async def test_search_empty_results(self, tool: OpenAlexTool, mock_client) -> None:
        """An API response without results produces an empty list."""
        mock_client.get.return_value.json.return_value = {"results": []}

        evidence_list = await tool.search("xyznonexistent123", max_results=5)

        assert evidence_list == []

    @pytest.mark.asyncio
    async def test_search_params(self, tool: OpenAlexTool, mock_client) -> None:
        """The query text and max_results are forwarded as request params."""
        mock_client.get.return_value.json.return_value = {"results": []}

        await tool.search("sildenafil ED treatment", max_results=3)

        # Inspect the keyword arguments of the most recent client.get(...) call.
        sent_params = mock_client.get.call_args.kwargs["params"]
        assert "sildenafil" in sent_params["search"]
        assert sent_params["per_page"] == 3

    @pytest.mark.asyncio
    async def test_extracts_pmid_from_ids_object(self, tool: OpenAlexTool, mock_client) -> None:
        """ids.pmid is surfaced in metadata for cross-source deduplication."""
        mock_client.get.return_value.json.return_value = SAMPLE_OPENALEX_WITH_PMID

        evidence_list = await tool.search("test", max_results=1)

        assert len(evidence_list) == 1
        # The sample supplies a PubMed URL; metadata should hold only the
        # numeric identifier, as a string.
        assert evidence_list[0].metadata["pmid"] == "29456894"

    @pytest.mark.asyncio
    async def test_pmid_is_none_when_not_present(self, tool: OpenAlexTool, mock_client) -> None:
        """metadata['pmid'] is None when the response has no ids.pmid."""
        # The default fixture payload, SAMPLE_OPENALEX_RESPONSE, has no "ids" object.
        evidence_list = await tool.search("sildenafil ED", max_results=1)

        assert len(evidence_list) == 1
        assert evidence_list[0].metadata["pmid"] is None
@pytest.mark.integration
class TestOpenAlexIntegration:
    """Integration tests that query the live OpenAlex API (no mocking)."""

    @pytest.mark.asyncio
    async def test_real_api_returns_results(self) -> None:
        """A real search returns at least one populated Evidence item."""
        live_tool = OpenAlexTool()

        evidence_list = await live_tool.search("sildenafil ED treatment", max_results=3)

        assert len(evidence_list) > 0
        top_hit = evidence_list[0]
        # Citation count is present and non-negative.
        assert top_hit.metadata["cited_by_count"] >= 0
        # Abstract text is more than a token stub.
        assert len(top_hit.content) > 20
        # At least one concept is attached.
        assert len(top_hit.metadata["concepts"]) > 0
|