|
|
"""Unit tests for OpenAlex tool.""" |
|
|
|
|
|
from unittest.mock import AsyncMock, MagicMock |
|
|
|
|
|
import pytest |
|
|
|
|
|
from src.tools.openalex import OpenAlexTool |
|
|
from src.utils.models import Evidence |
|
|
|
|
|
|
|
|
# Canned OpenAlex "works" search payload containing a single work.
# It deliberately has no "ids" key, so it doubles as the "no PMID" case.
# The abstract is stored as an inverted index (word -> list of positions).
SAMPLE_OPENALEX_RESPONSE = {
    "results": [
        {
            "id": "https://openalex.org/W12345",
            "doi": "https://doi.org/10.1234/test",
            "display_name": "Sildenafil in ED Treatment",
            "publication_year": 2024,
            "cited_by_count": 150,
            "abstract_inverted_index": {
                "Sildenafil": [0],
                "shows": [1],
                "promise": [2],
                "in": [3],
                "ED": [4],
                "treatment": [5],
            },
            "concepts": [
                {"display_name": "Sildenafil", "score": 0.95, "level": 2},
                {"display_name": "Erectile Dysfunction", "score": 0.88, "level": 1},
            ],
            "authorships": [
                {"author": {"display_name": "John Smith"}},
                {"author": {"display_name": "Jane Doe"}},
            ],
            "open_access": {"is_oa": True, "oa_url": "https://example.com/oa"},
            "best_oa_location": {"pdf_url": "https://example.com/paper.pdf"},
        }
    ]
}
|
|
|
|
|
|
|
|
# Canned OpenAlex payload whose single work carries an "ids" object with a
# PubMed URL; the tests expect the numeric PMID to be extracted from it.
SAMPLE_OPENALEX_WITH_PMID = {
    "results": [
        {
            "id": "https://openalex.org/W98765",
            "doi": "https://doi.org/10.1038/nature12345",
            "display_name": "Paper with PMID for deduplication",
            "publication_year": 2023,
            "cited_by_count": 50,
            "abstract_inverted_index": {"Test": [0], "abstract": [1]},
            "concepts": [],
            "authorships": [],
            "open_access": {"is_oa": False},
            "best_oa_location": None,
            "ids": {
                "openalex": "https://openalex.org/W98765",
                "doi": "https://doi.org/10.1038/nature12345",
                "pmid": "https://pubmed.ncbi.nlm.nih.gov/29456894",
            },
        }
    ]
}
|
|
|
|
|
|
|
|
@pytest.mark.unit
class TestOpenAlexTool:
    """Tests for OpenAlexTool.

    All network access is replaced by the ``mock_client`` fixture, which
    patches ``httpx.AsyncClient`` and serves canned OpenAlex payloads.
    """

    @pytest.fixture
    def tool(self) -> OpenAlexTool:
        """Provide a fresh OpenAlexTool instance for each test."""
        return OpenAlexTool()

    @pytest.fixture
    def mock_client(self, mocker) -> AsyncMock:
        """Create a standardized mock client with context manager support.

        The returned mock stands in for ``httpx.AsyncClient``; by default its
        ``get`` call resolves to a response whose ``json()`` returns
        ``SAMPLE_OPENALEX_RESPONSE``. Tests override the payload through
        ``mock_client.get.return_value.json.return_value``.
        """
        client = AsyncMock()
        # Entering the async context manager hands back the same mock, so
        # code written as `async with httpx.AsyncClient() as c` receives it.
        client.__aenter__.return_value = client
        client.__aexit__.return_value = None

        # The response is a plain MagicMock so that json() and
        # raise_for_status() return values directly rather than coroutines.
        resp = MagicMock()
        resp.json.return_value = SAMPLE_OPENALEX_RESPONSE
        resp.raise_for_status.return_value = None
        client.get.return_value = resp

        mocker.patch("httpx.AsyncClient", return_value=client)
        return client

    def test_tool_name(self, tool: OpenAlexTool) -> None:
        """Tool name should be 'openalex'."""
        assert tool.name == "openalex"

    @pytest.mark.asyncio
    async def test_search_returns_evidence(
        self, tool: OpenAlexTool, mock_client: AsyncMock
    ) -> None:
        """Search should return Evidence objects."""
        results = await tool.search("sildenafil ED", max_results=5)

        # The default canned payload holds exactly one work.
        assert len(results) == 1
        assert isinstance(results[0], Evidence)
        assert results[0].citation.source == "openalex"

    @pytest.mark.asyncio
    async def test_search_includes_citation_count(
        self, tool: OpenAlexTool, mock_client: AsyncMock
    ) -> None:
        """Evidence metadata should include cited_by_count."""
        results = await tool.search("sildenafil ED", max_results=5)
        assert results[0].metadata["cited_by_count"] == 150

    @pytest.mark.asyncio
    async def test_search_calculates_relevance(
        self, tool: OpenAlexTool, mock_client: AsyncMock
    ) -> None:
        """Evidence relevance should be based on citations (capped at 1.0)."""
        results = await tool.search("sildenafil ED", max_results=5)

        # The sample work has 150 citations, which is expected to hit the cap.
        assert results[0].relevance == 1.0

    @pytest.mark.asyncio
    async def test_search_includes_concepts(
        self, tool: OpenAlexTool, mock_client: AsyncMock
    ) -> None:
        """Evidence metadata should include concepts."""
        results = await tool.search("sildenafil ED", max_results=5)
        assert "Sildenafil" in results[0].metadata["concepts"]
        assert "Erectile Dysfunction" in results[0].metadata["concepts"]

    @pytest.mark.asyncio
    async def test_search_includes_open_access_info(
        self, tool: OpenAlexTool, mock_client: AsyncMock
    ) -> None:
        """Evidence metadata should include open access info."""
        results = await tool.search("sildenafil ED", max_results=5)
        assert results[0].metadata["is_open_access"] is True
        assert results[0].metadata["pdf_url"] == "https://example.com/paper.pdf"

    def test_reconstruct_abstract(self, tool: OpenAlexTool) -> None:
        """Abstract reconstruction from inverted index."""
        inverted_index = {
            "Hello": [0],
            "world": [1],
            "this": [2],
            "is": [3],
            "a": [4],
            "test": [5],
        }
        result = tool._reconstruct_abstract(inverted_index)
        assert result == "Hello world this is a test"

    def test_reconstruct_abstract_empty(self, tool: OpenAlexTool) -> None:
        """Handle None or empty inverted index."""
        assert tool._reconstruct_abstract(None) == ""
        assert tool._reconstruct_abstract({}) == ""

    @pytest.mark.asyncio
    async def test_search_empty_results(
        self, tool: OpenAlexTool, mock_client: AsyncMock
    ) -> None:
        """Handle empty results gracefully."""
        mock_client.get.return_value.json.return_value = {"results": []}

        results = await tool.search("xyznonexistent123", max_results=5)

        assert results == []

    @pytest.mark.asyncio
    async def test_search_params(
        self, tool: OpenAlexTool, mock_client: AsyncMock
    ) -> None:
        """Search should forward the query text and page size as request params."""
        mock_client.get.return_value.json.return_value = {"results": []}

        await tool.search("sildenafil ED treatment", max_results=3)

        # Inspect the keyword arguments of the most recent client.get(...) call.
        request_params = mock_client.get.call_args.kwargs["params"]
        assert "sildenafil" in request_params["search"]
        assert request_params["per_page"] == 3
        # NOTE(review): citation-sorted ordering and polite-pool usage are not
        # asserted here; add checks once the exact param names are confirmed.

    @pytest.mark.asyncio
    async def test_extracts_pmid_from_ids_object(
        self, tool: OpenAlexTool, mock_client: AsyncMock
    ) -> None:
        """PMID should be extracted from ids.pmid for cross-source deduplication."""
        mock_client.get.return_value.json.return_value = SAMPLE_OPENALEX_WITH_PMID

        results = await tool.search("test", max_results=1)

        assert len(results) == 1

        # Only the numeric ID is expected, not the full PubMed URL.
        assert results[0].metadata["pmid"] == "29456894"

    @pytest.mark.asyncio
    async def test_pmid_is_none_when_not_present(
        self, tool: OpenAlexTool, mock_client: AsyncMock
    ) -> None:
        """PMID should be None when ids.pmid is not in response."""
        # The default fixture payload has no "ids" object at all.
        results = await tool.search("sildenafil ED", max_results=1)

        assert len(results) == 1
        assert results[0].metadata["pmid"] is None
|
|
|
|
|
|
|
|
@pytest.mark.integration
class TestOpenAlexIntegration:
    """Integration tests with real OpenAlex API.

    These tests perform live network requests, so they are marked
    ``integration`` to keep them separable from the unit tests.
    """

    @pytest.mark.asyncio
    async def test_real_api_returns_results(self) -> None:
        """Test actual API returns relevant results."""
        tool = OpenAlexTool()
        results = await tool.search("sildenafil ED treatment", max_results=3)

        # At least one work should come back for this query.
        assert len(results) > 0

        # The citation count must be present and non-negative.
        assert results[0].metadata["cited_by_count"] >= 0

        # The content should hold more than a trivial amount of text.
        assert len(results[0].content) > 20

        # At least one concept should be attached to the top result.
        assert len(results[0].metadata["concepts"]) > 0
|
|
|