VibecoderMcSwaggins committed on
Commit
2cbcbfd
·
unverified ·
2 Parent(s): 0257d2f af7d422

Merge pull request #69 from The-Obstacle-Is-The-Way/dev

Browse files

feat: SPEC_03/04/05 Implementation (OpenAlex + Magentic UX + Cleanup)

docs/specs/SPEC_03_OPENALEX_INTEGRATION.md ADDED
@@ -0,0 +1,522 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # SPEC 03: OpenAlex Integration
2
+
3
+ ## Priority: P1 (Feature Enhancement)
4
+
5
+ ## Problem Statement
6
+
7
+ We currently search 3 sources (PubMed, Europe PMC, ClinicalTrials.gov) but lack **citation metrics**. We cannot distinguish a highly-cited landmark paper from an obscure one. OpenAlex provides:
8
+
9
+ 1. **Citation counts** - Prioritize authoritative papers
10
+ 2. **Citation networks** - "Who cites whom"
11
+ 3. **Concept tagging** - Hierarchical categorization
12
+ 4. **Open access links** - Direct PDF URLs
13
+
14
+ **FREE API. No key required. 209M+ works indexed.**
15
+
16
+ > **Note:** This spec supersedes `docs/future-roadmap/phases/15_PHASE_OPENALEX.md`.
17
+
18
+ ## Groundwork Already Done
19
+
20
+ ```python
21
+ # src/utils/models.py:9
22
+ SourceName = Literal["pubmed", "clinicaltrials", "europepmc", "preprint", "openalex", "web"]
23
+
24
+ # src/utils/models.py:39-42
25
+ metadata: dict[str, Any] = Field(
26
+ default_factory=dict,
27
+ description="Additional metadata (e.g., cited_by_count, concepts, is_open_access)",
28
+ )
29
+ ```
30
+
31
+ The infrastructure is ready. We just need to build the tool.
32
+
33
+ ## OpenAlex API Reference
34
+
35
+ ### Endpoint
36
+
37
+ ```
38
+ GET https://api.openalex.org/works
39
+ ```
40
+
41
+ ### Key Parameters
42
+
43
+ | Parameter | Description |
44
+ |-----------|-------------|
45
+ | `search` | Full-text search across title, abstract, fulltext |
46
+ | `filter` | Constrain results (e.g., `type:article`, `has_abstract:true`) |
47
+ | `sort` | Order results (e.g., `cited_by_count:desc`) |
48
+ | `per_page` | Results per page (max 200) |
49
+ | `mailto` | Email for polite pool (higher rate limits) |
50
+
51
+ ### Example Request
52
+
53
+ ```bash
54
+ GET https://api.openalex.org/works?search=metformin%20cancer&filter=type:article,has_abstract:true&sort=cited_by_count:desc&per_page=10&mailto=deepboner-research@proton.me
55
+ ```
56
+
57
+ ### Response Structure
58
+
59
+ ```json
60
+ {
61
+ "results": [
62
+ {
63
+ "id": "https://openalex.org/W2741809807",
64
+ "doi": "https://doi.org/10.1234/example",
65
+ "display_name": "Paper Title",
66
+ "publication_year": 2024,
67
+ "cited_by_count": 150,
68
+ "abstract_inverted_index": {
69
+ "word1": [0],
70
+ "word2": [1, 5]
71
+ },
72
+ "concepts": [
73
+ {"display_name": "Metformin", "score": 0.95, "level": 2}
74
+ ],
75
+ "authorships": [
76
+ {"author": {"display_name": "John Smith"}}
77
+ ],
78
+ "open_access": {
79
+ "is_oa": true,
80
+ "oa_url": "https://example.com/pdf"
81
+ },
82
+ "best_oa_location": {
83
+ "pdf_url": "https://example.com/paper.pdf"
84
+ }
85
+ }
86
+ ]
87
+ }
88
+ ```
89
+
90
+ ## Architecture
91
+
92
+ ### Class Diagram
93
+
94
+ ```
95
+ ┌─────────────────────────────────────┐
96
+ │ SearchTool (Protocol) │
97
+ │ ───────────────────────────────── │
98
+ │ + name: str │
99
+ │ + search(query, max_results) → list[Evidence] │
100
+ └──────────────────┬──────────────────┘
101
+ │ implements
102
+ ┌──────────────────▼──────────────────┐
103
+ │ OpenAlexTool │
104
+ │ ───────────────────────────────── │
105
+ │ - BASE_URL: str │
106
+ │ - POLITE_EMAIL: str │
107
+ │ ───────────────────────────────── │
108
+ │ + name → "openalex" │
109
+ │ + search(query, max_results) → list[Evidence] │
110
+ │ - _reconstruct_abstract(inverted_index) → str │
111
+ │ - _to_evidence(work) → Evidence │
112
+ │ - _extract_authors(authorships) → list[str] │
113
+ │ - _extract_concepts(concepts) → list[str] │
114
+ └─────────────────────────────────────┘
115
+ ```
116
+
117
+ ## TDD Implementation Plan
118
+
119
+ ### Red Phase: Write Failing Tests First
120
+
121
+ **File: `tests/unit/tools/test_openalex.py`**
122
+
123
+ ```python
124
+ """Unit tests for OpenAlex tool - TDD RED phase."""
125
+
126
+ from unittest.mock import AsyncMock, MagicMock
127
+
128
+ import pytest
129
+
130
+ from src.tools.openalex import OpenAlexTool
131
+ from src.utils.models import Evidence
132
+
133
+
134
+ # Sample OpenAlex response
135
+ SAMPLE_OPENALEX_RESPONSE = {
136
+ "results": [
137
+ {
138
+ "id": "https://openalex.org/W12345",
139
+ "doi": "https://doi.org/10.1234/test",
140
+ "display_name": "Metformin in Cancer Treatment",
141
+ "publication_year": 2024,
142
+ "cited_by_count": 150,
143
+ "abstract_inverted_index": {
144
+ "Metformin": [0],
145
+ "shows": [1],
146
+ "promise": [2],
147
+ "in": [3],
148
+ "cancer": [4],
149
+ "treatment": [5],
150
+ },
151
+ "concepts": [
152
+ {"display_name": "Metformin", "score": 0.95, "level": 2},
153
+ {"display_name": "Cancer", "score": 0.88, "level": 1},
154
+ ],
155
+ "authorships": [
156
+ {"author": {"display_name": "John Smith"}},
157
+ {"author": {"display_name": "Jane Doe"}},
158
+ ],
159
+ "open_access": {"is_oa": True, "oa_url": "https://example.com/oa"},
160
+ "best_oa_location": {"pdf_url": "https://example.com/paper.pdf"},
161
+ }
162
+ ]
163
+ }
164
+
165
+
166
+ @pytest.mark.unit
167
+ class TestOpenAlexTool:
168
+ """Tests for OpenAlexTool."""
169
+
170
+ @pytest.fixture
171
+ def tool(self) -> OpenAlexTool:
172
+ return OpenAlexTool()
173
+
174
+ @pytest.fixture
175
+ def mock_client(self, mocker):
176
+ """Create a standardized mock client with context manager support."""
177
+ client = AsyncMock()
178
+ client.__aenter__.return_value = client
179
+ client.__aexit__.return_value = None
180
+
181
+ # Standard response mock
182
+ resp = MagicMock()
183
+ resp.json.return_value = SAMPLE_OPENALEX_RESPONSE
184
+ resp.raise_for_status.return_value = None
185
+ client.get.return_value = resp
186
+
187
+ mocker.patch("httpx.AsyncClient", return_value=client)
188
+ return client
189
+
190
+ def test_tool_name(self, tool: OpenAlexTool) -> None:
191
+ """Tool name should be 'openalex'."""
192
+ assert tool.name == "openalex"
193
+
194
+ @pytest.mark.asyncio
195
+ async def test_search_returns_evidence(self, tool: OpenAlexTool, mock_client) -> None:
196
+ """Search should return Evidence objects."""
197
+ results = await tool.search("metformin cancer", max_results=5)
198
+
199
+ assert len(results) == 1
200
+ assert isinstance(results[0], Evidence)
201
+ assert results[0].citation.source == "openalex"
202
+
203
+ @pytest.mark.asyncio
204
+ async def test_search_includes_citation_count(self, tool: OpenAlexTool, mock_client) -> None:
205
+ """Evidence metadata should include cited_by_count."""
206
+ results = await tool.search("metformin cancer", max_results=5)
207
+ assert results[0].metadata["cited_by_count"] == 150
208
+
209
+ @pytest.mark.asyncio
210
+ async def test_search_calculates_relevance(self, tool: OpenAlexTool, mock_client) -> None:
211
+ """Evidence relevance should be based on citations (capped at 1.0)."""
212
+ results = await tool.search("metformin cancer", max_results=5)
213
+ # 150 citations / 100 = 1.5 -> capped at 1.0
214
+ assert results[0].relevance == 1.0
215
+
216
+ @pytest.mark.asyncio
217
+ async def test_search_includes_concepts(self, tool: OpenAlexTool, mock_client) -> None:
218
+ """Evidence metadata should include concepts."""
219
+ results = await tool.search("metformin cancer", max_results=5)
220
+ assert "Metformin" in results[0].metadata["concepts"]
221
+ assert "Cancer" in results[0].metadata["concepts"]
222
+
223
+ @pytest.mark.asyncio
224
+ async def test_search_includes_open_access_info(self, tool: OpenAlexTool, mock_client) -> None:
225
+ """Evidence metadata should include open access info."""
226
+ results = await tool.search("metformin cancer", max_results=5)
227
+ assert results[0].metadata["is_open_access"] is True
228
+ assert results[0].metadata["pdf_url"] == "https://example.com/paper.pdf"
229
+
230
+ def test_reconstruct_abstract(self, tool: OpenAlexTool) -> None:
231
+ """Abstract reconstruction from inverted index."""
232
+ inverted_index = {
233
+ "Hello": [0],
234
+ "world": [1],
235
+ "this": [2],
236
+ "is": [3],
237
+ "a": [4],
238
+ "test": [5],
239
+ }
240
+ result = tool._reconstruct_abstract(inverted_index)
241
+ assert result == "Hello world this is a test"
242
+
243
+ def test_reconstruct_abstract_empty(self, tool: OpenAlexTool) -> None:
244
+ """Handle None or empty inverted index."""
245
+ assert tool._reconstruct_abstract(None) == ""
246
+ assert tool._reconstruct_abstract({}) == ""
247
+
248
+ @pytest.mark.asyncio
249
+ async def test_search_empty_results(self, tool: OpenAlexTool, mock_client) -> None:
250
+ """Handle empty results gracefully."""
251
+ mock_client.get.return_value.json.return_value = {"results": []}
252
+
253
+ results = await tool.search("xyznonexistent123", max_results=5)
254
+
255
+ assert results == []
256
+
257
+ @pytest.mark.asyncio
258
+ async def test_search_params(self, tool: OpenAlexTool, mock_client) -> None:
259
+ """Verify API call requests citation-sorted results and uses polite pool."""
260
+ mock_client.get.return_value.json.return_value = {"results": []}
261
+
262
+ await tool.search("test query", max_results=5)
263
+
264
+ # Verify call params
265
+ call_args = mock_client.get.call_args
266
+ params = call_args[1]["params"]
267
+ assert params["sort"] == "cited_by_count:desc"
268
+ assert params["mailto"] == tool.POLITE_EMAIL
269
+ assert "type:article" in params["filter"]
270
+ assert "has_abstract:true" in params["filter"]
271
+ ```
272
+
273
+ ### Green Phase: Implement to Pass Tests
274
+
275
+ **File: `src/tools/openalex.py`**
276
+
277
+ ```python
278
+ """OpenAlex search tool - citation-aware scholarly search."""
279
+
280
+ from typing import Any
281
+
282
+ import httpx
283
+ from tenacity import retry, stop_after_attempt, wait_exponential
284
+
285
+ from src.utils.exceptions import SearchError
286
+ from src.utils.models import Citation, Evidence
287
+
288
+
289
+ class OpenAlexTool:
290
+ """
291
+ Search OpenAlex for scholarly works with citation metrics.
292
+
293
+ OpenAlex indexes 209M+ works and provides:
294
+ - Citation counts (prioritize influential papers)
295
+ - Concept tagging (hierarchical classification)
296
+ - Open access links (direct PDF URLs)
297
+ - Related works (ML-powered similarity)
298
+
299
+ API Docs: https://docs.openalex.org
300
+ Rate Limits: Polite pool with mailto = 100k/day
301
+ """
302
+
303
+ BASE_URL = "https://api.openalex.org/works"
304
+ POLITE_EMAIL = "deepboner-research@proton.me"
305
+
306
+ @property
307
+ def name(self) -> str:
308
+ return "openalex"
309
+
310
+ @retry(
311
+ stop=stop_after_attempt(3),
312
+ wait=wait_exponential(multiplier=1, min=1, max=10),
313
+ reraise=True,
314
+ )
315
+ async def search(self, query: str, max_results: int = 10) -> list[Evidence]:
316
+ """
317
+ Search OpenAlex, sorted by citation count.
318
+
319
+ Args:
320
+ query: Search terms
321
+ max_results: Maximum results to return
322
+
323
+ Returns:
324
+ List of Evidence objects with citation metadata
325
+ """
326
+ params: dict[str, str | int] = {
327
+ "search": query,
328
+ "filter": "type:article,has_abstract:true", # Only articles with abstracts
329
+ "sort": "cited_by_count:desc", # Most cited first
330
+ "per_page": min(max_results, 100),
331
+ "mailto": self.POLITE_EMAIL,
332
+ }
333
+
334
+ async with httpx.AsyncClient(timeout=30.0) as client:
335
+ try:
336
+ response = await client.get(self.BASE_URL, params=params)
337
+ response.raise_for_status()
338
+
339
+ data = response.json()
340
+ works = data.get("results", [])
341
+
342
+ return [self._to_evidence(work) for work in works[:max_results]]
343
+
344
+ except httpx.HTTPStatusError as e:
345
+ raise SearchError(f"OpenAlex API error: {e}") from e
346
+ except httpx.RequestError as e:
347
+ raise SearchError(f"OpenAlex connection failed: {e}") from e
348
+
349
+ def _to_evidence(self, work: dict[str, Any]) -> Evidence:
350
+ """Convert OpenAlex work to Evidence with rich metadata."""
351
+ # Extract basic fields
352
+ title = work.get("display_name", "Untitled")
353
+ doi = work.get("doi", "")
354
+ year = work.get("publication_year", "Unknown")
355
+ cited_by_count = work.get("cited_by_count", 0)
356
+
357
+ # Reconstruct abstract from inverted index
358
+ abstract = self._reconstruct_abstract(work.get("abstract_inverted_index"))
359
+ if not abstract:
360
+ # Should be caught by filter=has_abstract:true, but defensive coding
361
+ abstract = f"[No abstract available. Cited by {cited_by_count} works.]"
362
+
363
+ # Extract authors (limit to 5)
364
+ authors = self._extract_authors(work.get("authorships", []))
365
+
366
+ # Extract concepts (top 5 by score)
367
+ concepts = self._extract_concepts(work.get("concepts", []))
368
+
369
+ # Open access info
370
+ oa_info = work.get("open_access", {})
371
+ is_oa = oa_info.get("is_oa", False)
372
+
373
+ # Get PDF URL (prefer best_oa_location)
374
+ best_oa = work.get("best_oa_location", {})
375
+ pdf_url = best_oa.get("pdf_url") if best_oa else None
376
+
377
+ # Build URL
378
+ if doi:
379
+ url = doi if doi.startswith("http") else f"https://doi.org/{doi}"
380
+ else:
381
+ openalex_id = work.get("id", "")
382
+ url = openalex_id if openalex_id else "https://openalex.org"
383
+
384
+ # Prepend citation badge to content
385
+ citation_badge = f"[Cited by {cited_by_count}] " if cited_by_count > 0 else ""
386
+ content = f"{citation_badge}{abstract[:1900]}"
387
+
388
+ # Calculate relevance: normalized citation count (capped at 1.0 for 100 citations)
389
+ # 100 citations is a very strong signal in most fields.
390
+ relevance = min(1.0, cited_by_count / 100.0)
391
+
392
+ return Evidence(
393
+ content=content[:2000],
394
+ citation=Citation(
395
+ source="openalex",
396
+ title=title[:500],
397
+ url=url,
398
+ date=str(year),
399
+ authors=authors,
400
+ ),
401
+ relevance=relevance,
402
+ metadata={
403
+ "cited_by_count": cited_by_count,
404
+ "concepts": concepts,
405
+ "is_open_access": is_oa,
406
+ "pdf_url": pdf_url,
407
+ },
408
+ )
409
+
410
+ def _reconstruct_abstract(self, inverted_index: dict[str, list[int]] | None) -> str:
411
+ """Rebuild abstract from {"word": [positions]} format."""
412
+ if not inverted_index:
413
+ return ""
414
+
415
+ position_word: dict[int, str] = {}
416
+ for word, positions in inverted_index.items():
417
+ for pos in positions:
418
+ position_word[pos] = word
419
+
420
+ if not position_word:
421
+ return ""
422
+
423
+ max_pos = max(position_word.keys())
424
+ return " ".join(position_word.get(i, "") for i in range(max_pos + 1))
425
+
426
+ def _extract_authors(self, authorships: list[dict[str, Any]]) -> list[str]:
427
+ """Extract author names from authorships array."""
428
+ authors = []
429
+ for authorship in authorships[:5]:
430
+ author = authorship.get("author", {})
431
+ name = author.get("display_name")
432
+ if name:
433
+ authors.append(name)
434
+ return authors
435
+
436
+ def _extract_concepts(self, concepts: list[dict[str, Any]]) -> list[str]:
437
+ """Extract concept names, sorted by score."""
438
+ sorted_concepts = sorted(concepts, key=lambda c: c.get("score", 0), reverse=True)
439
+ return [c.get("display_name", "") for c in sorted_concepts[:5] if c.get("display_name")]
440
+ ```
441
+
442
+ ### Refactor Phase: Clean Integration
443
+
444
+ **Update: `src/tools/__init__.py`**
445
+
446
+ ```python
447
+ """Search tools package."""
448
+
449
+ from src.tools.base import SearchTool
450
+ from src.tools.clinicaltrials import ClinicalTrialsTool
451
+ from src.tools.europepmc import EuropePMCTool
452
+ from src.tools.openalex import OpenAlexTool
453
+ from src.tools.pubmed import PubMedTool
454
+ from src.tools.search_handler import SearchHandler
455
+
456
+ __all__ = [
457
+ "ClinicalTrialsTool",
458
+ "EuropePMCTool",
459
+ "OpenAlexTool",
460
+ "PubMedTool",
461
+ "SearchHandler",
462
+ "SearchTool",
463
+ ]
464
+ ```
465
+
466
+ ## Test Matrix
467
+
468
+ | Test | What It Validates | Priority |
469
+ |------|------------------|----------|
470
+ | `test_tool_name` | Returns "openalex" | P0 |
471
+ | `test_search_returns_evidence` | Returns `list[Evidence]` | P0 |
472
+ | `test_search_includes_citation_count` | `metadata["cited_by_count"]` populated | P0 |
473
+ | `test_search_calculates_relevance` | `relevance` derived from citations | P1 |
474
+ | `test_search_includes_concepts` | `metadata["concepts"]` populated | P0 |
475
+ | `test_search_includes_open_access_info` | `metadata["is_open_access"]` and `pdf_url` | P1 |
476
+ | `test_reconstruct_abstract` | Inverted index → text | P0 |
477
+ | `test_reconstruct_abstract_empty` | Handle None/empty inputs | P1 |
478
+ | `test_search_empty_results` | Return `[]` for no matches | P0 |
479
+ | `test_search_params` | API params (`sort`, `filter`, `mailto`) | P1 |
480
+
481
+ ## Integration Test
482
+
483
+ ```python
484
+ @pytest.mark.integration
485
+ class TestOpenAlexIntegration:
486
+ """Integration tests with real OpenAlex API."""
487
+
488
+ @pytest.mark.asyncio
489
+ async def test_real_api_returns_results(self) -> None:
490
+ """Test actual API returns relevant results."""
491
+ tool = OpenAlexTool()
492
+ results = await tool.search("metformin cancer treatment", max_results=3)
493
+
494
+ assert len(results) > 0
495
+ # Should have citation counts
496
+ assert results[0].metadata["cited_by_count"] >= 0
497
+ # Should have abstract text
498
+ assert len(results[0].content) > 50
499
+ # Should have concepts
500
+ assert len(results[0].metadata["concepts"]) > 0
501
+ ```
502
+
503
+ ## Acceptance Criteria
504
+
505
+ - [x] `OpenAlexTool` implements `SearchTool` Protocol
506
+ - [x] Tool returns `list[Evidence]` with citation metadata
507
+ - [x] Abstract reconstructed from inverted index format
508
+ - [x] Relevance calculated from citation count (capped at 1.0)
509
+ - [x] Exported from `src/tools/__init__.py`
510
+ - [x] Integrated into `src/app.py` SearchHandler
511
+ - [x] UI description updated to mention OpenAlex
512
+ - [x] All unit tests pass (11 tests)
513
+ - [x] Integration test passes with real API
514
+
515
+ **Status: IMPLEMENTED** (commits fd28242, cb46aac)
516
+
517
+ ## Files Modified
518
+
519
+ 1. `src/tools/openalex.py` - NEW: OpenAlex tool implementation
520
+ 2. `tests/unit/tools/test_openalex.py` - NEW: Unit and integration tests
521
+ 3. `src/tools/__init__.py` - Export OpenAlexTool
522
+ 4. `src/app.py` - Wire OpenAlexTool into SearchHandler
docs/specs/SPEC_04_MAGENTIC_UX.md ADDED
@@ -0,0 +1,237 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # SPEC 04: Magentic Mode UX Improvements
2
+
3
+ ## Priority: P1 (Demo Quality)
4
+
5
+ ## Problem Statement
6
+
7
+ Magentic (advanced) mode has several UX issues that degrade the user experience:
8
+
9
+ 1. **P0: Chat history cleared on timeout** - When timeout occurs, all progress events are erased
10
+ 2. **P1: Timeout too short** - 300s default insufficient for complex multi-agent workflows
11
+ 3. **P1: Timeout not configurable** - Users can't adjust based on their needs
12
+ 4. **P2: No graceful degradation** - System doesn't synthesize early when timeout approaches
13
+
14
+ ## Related Issues
15
+
16
+ - GitHub Issue #68: Magentic mode times out at 300s without completing
17
+ - GitHub Issue #65: Demo timing (predecessor, now closed)
18
+ - SPEC_01: Demo Termination (implemented the basic timeout)
19
+
20
+ ## Bug Analysis
21
+
22
+ ### Bug 1: Chat History Cleared on Timeout (P0)
23
+
24
+ **Location**: `src/app.py:205-206`
25
+
26
+ **Current Code**:
27
+ ```python
28
+ if event.type == "complete":
29
+ yield event.message # BUG: Discards all accumulated progress!
30
+ else:
31
+ event_md = event.to_markdown()
32
+ response_parts.append(event_md)
33
+ yield "\n\n".join(response_parts)
34
+ ```
35
+
36
+ **Problem**: The `complete` event (including timeout) yields ONLY the completion message, discarding all the `response_parts` that show what the system actually did.
37
+
38
+ **User Sees**:
39
+ ```
40
+ Research timed out. Synthesizing available evidence...
41
+ ```
42
+
43
+ **User Should See**:
44
+ ```
45
+ 🚀 STARTED: Starting research (Magentic mode)...
46
+ ⏳ THINKING: Multi-agent reasoning in progress...
47
+ 🧠 JUDGING: Manager (user_task): Research drug repurposing...
48
+ 🧠 JUDGING: Manager (task_ledger): We are working to address...
49
+ 🧠 JUDGING: Manager (instruction): Task: Retrieve human clinical...
50
+ ⏱️ Research timed out. Synthesizing available evidence...
51
+ ```
52
+
53
+ **Fix**:
54
+ ```python
55
+ if event.type == "complete":
56
+ response_parts.append(event.message)
57
+ yield "\n\n".join(response_parts) # Preserves all progress
58
+ ```
59
+
60
+ ### Bug 2: Timeout Too Short (P1)
61
+
62
+ **Location**: `src/orchestrator_magentic.py:48`
63
+
64
+ **Current**: `timeout_seconds: float = 300.0` (5 minutes)
65
+
66
+ **Problem**: Multi-agent workflows with 4 agents (Search, Hypothesis, Judge, Report) and up to 10 rounds can theoretically take 60+ minutes. Even typical runs take 5-10 minutes.
67
+
68
+ **Analysis of Per-Agent Latency**:
69
+ | Agent | Typical Latency | Worst Case |
70
+ |-------|-----------------|------------|
71
+ | SearchAgent | 30-60s | 120s (network issues) |
72
+ | HypothesisAgent | 60-90s | 180s (complex reasoning) |
73
+ | JudgeAgent | 30-60s | 120s |
74
+ | ReportAgent | 60-120s | 240s (long synthesis) |
75
+
76
+ With `max_rounds=10`: 10 × 4 × 90s = 60 minutes worst case.
77
+
78
+ ### Bug 3: Timeout Not Configurable (P1)
79
+
80
+ **Problem**: The factory doesn't pass timeout config to MagenticOrchestrator.
81
+
82
+ **Location**: `src/orchestrator_factory.py:52-55`
83
+ ```python
84
+ return orchestrator_cls(
85
+ max_rounds=config.max_iterations if config else 10,
86
+ api_key=api_key,
87
+ # Missing: timeout_seconds
88
+ )
89
+ ```
90
+
91
+ ## Proposed Solutions
92
+
93
+ ### Fix 1: Preserve Chat History (P0)
94
+
95
+ ```python
96
+ # src/app.py - Replace lines 205-212
97
+ if event.type == "complete":
98
+ # Preserve accumulated progress + add completion message
99
+ response_parts.append(event.message)
100
+ yield "\n\n".join(response_parts)
101
+ else:
102
+ event_md = event.to_markdown()
103
+ response_parts.append(event_md)
104
+ yield "\n\n".join(response_parts)
105
+ ```
106
+
107
+ **Test**:
108
+ ```python
109
+ @pytest.mark.asyncio
110
+ async def test_timeout_preserves_chat_history(mock_magentic_workflow):
111
+ """Verify timeout doesn't erase progress events."""
112
+ # Mock workflow that yields events then times out
113
+ events = []
114
+ async for event in research_agent("test", [], "advanced", "sk-test"):
115
+ events.append(event)
116
+
117
+ # Should contain both progress AND timeout message
118
+ output = events[-1] # Final yield
119
+ assert "STARTED" in output
120
+ assert "timed out" in output.lower()
121
+ ```
122
+
123
+ ### Fix 2: Increase Default Timeout (P1)
124
+
125
+ ```python
126
+ # src/orchestrator_magentic.py
127
+ def __init__(
128
+ self,
129
+ max_rounds: int = 10,
130
+ chat_client: OpenAIChatClient | None = None,
131
+ api_key: str | None = None,
132
+ timeout_seconds: float = 600.0, # Changed: 10 minutes (was 5)
133
+ ) -> None:
134
+ ```
135
+
136
+ ### Fix 3: Make Timeout Configurable via Environment (P1)
137
+
138
+ ```python
139
+ # src/utils/config.py
140
+ class Settings(BaseSettings):
141
+ # ... existing fields ...
142
+ magentic_timeout: int = Field(
143
+ default=600,
144
+ description="Timeout for Magentic mode in seconds",
145
+ )
146
+ ```
147
+
148
+ ```python
149
+ # src/orchestrator_factory.py
150
+ return orchestrator_cls(
151
+ max_rounds=config.max_iterations if config else 10,
152
+ api_key=api_key,
153
+ timeout_seconds=settings.magentic_timeout, # NEW
154
+ )
155
+ ```
156
+
157
+ ### Fix 4: Graceful Degradation (P2 - Future)
158
+
159
+ ```python
160
+ # src/orchestrator_magentic.py - Inside run() loop
161
+ elapsed = time.time() - start_time
162
+ time_remaining = self._timeout_seconds - elapsed
163
+
164
+ # If 80% of time elapsed, force synthesis
165
+ if time_remaining < self._timeout_seconds * 0.2:
166
+ yield AgentEvent(
167
+ type="synthesizing",
168
+ message="Time limit approaching, synthesizing available evidence...",
169
+ iteration=iteration,
170
+ )
171
+ # TODO: Inject signal to trigger ReportAgent
172
+ break
173
+ ```
174
+
175
+ ## Implementation Order
176
+
177
+ 1. **Fix 1 (P0)**: Chat history preservation - 5 minutes, 1 line change
178
+ 2. **Fix 2 (P1)**: Increase default timeout - 5 minutes, 1 line change
179
+ 3. **Fix 3 (P1)**: Environment config - 15 minutes, 3 files
180
+ 4. **Fix 4 (P2)**: Graceful degradation - 1 hour, research agent-framework signals
181
+
182
+ ## Acceptance Criteria
183
+
184
+ - [x] Timeout shows ALL progress events, not just timeout message
185
+ - [x] Default timeout increased to 600s (10 minutes)
186
+ - [x] Timeout configurable via `MAGENTIC_TIMEOUT` env var
187
+ - [x] Tests verify chat history preserved on timeout
188
+ - [ ] (P2) System synthesizes early when timeout approaches (Future)
189
+
190
+ **Status: IMPLEMENTED** (commit cb46aac)
191
+
192
+ ## Files to Modify
193
+
194
+ 1. `src/app.py` - Fix chat history clearing (lines 205-212)
195
+ 2. `src/orchestrator_magentic.py` - Increase default timeout
196
+ 3. `src/utils/config.py` - Add `magentic_timeout` setting
197
+ 4. `src/orchestrator_factory.py` - Pass timeout to MagenticOrchestrator
198
+ 5. `tests/unit/test_app_timeout.py` - NEW: Test chat history preservation
199
+
200
+ ## Test Plan
201
+
202
+ ```python
203
+ # tests/unit/test_app_timeout.py
204
+
205
+ @pytest.mark.asyncio
206
+ async def test_complete_event_preserves_history():
207
+ """Complete events should append to history, not replace it."""
208
+ from src.app import research_agent
209
+
210
+ # This requires mocking the orchestrator to emit events then complete
211
+ # Verify final output contains ALL events, not just completion message
212
+ pass
213
+
214
+
215
+ @pytest.mark.asyncio
216
+ async def test_timeout_configurable():
217
+ """Verify MAGENTIC_TIMEOUT env var is respected."""
218
+ import os
219
+ os.environ["MAGENTIC_TIMEOUT"] = "120"
220
+
221
+ from src.utils.config import Settings
222
+ settings = Settings()
223
+ assert settings.magentic_timeout == 120
224
+ ```
225
+
226
+ ## Risk Assessment
227
+
228
+ | Fix | Risk | Mitigation |
229
+ |-----|------|------------|
230
+ | Fix 1 | Low | Simple change, well-understood |
231
+ | Fix 2 | Low | Just a default value change |
232
+ | Fix 3 | Medium | New config, needs validation |
233
+ | Fix 4 | High | Requires understanding agent-framework internals |
234
+
235
+ ## Dependencies
236
+
237
+ - Fix 4 requires investigation of `agent-framework-core` to understand how to signal early termination to the workflow manager.
docs/specs/SPEC_05_ORCHESTRATOR_CLEANUP.md ADDED
@@ -0,0 +1,162 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # SPEC 05: Orchestrator Module Cleanup
2
+
3
+ ## Priority: P3 (Code Hygiene)
4
+
5
+ ## Problem Statement
6
+
7
+ The codebase has an inconsistent orchestrator organization:
8
+
9
+ ```
10
+ src/
11
+ ├── orchestrator/ # EMPTY folder (just . and ..)
12
+ ├── orchestrator.py # Simple mode (15KB, 67% coverage)
13
+ ├── orchestrator_factory.py # Factory pattern (2.5KB, 87% coverage)
14
+ ├── orchestrator_hierarchical.py # Unused (3KB, 0% coverage)
15
+ └── orchestrator_magentic.py # Advanced mode (11KB, 68% coverage)
16
+ ```
17
+
18
+ ## Related Issues
19
+
20
+ - GitHub Issue #67: Clean up empty src/orchestrator/ folder
21
+
22
+ ## Analysis
23
+
24
+ ### Empty Folder
25
+ The `src/orchestrator/` folder was created but never populated. All orchestrator implementations remain flat in `src/`.
26
+
27
+ ### Dead Code
28
+ `orchestrator_hierarchical.py` has **0% test coverage** and appears to be an early prototype that was never integrated:
29
+ - Not imported anywhere in production code
30
+ - Not referenced in any tests
31
+ - Pattern doesn't match current architecture
32
+
33
+ ### Import Pattern
34
+ All 30+ imports use the flat structure:
35
+ ```python
36
+ from src.orchestrator import Orchestrator
37
+ from src.orchestrator_factory import create_orchestrator
38
+ from src.orchestrator_magentic import MagenticOrchestrator
39
+ ```
40
+
41
+ ## Options
42
+
43
+ ### Option A: Minimal Cleanup (Recommended)
44
+
45
+ Delete the empty folder and dead code:
46
+
47
+ ```bash
48
+ rm -rf src/orchestrator/
49
+ rm src/orchestrator_hierarchical.py
50
+ ```
51
+
52
+ **Pros**: Zero import changes, minimal risk, quick
53
+ **Cons**: Flat structure remains
54
+
55
+ ### Option B: Full Consolidation (Future)
56
+
57
+ Move everything into a proper module:
58
+
59
+ ```
60
+ src/orchestrator/
61
+ ├── __init__.py # Re-export for backwards compat
62
+ ├── base.py # Shared protocols/types
63
+ ├── simple.py # From orchestrator.py
64
+ ├── magentic.py # From orchestrator_magentic.py
65
+ └── factory.py # From orchestrator_factory.py
66
+ ```
67
+
68
+ **Pros**: Cleaner organization, better separation
69
+ **Cons**: 30+ import changes, risk of breakage, time investment
70
+
71
+ ### Option C: Hybrid (Pragmatic)
72
+
73
+ Delete empty folder + dead code now. Create `src/orchestrator/__init__.py` that re-exports from flat files:
74
+
75
+ ```python
76
+ # src/orchestrator/__init__.py
77
+ from src.orchestrator import Orchestrator
78
+ from src.orchestrator_factory import create_orchestrator
79
+ from src.orchestrator_magentic import MagenticOrchestrator
80
+
81
+ __all__ = ["Orchestrator", "create_orchestrator", "MagenticOrchestrator"]
82
+ ```
83
+
84
+ **Problem**: This creates confusing import semantics (`src.orchestrator` would be both a module and a file).
85
+
86
+ ## Recommendation
87
+
88
+ **Option A** for now. The flat structure works fine and changing it provides no functional benefit. The empty folder and dead code should be removed.
89
+
90
+ Option B can be revisited post-hackathon when there's time for a proper refactor.
91
+
92
+ ## Implementation
93
+
94
+ ### Step 1: Remove Empty Folder
95
+
96
+ ```bash
97
+ rm -rf src/orchestrator/
98
+ ```
99
+
100
+ ### Step 2: Remove Dead Code (Optional)
101
+
102
+ ```bash
103
+ rm src/orchestrator_hierarchical.py
104
+ ```
105
+
106
+ If keeping for reference, add a deprecation notice:
107
+ ```python
108
+ # src/orchestrator_hierarchical.py
109
+ """
110
+ DEPRECATED: Unused hierarchical orchestrator prototype.
111
+ Kept for reference only. See orchestrator.py (simple) or
112
+ orchestrator_magentic.py (advanced) for active implementations.
113
+ """
114
+ ```
115
+
116
+ ### Step 3: Verify
117
+
118
+ ```bash
119
+ make check # All 142 tests should pass
120
+ ```
121
+
122
+ ## Acceptance Criteria
123
+
124
+ - [x] Empty `src/orchestrator/` folder deleted
125
+ - [x] No broken imports (grep for `from src.orchestrator/`)
126
+ - [x] Tests pass (154 unit tests)
127
+ - [x] `orchestrator_hierarchical.py` removed
128
+
129
+ **Status: IMPLEMENTED** (commit cb46aac)
130
+
131
+ ## Files to Modify
132
+
133
+ 1. `src/orchestrator/` - DELETE (empty folder)
134
+ 2. `src/orchestrator_hierarchical.py` - DELETE or add deprecation notice
135
+
136
+ ## Test Plan
137
+
138
+ ```bash
139
+ # Verify nothing imports from the folder path
140
+ grep -r "from src.orchestrator/" src tests
141
+ # Should return nothing
142
+
143
+ # Verify nothing imports hierarchical
144
+ grep -r "orchestrator_hierarchical" src tests
145
+ # Should return nothing (except possibly this spec)
146
+
147
+ # Run full test suite
148
+ make check
149
+ ```
150
+
151
+ ## Risk Assessment
152
+
153
+ | Action | Risk | Mitigation |
154
+ |--------|------|------------|
155
+ | Delete empty folder | None | It's empty, nothing uses it |
156
+ | Delete hierarchical.py | Low | 0% coverage, no imports |
157
+ | Full consolidation | Medium | Many import changes |
158
+
159
+ ## Time Estimate
160
+
161
+ - Option A: 5 minutes
162
+ - Option B: 1-2 hours (plus testing)
src/app.py CHANGED
@@ -14,6 +14,7 @@ from src.agent_factory.judges import HFInferenceJudgeHandler, JudgeHandler, Mock
14
  from src.orchestrator_factory import create_orchestrator
15
  from src.tools.clinicaltrials import ClinicalTrialsTool
16
  from src.tools.europepmc import EuropePMCTool
 
17
  from src.tools.pubmed import PubMedTool
18
  from src.tools.search_handler import SearchHandler
19
  from src.utils.config import settings
@@ -45,7 +46,7 @@ def configure_orchestrator(
45
 
46
  # Create search tools
47
  search_handler = SearchHandler(
48
- tools=[PubMedTool(), ClinicalTrialsTool(), EuropePMCTool()],
49
  timeout=config.search_timeout,
50
  )
51
 
@@ -176,13 +177,7 @@ async def research_agent(
176
  # Immediate backend info + loading feedback so user knows something is happening
177
  yield (
178
  f"🧠 **Backend**: {backend_name}\n\n"
179
- "⏳ **Processing...** Searching PubMed, ClinicalTrials.gov, Europe PMC...\n"
180
- )
181
-
182
- # Immediate loading feedback so user knows something is happening
183
- yield (
184
- f"🧠 **Backend**: {backend_name}\n\n"
185
- "⏳ **Processing...** Searching PubMed, ClinicalTrials.gov, Europe PMC...\n"
186
  )
187
 
188
  async for event in orchestrator.run(message):
@@ -203,7 +198,8 @@ async def research_agent(
203
 
204
  # Handle complete events specially
205
  if event.type == "complete":
206
- yield event.message
 
207
  else:
208
  # Format and append non-streaming events
209
  event_md = event.to_markdown()
@@ -240,7 +236,7 @@ def create_demo() -> tuple[gr.ChatInterface, gr.Accordion]:
240
  title="🍆 DeepBoner",
241
  description=(
242
  "*AI-Powered Sexual Health Research Agent — searches PubMed, "
243
- "ClinicalTrials.gov & Europe PMC*\n\n"
244
  "Deep research for sexual wellness, ED treatments, hormone therapy, "
245
  "libido, and reproductive health - for all genders.\n\n"
246
  "---\n"
 
14
  from src.orchestrator_factory import create_orchestrator
15
  from src.tools.clinicaltrials import ClinicalTrialsTool
16
  from src.tools.europepmc import EuropePMCTool
17
+ from src.tools.openalex import OpenAlexTool
18
  from src.tools.pubmed import PubMedTool
19
  from src.tools.search_handler import SearchHandler
20
  from src.utils.config import settings
 
46
 
47
  # Create search tools
48
  search_handler = SearchHandler(
49
+ tools=[PubMedTool(), ClinicalTrialsTool(), EuropePMCTool(), OpenAlexTool()],
50
  timeout=config.search_timeout,
51
  )
52
 
 
177
  # Immediate backend info + loading feedback so user knows something is happening
178
  yield (
179
  f"🧠 **Backend**: {backend_name}\n\n"
180
+ "⏳ **Processing...** Searching PubMed, ClinicalTrials.gov, Europe PMC, OpenAlex...\n"
 
 
 
 
 
 
181
  )
182
 
183
  async for event in orchestrator.run(message):
 
198
 
199
  # Handle complete events specially
200
  if event.type == "complete":
201
+ response_parts.append(event.message)
202
+ yield "\n\n".join(response_parts)
203
  else:
204
  # Format and append non-streaming events
205
  event_md = event.to_markdown()
 
236
  title="🍆 DeepBoner",
237
  description=(
238
  "*AI-Powered Sexual Health Research Agent — searches PubMed, "
239
+ "ClinicalTrials.gov, Europe PMC & OpenAlex*\n\n"
240
  "Deep research for sexual wellness, ED treatments, hormone therapy, "
241
  "libido, and reproductive health - for all genders.\n\n"
242
  "---\n"
src/orchestrator_factory.py CHANGED
@@ -52,6 +52,7 @@ def create_orchestrator(
52
  return orchestrator_cls(
53
  max_rounds=config.max_iterations if config else 10,
54
  api_key=api_key,
 
55
  )
56
 
57
  # Simple mode requires handlers
 
52
  return orchestrator_cls(
53
  max_rounds=config.max_iterations if config else 10,
54
  api_key=api_key,
55
+ timeout_seconds=settings.magentic_timeout,
56
  )
57
 
58
  # Simple mode requires handlers
src/orchestrator_hierarchical.py DELETED
@@ -1,95 +0,0 @@
1
- """Hierarchical orchestrator using middleware and sub-teams."""
2
-
3
- import asyncio
4
- from collections.abc import AsyncGenerator
5
-
6
- import structlog
7
-
8
- from src.agents.judge_agent_llm import LLMSubIterationJudge
9
- from src.agents.magentic_agents import create_search_agent
10
- from src.middleware.sub_iteration import SubIterationMiddleware, SubIterationTeam
11
- from src.services.embeddings import get_embedding_service
12
- from src.state import init_magentic_state
13
- from src.utils.models import AgentEvent
14
-
15
- logger = structlog.get_logger()
16
-
17
-
18
- class ResearchTeam(SubIterationTeam):
19
- """Adapts Magentic ChatAgent to SubIterationTeam protocol."""
20
-
21
- def __init__(self) -> None:
22
- self.agent = create_search_agent()
23
-
24
- async def execute(self, task: str) -> str:
25
- response = await self.agent.run(task)
26
- if response.messages:
27
- for msg in reversed(response.messages):
28
- if msg.role == "assistant" and msg.text:
29
- return str(msg.text)
30
- return "No response from agent."
31
-
32
-
33
- class HierarchicalOrchestrator:
34
- """Orchestrator that uses hierarchical teams and sub-iterations."""
35
-
36
- def __init__(self) -> None:
37
- self.team = ResearchTeam()
38
- self.judge = LLMSubIterationJudge()
39
- self.middleware = SubIterationMiddleware(self.team, self.judge, max_iterations=5)
40
-
41
- async def run(self, query: str) -> AsyncGenerator[AgentEvent, None]:
42
- logger.info("Starting hierarchical orchestrator", query=query)
43
-
44
- try:
45
- service = get_embedding_service()
46
- init_magentic_state(service)
47
- except Exception as e:
48
- logger.warning(
49
- "Embedding service initialization failed, using default state",
50
- error=str(e),
51
- )
52
- init_magentic_state()
53
-
54
- yield AgentEvent(type="started", message=f"Starting research: {query}")
55
-
56
- queue: asyncio.Queue[AgentEvent | None] = asyncio.Queue()
57
-
58
- async def event_callback(event: AgentEvent) -> None:
59
- await queue.put(event)
60
-
61
- task_future = asyncio.create_task(self.middleware.run(query, event_callback))
62
-
63
- while not task_future.done():
64
- get_event = asyncio.create_task(queue.get())
65
- done, _ = await asyncio.wait(
66
- {task_future, get_event}, return_when=asyncio.FIRST_COMPLETED
67
- )
68
-
69
- if get_event in done:
70
- event = get_event.result()
71
- if event:
72
- yield event
73
- else:
74
- get_event.cancel()
75
-
76
- # Process remaining events
77
- while not queue.empty():
78
- ev = queue.get_nowait()
79
- if ev:
80
- yield ev
81
-
82
- try:
83
- result, assessment = await task_future
84
-
85
- assessment_text = assessment.reasoning if assessment else "None"
86
- yield AgentEvent(
87
- type="complete",
88
- message=(
89
- f"Research complete.\n\nResult:\n{result}\n\nAssessment:\n{assessment_text}"
90
- ),
91
- data={"assessment": assessment.model_dump() if assessment else None},
92
- )
93
- except Exception as e:
94
- logger.error("Orchestrator failed", error=str(e))
95
- yield AgentEvent(type="error", message=f"Orchestrator failed: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/orchestrator_magentic.py CHANGED
@@ -45,7 +45,7 @@ class MagenticOrchestrator:
45
  max_rounds: int = 10,
46
  chat_client: OpenAIChatClient | None = None,
47
  api_key: str | None = None,
48
- timeout_seconds: float = 300.0,
49
  ) -> None:
50
  """Initialize orchestrator.
51
 
@@ -53,7 +53,7 @@ class MagenticOrchestrator:
53
  max_rounds: Maximum coordination rounds
54
  chat_client: Optional shared chat client for agents
55
  api_key: Optional OpenAI API key (for BYOK)
56
- timeout_seconds: Maximum workflow duration (default: 5 minutes)
57
  """
58
  # Validate requirements only if no key provided
59
  if not chat_client and not api_key:
 
45
  max_rounds: int = 10,
46
  chat_client: OpenAIChatClient | None = None,
47
  api_key: str | None = None,
48
+ timeout_seconds: float = 600.0,
49
  ) -> None:
50
  """Initialize orchestrator.
51
 
 
53
  max_rounds: Maximum coordination rounds
54
  chat_client: Optional shared chat client for agents
55
  api_key: Optional OpenAI API key (for BYOK)
56
+ timeout_seconds: Maximum workflow duration (default: 10 minutes)
57
  """
58
  # Validate requirements only if no key provided
59
  if not chat_client and not api_key:
src/tools/__init__.py CHANGED
@@ -3,6 +3,7 @@
3
  from src.tools.base import SearchTool
4
  from src.tools.clinicaltrials import ClinicalTrialsTool
5
  from src.tools.europepmc import EuropePMCTool
 
6
  from src.tools.pubmed import PubMedTool
7
  from src.tools.search_handler import SearchHandler
8
 
@@ -10,6 +11,7 @@ from src.tools.search_handler import SearchHandler
10
  __all__ = [
11
  "ClinicalTrialsTool",
12
  "EuropePMCTool",
 
13
  "PubMedTool",
14
  "SearchHandler",
15
  "SearchTool",
 
3
  from src.tools.base import SearchTool
4
  from src.tools.clinicaltrials import ClinicalTrialsTool
5
  from src.tools.europepmc import EuropePMCTool
6
+ from src.tools.openalex import OpenAlexTool
7
  from src.tools.pubmed import PubMedTool
8
  from src.tools.search_handler import SearchHandler
9
 
 
11
  __all__ = [
12
  "ClinicalTrialsTool",
13
  "EuropePMCTool",
14
+ "OpenAlexTool",
15
  "PubMedTool",
16
  "SearchHandler",
17
  "SearchTool",
src/tools/openalex.py ADDED
@@ -0,0 +1,162 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """OpenAlex search tool - citation-aware scholarly search."""
2
+
3
+ from typing import Any
4
+
5
+ import httpx
6
+ from tenacity import retry, stop_after_attempt, wait_exponential
7
+
8
+ from src.utils.exceptions import SearchError
9
+ from src.utils.models import Citation, Evidence
10
+
11
+
12
+ class OpenAlexTool:
13
+ """
14
+ Search OpenAlex for scholarly works with citation metrics.
15
+
16
+ OpenAlex indexes 209M+ works and provides:
17
+ - Citation counts (prioritize influential papers)
18
+ - Concept tagging (hierarchical classification)
19
+ - Open access links (direct PDF URLs)
20
+ - Related works (ML-powered similarity)
21
+
22
+ API Docs: https://docs.openalex.org
23
+ Rate Limits: Polite pool with mailto = 100k/day
24
+ """
25
+
26
+ BASE_URL = "https://api.openalex.org/works"
27
+ POLITE_EMAIL = "deepboner-research@proton.me"
28
+
29
+ @property
30
+ def name(self) -> str:
31
+ return "openalex"
32
+
33
+ @retry(
34
+ stop=stop_after_attempt(3),
35
+ wait=wait_exponential(multiplier=1, min=1, max=10),
36
+ reraise=True,
37
+ )
38
+ async def search(self, query: str, max_results: int = 10) -> list[Evidence]:
39
+ """
40
+ Search OpenAlex, sorted by citation count.
41
+
42
+ Args:
43
+ query: Search terms
44
+ max_results: Maximum results to return
45
+
46
+ Returns:
47
+ List of Evidence objects with citation metadata
48
+ """
49
+ params: dict[str, str | int] = {
50
+ "search": query,
51
+ "filter": "type:article,has_abstract:true", # Only articles with abstracts
52
+ "sort": "cited_by_count:desc", # Most cited first
53
+ "per_page": min(max_results, 100),
54
+ "mailto": self.POLITE_EMAIL,
55
+ }
56
+
57
+ async with httpx.AsyncClient(timeout=30.0) as client:
58
+ try:
59
+ response = await client.get(self.BASE_URL, params=params)
60
+ response.raise_for_status()
61
+
62
+ data = response.json()
63
+ works = data.get("results", [])
64
+
65
+ return [self._to_evidence(work) for work in works[:max_results]]
66
+
67
+ except httpx.HTTPStatusError as e:
68
+ raise SearchError(f"OpenAlex API error: {e}") from e
69
+ except httpx.RequestError as e:
70
+ raise SearchError(f"OpenAlex connection failed: {e}") from e
71
+
72
+ def _to_evidence(self, work: dict[str, Any]) -> Evidence:
73
+ """Convert OpenAlex work to Evidence with rich metadata."""
74
+ # Extract basic fields
75
+ title = work.get("display_name", "Untitled")
76
+ doi = work.get("doi", "")
77
+ year = work.get("publication_year", "Unknown")
78
+ cited_by_count = work.get("cited_by_count", 0)
79
+
80
+ # Reconstruct abstract from inverted index
81
+ abstract = self._reconstruct_abstract(work.get("abstract_inverted_index"))
82
+ if not abstract:
83
+ # Should be caught by filter=has_abstract:true, but defensive coding
84
+ abstract = f"[No abstract available. Cited by {cited_by_count} works.]"
85
+
86
+ # Extract authors (limit to 5)
87
+ authors = self._extract_authors(work.get("authorships", []))
88
+
89
+ # Extract concepts (top 5 by score)
90
+ concepts = self._extract_concepts(work.get("concepts", []))
91
+
92
+ # Open access info
93
+ oa_info = work.get("open_access", {})
94
+ is_oa = oa_info.get("is_oa", False)
95
+
96
+ # Get PDF URL (prefer best_oa_location)
97
+ best_oa = work.get("best_oa_location", {})
98
+ pdf_url = best_oa.get("pdf_url") if best_oa else None
99
+
100
+ # Build URL
101
+ if doi:
102
+ url = doi if doi.startswith("http") else f"https://doi.org/{doi}"
103
+ else:
104
+ openalex_id = work.get("id", "")
105
+ url = openalex_id if openalex_id else "https://openalex.org"
106
+
107
+ # Prepend citation badge to content
108
+ citation_badge = f"[Cited by {cited_by_count}] " if cited_by_count > 0 else ""
109
+ content = f"{citation_badge}{abstract[:1900]}"
110
+
111
+ # Calculate relevance: normalized citation count (capped at 1.0 for 100 citations)
112
+ # 100 citations is a very strong signal in most fields.
113
+ relevance = min(1.0, cited_by_count / 100.0)
114
+
115
+ return Evidence(
116
+ content=content[:2000],
117
+ citation=Citation(
118
+ source="openalex",
119
+ title=title[:500],
120
+ url=url,
121
+ date=str(year),
122
+ authors=authors,
123
+ ),
124
+ relevance=relevance,
125
+ metadata={
126
+ "cited_by_count": cited_by_count,
127
+ "concepts": concepts,
128
+ "is_open_access": is_oa,
129
+ "pdf_url": pdf_url,
130
+ },
131
+ )
132
+
133
+ def _reconstruct_abstract(self, inverted_index: dict[str, list[int]] | None) -> str:
134
+ """Rebuild abstract from {"word": [positions]} format."""
135
+ if not inverted_index:
136
+ return ""
137
+
138
+ position_word: dict[int, str] = {}
139
+ for word, positions in inverted_index.items():
140
+ for pos in positions:
141
+ position_word[pos] = word
142
+
143
+ if not position_word:
144
+ return ""
145
+
146
+ max_pos = max(position_word.keys())
147
+ return " ".join(position_word.get(i, "") for i in range(max_pos + 1))
148
+
149
+ def _extract_authors(self, authorships: list[dict[str, Any]]) -> list[str]:
150
+ """Extract author names from authorships array."""
151
+ authors = []
152
+ for authorship in authorships[:5]:
153
+ author = authorship.get("author", {})
154
+ name = author.get("display_name")
155
+ if name:
156
+ authors.append(name)
157
+ return authors
158
+
159
+ def _extract_concepts(self, concepts: list[dict[str, Any]]) -> list[str]:
160
+ """Extract concept names, sorted by score."""
161
+ sorted_concepts = sorted(concepts, key=lambda c: c.get("score", 0), reverse=True)
162
+ return [c.get("display_name", "") for c in sorted_concepts[:5] if c.get("display_name")]
src/utils/config.py CHANGED
@@ -57,6 +57,10 @@ class Settings(BaseSettings):
57
  # Agent Configuration
58
  max_iterations: int = Field(default=10, ge=1, le=50)
59
  search_timeout: int = Field(default=30, description="Seconds to wait for search")
 
 
 
 
60
 
61
  # Logging
62
  log_level: Literal["DEBUG", "INFO", "WARNING", "ERROR"] = "INFO"
 
57
  # Agent Configuration
58
  max_iterations: int = Field(default=10, ge=1, le=50)
59
  search_timeout: int = Field(default=30, description="Seconds to wait for search")
60
+ magentic_timeout: int = Field(
61
+ default=600,
62
+ description="Timeout for Magentic mode in seconds",
63
+ )
64
 
65
  # Logging
66
  log_level: Literal["DEBUG", "INFO", "WARNING", "ERROR"] = "INFO"
tests/unit/test_app_timeout.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Tests for app timeout and history preservation."""
2
+
3
+ import os
4
+ from unittest.mock import MagicMock, patch
5
+
6
+ import pytest
7
+
8
+ from src.app import research_agent
9
+ from src.utils.models import AgentEvent
10
+
11
+
12
+ async def async_gen(items):
13
+ for item in items:
14
+ yield item
15
+
16
+
17
+ @pytest.mark.asyncio
18
+ async def test_complete_event_preserves_history():
19
+ """
20
+ Verify that a 'complete' event (like timeout) appends to the history
21
+ instead of replacing it.
22
+ """
23
+ # Mock events: Progress -> Progress -> Complete
24
+ mock_events = [
25
+ AgentEvent(type="thinking", message="Step 1: Thinking...", iteration=0),
26
+ AgentEvent(type="search_complete", message="Step 2: Found data", iteration=1),
27
+ AgentEvent(type="complete", message="Timeout: Synthesizing...", iteration=1),
28
+ ]
29
+
30
+ # Create a mock orchestrator that yields these events
31
+ mock_orchestrator = MagicMock()
32
+ # The run method should return an async generator
33
+ mock_orchestrator.run.side_effect = lambda msg: async_gen(mock_events)
34
+
35
+ # Patch configure_orchestrator to return our mock
36
+ with patch("src.app.configure_orchestrator") as mock_config:
37
+ mock_config.return_value = (mock_orchestrator, "Mock Backend")
38
+
39
+ # Run the agent
40
+ results = []
41
+ async for output in research_agent("test query", [], "simple"):
42
+ results.append(output)
43
+
44
+ # The final output should contain the accumulated history AND the timeout message
45
+ final_output = results[-1]
46
+
47
+ # Check for preservation
48
+ assert "Step 1: Thinking..." in final_output
49
+ assert "Step 2: Found data" in final_output
50
+ assert "Timeout: Synthesizing..." in final_output
51
+
52
+
53
+ @pytest.mark.asyncio
54
+ async def test_timeout_configurable():
55
+ """Verify MAGENTIC_TIMEOUT env var is respected."""
56
+ from src.utils.config import Settings
57
+
58
+ with patch.dict(os.environ, {"MAGENTIC_TIMEOUT": "120"}):
59
+ settings = Settings()
60
+ assert settings.magentic_timeout == 120
tests/unit/tools/test_openalex.py ADDED
@@ -0,0 +1,165 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Unit tests for OpenAlex tool."""
2
+
3
+ from unittest.mock import AsyncMock, MagicMock
4
+
5
+ import pytest
6
+
7
+ from src.tools.openalex import OpenAlexTool
8
+ from src.utils.models import Evidence
9
+
10
+ # Sample OpenAlex response
11
+ SAMPLE_OPENALEX_RESPONSE = {
12
+ "results": [
13
+ {
14
+ "id": "https://openalex.org/W12345",
15
+ "doi": "https://doi.org/10.1234/test",
16
+ "display_name": "Metformin in Cancer Treatment",
17
+ "publication_year": 2024,
18
+ "cited_by_count": 150,
19
+ "abstract_inverted_index": {
20
+ "Metformin": [0],
21
+ "shows": [1],
22
+ "promise": [2],
23
+ "in": [3],
24
+ "cancer": [4],
25
+ "treatment": [5],
26
+ },
27
+ "concepts": [
28
+ {"display_name": "Metformin", "score": 0.95, "level": 2},
29
+ {"display_name": "Cancer", "score": 0.88, "level": 1},
30
+ ],
31
+ "authorships": [
32
+ {"author": {"display_name": "John Smith"}},
33
+ {"author": {"display_name": "Jane Doe"}},
34
+ ],
35
+ "open_access": {"is_oa": True, "oa_url": "https://example.com/oa"},
36
+ "best_oa_location": {"pdf_url": "https://example.com/paper.pdf"},
37
+ }
38
+ ]
39
+ }
40
+
41
+
42
+ @pytest.mark.unit
43
+ class TestOpenAlexTool:
44
+ """Tests for OpenAlexTool."""
45
+
46
+ @pytest.fixture
47
+ def tool(self) -> OpenAlexTool:
48
+ return OpenAlexTool()
49
+
50
+ @pytest.fixture
51
+ def mock_client(self, mocker):
52
+ """Create a standardized mock client with context manager support."""
53
+ client = AsyncMock()
54
+ client.__aenter__.return_value = client
55
+ client.__aexit__.return_value = None
56
+
57
+ # Standard response mock
58
+ resp = MagicMock()
59
+ resp.json.return_value = SAMPLE_OPENALEX_RESPONSE
60
+ resp.raise_for_status.return_value = None
61
+ client.get.return_value = resp
62
+
63
+ mocker.patch("httpx.AsyncClient", return_value=client)
64
+ return client
65
+
66
+ def test_tool_name(self, tool: OpenAlexTool) -> None:
67
+ """Tool name should be 'openalex'."""
68
+ assert tool.name == "openalex"
69
+
70
+ @pytest.mark.asyncio
71
+ async def test_search_returns_evidence(self, tool: OpenAlexTool, mock_client) -> None:
72
+ """Search should return Evidence objects."""
73
+ results = await tool.search("metformin cancer", max_results=5)
74
+
75
+ assert len(results) == 1
76
+ assert isinstance(results[0], Evidence)
77
+ assert results[0].citation.source == "openalex"
78
+
79
+ @pytest.mark.asyncio
80
+ async def test_search_includes_citation_count(self, tool: OpenAlexTool, mock_client) -> None:
81
+ """Evidence metadata should include cited_by_count."""
82
+ results = await tool.search("metformin cancer", max_results=5)
83
+ assert results[0].metadata["cited_by_count"] == 150
84
+
85
+ @pytest.mark.asyncio
86
+ async def test_search_calculates_relevance(self, tool: OpenAlexTool, mock_client) -> None:
87
+ """Evidence relevance should be based on citations (capped at 1.0)."""
88
+ results = await tool.search("metformin cancer", max_results=5)
89
+ # 150 citations / 100 = 1.5 -> capped at 1.0
90
+ assert results[0].relevance == 1.0
91
+
92
+ @pytest.mark.asyncio
93
+ async def test_search_includes_concepts(self, tool: OpenAlexTool, mock_client) -> None:
94
+ """Evidence metadata should include concepts."""
95
+ results = await tool.search("metformin cancer", max_results=5)
96
+ assert "Metformin" in results[0].metadata["concepts"]
97
+ assert "Cancer" in results[0].metadata["concepts"]
98
+
99
+ @pytest.mark.asyncio
100
+ async def test_search_includes_open_access_info(self, tool: OpenAlexTool, mock_client) -> None:
101
+ """Evidence metadata should include open access info."""
102
+ results = await tool.search("metformin cancer", max_results=5)
103
+ assert results[0].metadata["is_open_access"] is True
104
+ assert results[0].metadata["pdf_url"] == "https://example.com/paper.pdf"
105
+
106
+ def test_reconstruct_abstract(self, tool: OpenAlexTool) -> None:
107
+ """Abstract reconstruction from inverted index."""
108
+ inverted_index = {
109
+ "Hello": [0],
110
+ "world": [1],
111
+ "this": [2],
112
+ "is": [3],
113
+ "a": [4],
114
+ "test": [5],
115
+ }
116
+ result = tool._reconstruct_abstract(inverted_index)
117
+ assert result == "Hello world this is a test"
118
+
119
+ def test_reconstruct_abstract_empty(self, tool: OpenAlexTool) -> None:
120
+ """Handle None or empty inverted index."""
121
+ assert tool._reconstruct_abstract(None) == ""
122
+ assert tool._reconstruct_abstract({}) == ""
123
+
124
+ @pytest.mark.asyncio
125
+ async def test_search_empty_results(self, tool: OpenAlexTool, mock_client) -> None:
126
+ """Handle empty results gracefully."""
127
+ mock_client.get.return_value.json.return_value = {"results": []}
128
+
129
+ results = await tool.search("xyznonexistent123", max_results=5)
130
+
131
+ assert results == []
132
+
133
+ @pytest.mark.asyncio
134
+ async def test_search_params(self, tool: OpenAlexTool, mock_client) -> None:
135
+ """Verify API call requests citation-sorted results and uses polite pool."""
136
+ mock_client.get.return_value.json.return_value = {"results": []}
137
+
138
+ await tool.search("test query", max_results=5)
139
+
140
+ # Verify call params
141
+ call_args = mock_client.get.call_args
142
+ params = call_args[1]["params"]
143
+ assert params["sort"] == "cited_by_count:desc"
144
+ assert params["mailto"] == tool.POLITE_EMAIL
145
+ assert "type:article" in params["filter"]
146
+ assert "has_abstract:true" in params["filter"]
147
+
148
+
149
+ @pytest.mark.integration
150
+ class TestOpenAlexIntegration:
151
+ """Integration tests with real OpenAlex API."""
152
+
153
+ @pytest.mark.asyncio
154
+ async def test_real_api_returns_results(self) -> None:
155
+ """Test actual API returns relevant results."""
156
+ tool = OpenAlexTool()
157
+ results = await tool.search("metformin cancer treatment", max_results=3)
158
+
159
+ assert len(results) > 0
160
+ # Should have citation counts
161
+ assert results[0].metadata["cited_by_count"] >= 0
162
+ # Should have abstract text
163
+ assert len(results[0].content) > 50
164
+ # Should have concepts
165
+ assert len(results[0].metadata["concepts"]) > 0