VibecoderMcSwaggins committed on
Commit
2cbcbfd
·
unverified ·
2 Parent(s): 0257d2f af7d422

Merge pull request #69 from The-Obstacle-Is-The-Way/dev

Browse files

feat: SPEC_03/04/05 Implementation (OpenAlex + Magentic UX + Cleanup)

docs/specs/SPEC_03_OPENALEX_INTEGRATION.md ADDED
@@ -0,0 +1,522 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # SPEC 03: OpenAlex Integration
2
+
3
+ ## Priority: P1 (Feature Enhancement)
4
+
5
+ ## Problem Statement
6
+
7
+ We currently search 3 sources (PubMed, Europe PMC, ClinicalTrials.gov) but lack **citation metrics**. We cannot distinguish a highly-cited landmark paper from an obscure one. OpenAlex provides:
8
+
9
+ 1. **Citation counts** - Prioritize authoritative papers
10
+ 2. **Citation networks** - "Who cites whom"
11
+ 3. **Concept tagging** - Hierarchical categorization
12
+ 4. **Open access links** - Direct PDF URLs
13
+
14
+ **FREE API. No key required. 209M+ works indexed.**
15
+
16
+ > **Note:** This spec supersedes `docs/future-roadmap/phases/15_PHASE_OPENALEX.md`.
17
+
18
+ ## Groundwork Already Done
19
+
20
+ ```python
21
+ # src/utils/models.py:9
22
+ SourceName = Literal["pubmed", "clinicaltrials", "europepmc", "preprint", "openalex", "web"]
23
+
24
+ # src/utils/models.py:39-42
25
+ metadata: dict[str, Any] = Field(
26
+ default_factory=dict,
27
+ description="Additional metadata (e.g., cited_by_count, concepts, is_open_access)",
28
+ )
29
+ ```
30
+
31
+ The infrastructure is ready. We just need to build the tool.
32
+
33
+ ## OpenAlex API Reference
34
+
35
+ ### Endpoint
36
+
37
+ ```
38
+ GET https://api.openalex.org/works
39
+ ```
40
+
41
+ ### Key Parameters
42
+
43
+ | Parameter | Description |
44
+ |-----------|-------------|
45
+ | `search` | Full-text search across title, abstract, fulltext |
46
+ | `filter` | Constrain results (e.g., `type:article`, `has_abstract:true`) |
47
+ | `sort` | Order results (e.g., `cited_by_count:desc`) |
48
+ | `per_page` | Results per page (max 200) |
49
+ | `mailto` | Email for polite pool (higher rate limits) |
50
+
51
+ ### Example Request
52
+
53
+ ```bash
54
+ GET https://api.openalex.org/works?search=metformin%20cancer&filter=type:article,has_abstract:true&sort=cited_by_count:desc&per_page=10&mailto=deepboner-research@proton.me
55
+ ```
56
+
57
+ ### Response Structure
58
+
59
+ ```json
60
+ {
61
+ "results": [
62
+ {
63
+ "id": "https://openalex.org/W2741809807",
64
+ "doi": "https://doi.org/10.1234/example",
65
+ "display_name": "Paper Title",
66
+ "publication_year": 2024,
67
+ "cited_by_count": 150,
68
+ "abstract_inverted_index": {
69
+ "word1": [0],
70
+ "word2": [1, 5]
71
+ },
72
+ "concepts": [
73
+ {"display_name": "Metformin", "score": 0.95, "level": 2}
74
+ ],
75
+ "authorships": [
76
+ {"author": {"display_name": "John Smith"}}
77
+ ],
78
+ "open_access": {
79
+ "is_oa": true,
80
+ "oa_url": "https://example.com/pdf"
81
+ },
82
+ "best_oa_location": {
83
+ "pdf_url": "https://example.com/paper.pdf"
84
+ }
85
+ }
86
+ ]
87
+ }
88
+ ```
89
+
90
+ ## Architecture
91
+
92
+ ### Class Diagram
93
+
94
+ ```
95
+ ┌─────────────────────────────────────┐
96
+ │ SearchTool (Protocol) │
97
+ │ ───────────────────────────────── │
98
+ │ + name: str │
99
+ │ + search(query, max_results) → list[Evidence] │
100
+ └──────────────────┬──────────────────┘
101
+ │ implements
102
+ ┌──────────────────▼──────────────────┐
103
+ │ OpenAlexTool │
104
+ │ ───────────────────────────────── │
105
+ │ - BASE_URL: str │
106
+ │ - POLITE_EMAIL: str │
107
+ │ ───────────────────────────────── │
108
+ │ + name → "openalex" │
109
+ │ + search(query, max_results) → list[Evidence] │
110
+ │ - _reconstruct_abstract(inverted_index) → str │
111
+ │ - _to_evidence(work) → Evidence │
112
+ │ - _extract_authors(authorships) → list[str] │
113
+ │ - _extract_concepts(concepts) → list[str] │
114
+ └─────────────────────────────────────┘
115
+ ```
116
+
117
+ ## TDD Implementation Plan
118
+
119
+ ### Red Phase: Write Failing Tests First
120
+
121
+ **File: `tests/unit/tools/test_openalex.py`**
122
+
123
+ ```python
124
+ """Unit tests for OpenAlex tool - TDD RED phase."""
125
+
126
+ from unittest.mock import AsyncMock, MagicMock
127
+
128
+ import pytest
129
+
130
+ from src.tools.openalex import OpenAlexTool
131
+ from src.utils.models import Evidence
132
+
133
+
134
+ # Sample OpenAlex response
135
+ SAMPLE_OPENALEX_RESPONSE = {
136
+ "results": [
137
+ {
138
+ "id": "https://openalex.org/W12345",
139
+ "doi": "https://doi.org/10.1234/test",
140
+ "display_name": "Metformin in Cancer Treatment",
141
+ "publication_year": 2024,
142
+ "cited_by_count": 150,
143
+ "abstract_inverted_index": {
144
+ "Metformin": [0],
145
+ "shows": [1],
146
+ "promise": [2],
147
+ "in": [3],
148
+ "cancer": [4],
149
+ "treatment": [5],
150
+ },
151
+ "concepts": [
152
+ {"display_name": "Metformin", "score": 0.95, "level": 2},
153
+ {"display_name": "Cancer", "score": 0.88, "level": 1},
154
+ ],
155
+ "authorships": [
156
+ {"author": {"display_name": "John Smith"}},
157
+ {"author": {"display_name": "Jane Doe"}},
158
+ ],
159
+ "open_access": {"is_oa": True, "oa_url": "https://example.com/oa"},
160
+ "best_oa_location": {"pdf_url": "https://example.com/paper.pdf"},
161
+ }
162
+ ]
163
+ }
164
+
165
+
166
+ @pytest.mark.unit
167
+ class TestOpenAlexTool:
168
+ """Tests for OpenAlexTool."""
169
+
170
+ @pytest.fixture
171
+ def tool(self) -> OpenAlexTool:
172
+ return OpenAlexTool()
173
+
174
+ @pytest.fixture
175
+ def mock_client(self, mocker):
176
+ """Create a standardized mock client with context manager support."""
177
+ client = AsyncMock()
178
+ client.__aenter__.return_value = client
179
+ client.__aexit__.return_value = None
180
+
181
+ # Standard response mock
182
+ resp = MagicMock()
183
+ resp.json.return_value = SAMPLE_OPENALEX_RESPONSE
184
+ resp.raise_for_status.return_value = None
185
+ client.get.return_value = resp
186
+
187
+ mocker.patch("httpx.AsyncClient", return_value=client)
188
+ return client
189
+
190
+ def test_tool_name(self, tool: OpenAlexTool) -> None:
191
+ """Tool name should be 'openalex'."""
192
+ assert tool.name == "openalex"
193
+
194
+ @pytest.mark.asyncio
195
+ async def test_search_returns_evidence(self, tool: OpenAlexTool, mock_client) -> None:
196
+ """Search should return Evidence objects."""
197
+ results = await tool.search("metformin cancer", max_results=5)
198
+
199
+ assert len(results) == 1
200
+ assert isinstance(results[0], Evidence)
201
+ assert results[0].citation.source == "openalex"
202
+
203
+ @pytest.mark.asyncio
204
+ async def test_search_includes_citation_count(self, tool: OpenAlexTool, mock_client) -> None:
205
+ """Evidence metadata should include cited_by_count."""
206
+ results = await tool.search("metformin cancer", max_results=5)
207
+ assert results[0].metadata["cited_by_count"] == 150
208
+
209
+ @pytest.mark.asyncio
210
+ async def test_search_calculates_relevance(self, tool: OpenAlexTool, mock_client) -> None:
211
+ """Evidence relevance should be based on citations (capped at 1.0)."""
212
+ results = await tool.search("metformin cancer", max_results=5)
213
+ # 150 citations / 100 = 1.5 -> capped at 1.0
214
+ assert results[0].relevance == 1.0
215
+
216
+ @pytest.mark.asyncio
217
+ async def test_search_includes_concepts(self, tool: OpenAlexTool, mock_client) -> None:
218
+ """Evidence metadata should include concepts."""
219
+ results = await tool.search("metformin cancer", max_results=5)
220
+ assert "Metformin" in results[0].metadata["concepts"]
221
+ assert "Cancer" in results[0].metadata["concepts"]
222
+
223
+ @pytest.mark.asyncio
224
+ async def test_search_includes_open_access_info(self, tool: OpenAlexTool, mock_client) -> None:
225
+ """Evidence metadata should include open access info."""
226
+ results = await tool.search("metformin cancer", max_results=5)
227
+ assert results[0].metadata["is_open_access"] is True
228
+ assert results[0].metadata["pdf_url"] == "https://example.com/paper.pdf"
229
+
230
+ def test_reconstruct_abstract(self, tool: OpenAlexTool) -> None:
231
+ """Abstract reconstruction from inverted index."""
232
+ inverted_index = {
233
+ "Hello": [0],
234
+ "world": [1],
235
+ "this": [2],
236
+ "is": [3],
237
+ "a": [4],
238
+ "test": [5],
239
+ }
240
+ result = tool._reconstruct_abstract(inverted_index)
241
+ assert result == "Hello world this is a test"
242
+
243
+ def test_reconstruct_abstract_empty(self, tool: OpenAlexTool) -> None:
244
+ """Handle None or empty inverted index."""
245
+ assert tool._reconstruct_abstract(None) == ""
246
+ assert tool._reconstruct_abstract({}) == ""
247
+
248
+ @pytest.mark.asyncio
249
+ async def test_search_empty_results(self, tool: OpenAlexTool, mock_client) -> None:
250
+ """Handle empty results gracefully."""
251
+ mock_client.get.return_value.json.return_value = {"results": []}
252
+
253
+ results = await tool.search("xyznonexistent123", max_results=5)
254
+
255
+ assert results == []
256
+
257
+ @pytest.mark.asyncio
258
+ async def test_search_params(self, tool: OpenAlexTool, mock_client) -> None:
259
+ """Verify API call requests citation-sorted results and uses polite pool."""
260
+ mock_client.get.return_value.json.return_value = {"results": []}
261
+
262
+ await tool.search("test query", max_results=5)
263
+
264
+ # Verify call params
265
+ call_args = mock_client.get.call_args
266
+ params = call_args[1]["params"]
267
+ assert params["sort"] == "cited_by_count:desc"
268
+ assert params["mailto"] == tool.POLITE_EMAIL
269
+ assert "type:article" in params["filter"]
270
+ assert "has_abstract:true" in params["filter"]
271
+ ```
272
+
273
+ ### Green Phase: Implement to Pass Tests
274
+
275
+ **File: `src/tools/openalex.py`**
276
+
277
+ ```python
278
+ """OpenAlex search tool - citation-aware scholarly search."""
279
+
280
+ from typing import Any
281
+
282
+ import httpx
283
+ from tenacity import retry, stop_after_attempt, wait_exponential
284
+
285
+ from src.utils.exceptions import SearchError
286
+ from src.utils.models import Citation, Evidence
287
+
288
+
289
+ class OpenAlexTool:
290
+ """
291
+ Search OpenAlex for scholarly works with citation metrics.
292
+
293
+ OpenAlex indexes 209M+ works and provides:
294
+ - Citation counts (prioritize influential papers)
295
+ - Concept tagging (hierarchical classification)
296
+ - Open access links (direct PDF URLs)
297
+ - Related works (ML-powered similarity)
298
+
299
+ API Docs: https://docs.openalex.org
300
+ Rate Limits: Polite pool with mailto = 100k/day
301
+ """
302
+
303
+ BASE_URL = "https://api.openalex.org/works"
304
+ POLITE_EMAIL = "deepboner-research@proton.me"
305
+
306
+ @property
307
+ def name(self) -> str:
308
+ return "openalex"
309
+
310
+ @retry(
311
+ stop=stop_after_attempt(3),
312
+ wait=wait_exponential(multiplier=1, min=1, max=10),
313
+ reraise=True,
314
+ )
315
+ async def search(self, query: str, max_results: int = 10) -> list[Evidence]:
316
+ """
317
+ Search OpenAlex, sorted by citation count.
318
+
319
+ Args:
320
+ query: Search terms
321
+ max_results: Maximum results to return
322
+
323
+ Returns:
324
+ List of Evidence objects with citation metadata
325
+ """
326
+ params: dict[str, str | int] = {
327
+ "search": query,
328
+ "filter": "type:article,has_abstract:true", # Only articles with abstracts
329
+ "sort": "cited_by_count:desc", # Most cited first
330
+ "per_page": min(max_results, 100),
331
+ "mailto": self.POLITE_EMAIL,
332
+ }
333
+
334
+ async with httpx.AsyncClient(timeout=30.0) as client:
335
+ try:
336
+ response = await client.get(self.BASE_URL, params=params)
337
+ response.raise_for_status()
338
+
339
+ data = response.json()
340
+ works = data.get("results", [])
341
+
342
+ return [self._to_evidence(work) for work in works[:max_results]]
343
+
344
+ except httpx.HTTPStatusError as e:
345
+ raise SearchError(f"OpenAlex API error: {e}") from e
346
+ except httpx.RequestError as e:
347
+ raise SearchError(f"OpenAlex connection failed: {e}") from e
348
+
349
+ def _to_evidence(self, work: dict[str, Any]) -> Evidence:
350
+ """Convert OpenAlex work to Evidence with rich metadata."""
351
+ # Extract basic fields
352
+ title = work.get("display_name", "Untitled")
353
+ doi = work.get("doi", "")
354
+ year = work.get("publication_year", "Unknown")
355
+ cited_by_count = work.get("cited_by_count", 0)
356
+
357
+ # Reconstruct abstract from inverted index
358
+ abstract = self._reconstruct_abstract(work.get("abstract_inverted_index"))
359
+ if not abstract:
360
+ # Should be caught by filter=has_abstract:true, but defensive coding
361
+ abstract = f"[No abstract available. Cited by {cited_by_count} works.]"
362
+
363
+ # Extract authors (limit to 5)
364
+ authors = self._extract_authors(work.get("authorships", []))
365
+
366
+ # Extract concepts (top 5 by score)
367
+ concepts = self._extract_concepts(work.get("concepts", []))
368
+
369
+ # Open access info
370
+ oa_info = work.get("open_access", {})
371
+ is_oa = oa_info.get("is_oa", False)
372
+
373
+ # Get PDF URL (prefer best_oa_location)
374
+ best_oa = work.get("best_oa_location", {})
375
+ pdf_url = best_oa.get("pdf_url") if best_oa else None
376
+
377
+ # Build URL
378
+ if doi:
379
+ url = doi if doi.startswith("http") else f"https://doi.org/{doi}"
380
+ else:
381
+ openalex_id = work.get("id", "")
382
+ url = openalex_id if openalex_id else "https://openalex.org"
383
+
384
+ # Prepend citation badge to content
385
+ citation_badge = f"[Cited by {cited_by_count}] " if cited_by_count > 0 else ""
386
+ content = f"{citation_badge}{abstract[:1900]}"
387
+
388
+ # Calculate relevance: normalized citation count (capped at 1.0 for 100 citations)
389
+ # 100 citations is a very strong signal in most fields.
390
+ relevance = min(1.0, cited_by_count / 100.0)
391
+
392
+ return Evidence(
393
+ content=content[:2000],
394
+ citation=Citation(
395
+ source="openalex",
396
+ title=title[:500],
397
+ url=url,
398
+ date=str(year),
399
+ authors=authors,
400
+ ),
401
+ relevance=relevance,
402
+ metadata={
403
+ "cited_by_count": cited_by_count,
404
+ "concepts": concepts,
405
+ "is_open_access": is_oa,
406
+ "pdf_url": pdf_url,
407
+ },
408
+ )
409
+
410
+ def _reconstruct_abstract(self, inverted_index: dict[str, list[int]] | None) -> str:
411
+ """Rebuild abstract from {"word": [positions]} format."""
412
+ if not inverted_index:
413
+ return ""
414
+
415
+ position_word: dict[int, str] = {}
416
+ for word, positions in inverted_index.items():
417
+ for pos in positions:
418
+ position_word[pos] = word
419
+
420
+ if not position_word:
421
+ return ""
422
+
423
+ max_pos = max(position_word.keys())
424
+ return " ".join(position_word.get(i, "") for i in range(max_pos + 1))
425
+
426
+ def _extract_authors(self, authorships: list[dict[str, Any]]) -> list[str]:
427
+ """Extract author names from authorships array."""
428
+ authors = []
429
+ for authorship in authorships[:5]:
430
+ author = authorship.get("author", {})
431
+ name = author.get("display_name")
432
+ if name:
433
+ authors.append(name)
434
+ return authors
435
+
436
+ def _extract_concepts(self, concepts: list[dict[str, Any]]) -> list[str]:
437
+ """Extract concept names, sorted by score."""
438
+ sorted_concepts = sorted(concepts, key=lambda c: c.get("score", 0), reverse=True)
439
+ return [c.get("display_name", "") for c in sorted_concepts[:5] if c.get("display_name")]
440
+ ```
441
+
442
+ ### Refactor Phase: Clean Integration
443
+
444
+ **Update: `src/tools/__init__.py`**
445
+
446
+ ```python
447
+ """Search tools package."""
448
+
449
+ from src.tools.base import SearchTool
450
+ from src.tools.clinicaltrials import ClinicalTrialsTool
451
+ from src.tools.europepmc import EuropePMCTool
452
+ from src.tools.openalex import OpenAlexTool
453
+ from src.tools.pubmed import PubMedTool
454
+ from src.tools.search_handler import SearchHandler
455
+
456
+ __all__ = [
457
+ "ClinicalTrialsTool",
458
+ "EuropePMCTool",
459
+ "OpenAlexTool",
460
+ "PubMedTool",
461
+ "SearchHandler",
462
+ "SearchTool",
463
+ ]
464
+ ```
465
+
466
+ ## Test Matrix
467
+
468
+ | Test | What It Validates | Priority |
469
+ |------|------------------|----------|
470
+ | `test_tool_name` | Returns "openalex" | P0 |
471
+ | `test_search_returns_evidence` | Returns `list[Evidence]` | P0 |
472
+ | `test_search_includes_citation_count` | `metadata["cited_by_count"]` populated | P0 |
473
+ | `test_search_calculates_relevance` | `relevance` derived from citations | P1 |
474
+ | `test_search_includes_concepts` | `metadata["concepts"]` populated | P0 |
475
+ | `test_search_includes_open_access_info` | `metadata["is_open_access"]` and `pdf_url` | P1 |
476
+ | `test_reconstruct_abstract` | Inverted index → text | P0 |
477
+ | `test_reconstruct_abstract_empty` | Handle None/empty inputs | P1 |
478
+ | `test_search_empty_results` | Return `[]` for no matches | P0 |
479
+ | `test_search_params` | API params (`sort`, `filter`, `mailto`) | P1 |
480
+
481
+ ## Integration Test
482
+
483
+ ```python
484
+ @pytest.mark.integration
485
+ class TestOpenAlexIntegration:
486
+ """Integration tests with real OpenAlex API."""
487
+
488
+ @pytest.mark.asyncio
489
+ async def test_real_api_returns_results(self) -> None:
490
+ """Test actual API returns relevant results."""
491
+ tool = OpenAlexTool()
492
+ results = await tool.search("metformin cancer treatment", max_results=3)
493
+
494
+ assert len(results) > 0
495
+ # Should have citation counts
496
+ assert results[0].metadata["cited_by_count"] >= 0
497
+ # Should have abstract text
498
+ assert len(results[0].content) > 50
499
+ # Should have concepts
500
+ assert len(results[0].metadata["concepts"]) > 0
501
+ ```
502
+
503
+ ## Acceptance Criteria
504
+
505
+ - [x] `OpenAlexTool` implements `SearchTool` Protocol
506
+ - [x] Tool returns `list[Evidence]` with citation metadata
507
+ - [x] Abstract reconstructed from inverted index format
508
+ - [x] Relevance calculated from citation count (capped at 1.0)
509
+ - [x] Exported from `src/tools/__init__.py`
510
+ - [x] Integrated into `src/app.py` SearchHandler
511
+ - [x] UI description updated to mention OpenAlex
512
+ - [x] All unit tests pass (11 tests)
513
+ - [x] Integration test passes with real API
514
+
515
+ **Status: IMPLEMENTED** (commits fd28242, cb46aac)
516
+
517
+ ## Files Modified
518
+
519
+ 1. `src/tools/openalex.py` - NEW: OpenAlex tool implementation
520
+ 2. `tests/unit/tools/test_openalex.py` - NEW: Unit and integration tests
521
+ 3. `src/tools/__init__.py` - Export OpenAlexTool
522
+ 4. `src/app.py` - Wire OpenAlexTool into SearchHandler
docs/specs/SPEC_04_MAGENTIC_UX.md ADDED
@@ -0,0 +1,237 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # SPEC 04: Magentic Mode UX Improvements
2
+
3
+ ## Priority: P1 (Demo Quality)
4
+
5
+ ## Problem Statement
6
+
7
+ Magentic (advanced) mode has several UX issues that degrade the user experience:
8
+
9
+ 1. **P0: Chat history cleared on timeout** - When timeout occurs, all progress events are erased
10
+ 2. **P1: Timeout too short** - 300s default insufficient for complex multi-agent workflows
11
+ 3. **P1: Timeout not configurable** - Users can't adjust based on their needs
12
+ 4. **P2: No graceful degradation** - System doesn't synthesize early when timeout approaches
13
+
14
+ ## Related Issues
15
+
16
+ - GitHub Issue #68: Magentic mode times out at 300s without completing
17
+ - GitHub Issue #65: Demo timing (predecessor, now closed)
18
+ - SPEC_01: Demo Termination (implemented the basic timeout)
19
+
20
+ ## Bug Analysis
21
+
22
+ ### Bug 1: Chat History Cleared on Timeout (P0)
23
+
24
+ **Location**: `src/app.py:205-206`
25
+
26
+ **Current Code**:
27
+ ```python
28
+ if event.type == "complete":
29
+ yield event.message # BUG: Discards all accumulated progress!
30
+ else:
31
+ event_md = event.to_markdown()
32
+ response_parts.append(event_md)
33
+ yield "\n\n".join(response_parts)
34
+ ```
35
+
36
+ **Problem**: The `complete` event (including timeout) yields ONLY the completion message, discarding all the `response_parts` that show what the system actually did.
37
+
38
+ **User Sees**:
39
+ ```
40
+ Research timed out. Synthesizing available evidence...
41
+ ```
42
+
43
+ **User Should See**:
44
+ ```
45
+ 🚀 STARTED: Starting research (Magentic mode)...
46
+ ⏳ THINKING: Multi-agent reasoning in progress...
47
+ 🧠 JUDGING: Manager (user_task): Research drug repurposing...
48
+ 🧠 JUDGING: Manager (task_ledger): We are working to address...
49
+ 🧠 JUDGING: Manager (instruction): Task: Retrieve human clinical...
50
+ ⏱️ Research timed out. Synthesizing available evidence...
51
+ ```
52
+
53
+ **Fix**:
54
+ ```python
55
+ if event.type == "complete":
56
+ response_parts.append(event.message)
57
+ yield "\n\n".join(response_parts) # Preserves all progress
58
+ ```
59
+
60
+ ### Bug 2: Timeout Too Short (P1)
61
+
62
+ **Location**: `src/orchestrator_magentic.py:48`
63
+
64
+ **Current**: `timeout_seconds: float = 300.0` (5 minutes)
65
+
66
+ **Problem**: Multi-agent workflows with 4 agents (Search, Hypothesis, Judge, Report) and up to 10 rounds can theoretically take 60+ minutes. Even typical runs take 5-10 minutes.
67
+
68
+ **Analysis of Per-Agent Latency**:
69
+ | Agent | Typical Latency | Worst Case |
70
+ |-------|-----------------|------------|
71
+ | SearchAgent | 30-60s | 120s (network issues) |
72
+ | HypothesisAgent | 60-90s | 180s (complex reasoning) |
73
+ | JudgeAgent | 30-60s | 120s |
74
+ | ReportAgent | 60-120s | 240s (long synthesis) |
75
+
76
+ With `max_rounds=10`: 10 × 4 × 90s = 60 minutes worst case.
77
+
78
+ ### Bug 3: Timeout Not Configurable (P1)
79
+
80
+ **Problem**: The factory doesn't pass timeout config to MagenticOrchestrator.
81
+
82
+ **Location**: `src/orchestrator_factory.py:52-55`
83
+ ```python
84
+ return orchestrator_cls(
85
+ max_rounds=config.max_iterations if config else 10,
86
+ api_key=api_key,
87
+ # Missing: timeout_seconds
88
+ )
89
+ ```
90
+
91
+ ## Proposed Solutions
92
+
93
+ ### Fix 1: Preserve Chat History (P0)
94
+
95
+ ```python
96
+ # src/app.py - Replace lines 205-212
97
+ if event.type == "complete":
98
+ # Preserve accumulated progress + add completion message
99
+ response_parts.append(event.message)
100
+ yield "\n\n".join(response_parts)
101
+ else:
102
+ event_md = event.to_markdown()
103
+ response_parts.append(event_md)
104
+ yield "\n\n".join(response_parts)
105
+ ```
106
+
107
+ **Test**:
108
+ ```python
109
+ @pytest.mark.asyncio
110
+ async def test_timeout_preserves_chat_history(mock_magentic_workflow):
111
+ """Verify timeout doesn't erase progress events."""
112
+ # Mock workflow that yields events then times out
113
+ events = []
114
+ async for event in research_agent("test", [], "advanced", "sk-test"):
115
+ events.append(event)
116
+
117
+ # Should contain both progress AND timeout message
118
+ output = events[-1] # Final yield
119
+ assert "STARTED" in output
120
+ assert "timed out" in output.lower()
121
+ ```
122
+
123
+ ### Fix 2: Increase Default Timeout (P1)
124
+
125
+ ```python
126
+ # src/orchestrator_magentic.py
127
+ def __init__(
128
+ self,
129
+ max_rounds: int = 10,
130
+ chat_client: OpenAIChatClient | None = None,
131
+ api_key: str | None = None,
132
+ timeout_seconds: float = 600.0, # Changed: 10 minutes (was 5)
133
+ ) -> None:
134
+ ```
135
+
136
+ ### Fix 3: Make Timeout Configurable via Environment (P1)
137
+
138
+ ```python
139
+ # src/utils/config.py
140
+ class Settings(BaseSettings):
141
+ # ... existing fields ...
142
+ magentic_timeout: int = Field(
143
+ default=600,
144
+ description="Timeout for Magentic mode in seconds",
145
+ )
146
+ ```
147
+
148
+ ```python
149
+ # src/orchestrator_factory.py
150
+ return orchestrator_cls(
151
+ max_rounds=config.max_iterations if config else 10,
152
+ api_key=api_key,
153
+ timeout_seconds=settings.magentic_timeout, # NEW
154
+ )
155
+ ```
156
+
157
+ ### Fix 4: Graceful Degradation (P2 - Future)
158
+
159
+ ```python
160
+ # src/orchestrator_magentic.py - Inside run() loop
161
+ elapsed = time.time() - start_time
162
+ time_remaining = self._timeout_seconds - elapsed
163
+
164
+ # If 80% of time elapsed, force synthesis
165
+ if time_remaining < self._timeout_seconds * 0.2:
166
+ yield AgentEvent(
167
+ type="synthesizing",
168
+ message="Time limit approaching, synthesizing available evidence...",
169
+ iteration=iteration,
170
+ )
171
+ # TODO: Inject signal to trigger ReportAgent
172
+ break
173
+ ```
174
+
175
+ ## Implementation Order
176
+
177
+ 1. **Fix 1 (P0)**: Chat history preservation - 5 minutes, 1 line change
178
+ 2. **Fix 2 (P1)**: Increase default timeout - 5 minutes, 1 line change
179
+ 3. **Fix 3 (P1)**: Environment config - 15 minutes, 3 files
180
+ 4. **Fix 4 (P2)**: Graceful degradation - 1 hour, research agent-framework signals
181
+
182
+ ## Acceptance Criteria
183
+
184
+ - [x] Timeout shows ALL progress events, not just timeout message
185
+ - [x] Default timeout increased to 600s (10 minutes)
186
+ - [x] Timeout configurable via `MAGENTIC_TIMEOUT` env var
187
+ - [x] Tests verify chat history preserved on timeout
188
+ - [ ] (P2) System synthesizes early when timeout approaches (Future)
189
+
190
+ **Status: IMPLEMENTED** (commit cb46aac)
191
+
192
+ ## Files to Modify
193
+
194
+ 1. `src/app.py` - Fix chat history clearing (lines 205-212)
195
+ 2. `src/orchestrator_magentic.py` - Increase default timeout
196
+ 3. `src/utils/config.py` - Add `magentic_timeout` setting
197
+ 4. `src/orchestrator_factory.py` - Pass timeout to MagenticOrchestrator
198
+ 5. `tests/unit/test_app_timeout.py` - NEW: Test chat history preservation
199
+
200
+ ## Test Plan
201
+
202
+ ```python
203
+ # tests/unit/test_app_timeout.py
204
+
205
+ @pytest.mark.asyncio
206
+ async def test_complete_event_preserves_history():
207
+ """Complete events should append to history, not replace it."""
208
+ from src.app import research_agent
209
+
210
+ # This requires mocking the orchestrator to emit events then complete
211
+ # Verify final output contains ALL events, not just completion message
212
+ pass
213
+
214
+
215
+ @pytest.mark.asyncio
216
+ async def test_timeout_configurable():
217
+ """Verify MAGENTIC_TIMEOUT env var is respected."""
218
+ import os
219
+ os.environ["MAGENTIC_TIMEOUT"] = "120"
220
+
221
+ from src.utils.config import Settings
222
+ settings = Settings()
223
+ assert settings.magentic_timeout == 120
224
+ ```
225
+
226
+ ## Risk Assessment
227
+
228
+ | Fix | Risk | Mitigation |
229
+ |-----|------|------------|
230
+ | Fix 1 | Low | Simple change, well-understood |
231
+ | Fix 2 | Low | Just a default value change |
232
+ | Fix 3 | Medium | New config, needs validation |
233
+ | Fix 4 | High | Requires understanding agent-framework internals |
234
+
235
+ ## Dependencies
236
+
237
+ - Fix 4 requires investigation of `agent-framework-core` to understand how to signal early termination to the workflow manager.
docs/specs/SPEC_05_ORCHESTRATOR_CLEANUP.md ADDED
@@ -0,0 +1,162 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # SPEC 05: Orchestrator Module Cleanup
2
+
3
+ ## Priority: P3 (Code Hygiene)
4
+
5
+ ## Problem Statement
6
+
7
+ The codebase has an inconsistent orchestrator organization:
8
+
9
+ ```
10
+ src/
11
+ ├── orchestrator/ # EMPTY folder (just . and ..)
12
+ ├── orchestrator.py # Simple mode (15KB, 67% coverage)
13
+ ├── orchestrator_factory.py # Factory pattern (2.5KB, 87% coverage)
14
+ ├── orchestrator_hierarchical.py # Unused (3KB, 0% coverage)
15
+ └── orchestrator_magentic.py # Advanced mode (11KB, 68% coverage)
16
+ ```
17
+
18
+ ## Related Issues
19
+
20
+ - GitHub Issue #67: Clean up empty src/orchestrator/ folder
21
+
22
+ ## Analysis
23
+
24
+ ### Empty Folder
25
+ The `src/orchestrator/` folder was created but never populated. All orchestrator implementations remain flat in `src/`.
26
+
27
+ ### Dead Code
28
+ `orchestrator_hierarchical.py` has **0% test coverage** and appears to be an early prototype that was never integrated:
29
+ - Not imported anywhere in production code
30
+ - Not referenced in any tests
31
+ - Pattern doesn't match current architecture
32
+
33
+ ### Import Pattern
34
+ All 30+ imports use the flat structure:
35
+ ```python
36
+ from src.orchestrator import Orchestrator
37
+ from src.orchestrator_factory import create_orchestrator
38
+ from src.orchestrator_magentic import MagenticOrchestrator
39
+ ```
40
+
41
+ ## Options
42
+
43
+ ### Option A: Minimal Cleanup (Recommended)
44
+
45
+ Delete the empty folder and dead code:
46
+
47
+ ```bash
48
+ rm -rf src/orchestrator/
49
+ rm src/orchestrator_hierarchical.py
50
+ ```
51
+
52
+ **Pros**: Zero import changes, minimal risk, quick
53
+ **Cons**: Flat structure remains
54
+
55
+ ### Option B: Full Consolidation (Future)
56
+
57
+ Move everything into a proper module:
58
+
59
+ ```
60
+ src/orchestrator/
61
+ ├── __init__.py # Re-export for backwards compat
62
+ ├── base.py # Shared protocols/types
63
+ ├── simple.py # From orchestrator.py
64
+ ├── magentic.py # From orchestrator_magentic.py
65
+ └── factory.py # From orchestrator_factory.py
66
+ ```
67
+
68
+ **Pros**: Cleaner organization, better separation
69
+ **Cons**: 30+ import changes, risk of breakage, time investment
70
+
71
+ ### Option C: Hybrid (Pragmatic)
72
+
73
+ Delete empty folder + dead code now. Create `src/orchestrator/__init__.py` that re-exports from flat files:
74
+
75
+ ```python
76
+ # src/orchestrator/__init__.py
77
+ from src.orchestrator import Orchestrator
78
+ from src.orchestrator_factory import create_orchestrator
79
+ from src.orchestrator_magentic import MagenticOrchestrator
80
+
81
+ __all__ = ["Orchestrator", "create_orchestrator", "MagenticOrchestrator"]
82
+ ```
83
+
84
+ **Problem**: This creates confusing import semantics (`src.orchestrator` would be both a module and a file).
85
+
86
+ ## Recommendation
87
+
88
+ **Option A** for now. The flat structure works fine and changing it provides no functional benefit. The empty folder and dead code should be removed.
89
+
90
+ Option B can be revisited post-hackathon when there's time for a proper refactor.
91
+
92
+ ## Implementation
93
+
94
+ ### Step 1: Remove Empty Folder
95
+
96
+ ```bash
97
+ rm -rf src/orchestrator/
98
+ ```
99
+
100
+ ### Step 2: Remove Dead Code (Optional)
101
+
102
+ ```bash
103
+ rm src/orchestrator_hierarchical.py
104
+ ```
105
+
106
+ If keeping for reference, add a deprecation notice:
107
+ ```python
108
+ # src/orchestrator_hierarchical.py
109
+ """
110
+ DEPRECATED: Unused hierarchical orchestrator prototype.
111
+ Kept for reference only. See orchestrator.py (simple) or
112
+ orchestrator_magentic.py (advanced) for active implementations.
113
+ """
114
+ ```
115
+
116
+ ### Step 3: Verify
117
+
118
+ ```bash
119
+ make check # All 142 tests should pass
120
+ ```
121
+
122
+ ## Acceptance Criteria
123
+
124
+ - [x] Empty `src/orchestrator/` folder deleted
125
+ - [x] No broken imports (grep for `from src.orchestrator/`)
126
+ - [x] Tests pass (154 unit tests)
127
+ - [x] `orchestrator_hierarchical.py` removed
128
+
129
+ **Status: IMPLEMENTED** (commit cb46aac)
130
+
131
+ ## Files to Modify
132
+
133
+ 1. `src/orchestrator/` - DELETE (empty folder)
134
+ 2. `src/orchestrator_hierarchical.py` - DELETE or add deprecation notice
135
+
136
+ ## Test Plan
137
+
138
+ ```bash
139
+ # Verify nothing imports from the folder path
140
+ grep -r "from src.orchestrator/" src tests
141
+ # Should return nothing
142
+
143
+ # Verify nothing imports hierarchical
144
+ grep -r "orchestrator_hierarchical" src tests
145
+ # Should return nothing (except possibly this spec)
146
+
147
+ # Run full test suite
148
+ make check
149
+ ```
150
+
151
+ ## Risk Assessment
152
+
153
+ | Action | Risk | Mitigation |
154
+ |--------|------|------------|
155
+ | Delete empty folder | None | It's empty, nothing uses it |
156
+ | Delete hierarchical.py | Low | 0% coverage, no imports |
157
+ | Full consolidation | Medium | Many import changes |
158
+
159
+ ## Time Estimate
160
+
161
+ - Option A: 5 minutes
162
+ - Option B: 1-2 hours (plus testing)
src/app.py CHANGED
@@ -14,6 +14,7 @@ from src.agent_factory.judges import HFInferenceJudgeHandler, JudgeHandler, Mock
14
  from src.orchestrator_factory import create_orchestrator
15
  from src.tools.clinicaltrials import ClinicalTrialsTool
16
  from src.tools.europepmc import EuropePMCTool
 
17
  from src.tools.pubmed import PubMedTool
18
  from src.tools.search_handler import SearchHandler
19
  from src.utils.config import settings
@@ -45,7 +46,7 @@ def configure_orchestrator(
45
 
46
  # Create search tools
47
  search_handler = SearchHandler(
48
- tools=[PubMedTool(), ClinicalTrialsTool(), EuropePMCTool()],
49
  timeout=config.search_timeout,
50
  )
51
 
@@ -176,13 +177,7 @@ async def research_agent(
176
  # Immediate backend info + loading feedback so user knows something is happening
177
  yield (
178
  f"🧠 **Backend**: {backend_name}\n\n"
179
- "⏳ **Processing...** Searching PubMed, ClinicalTrials.gov, Europe PMC...\n"
180
- )
181
-
182
- # Immediate loading feedback so user knows something is happening
183
- yield (
184
- f"🧠 **Backend**: {backend_name}\n\n"
185
- "⏳ **Processing...** Searching PubMed, ClinicalTrials.gov, Europe PMC...\n"
186
  )
187
 
188
  async for event in orchestrator.run(message):
@@ -203,7 +198,8 @@ async def research_agent(
203
 
204
  # Handle complete events specially
205
  if event.type == "complete":
206
- yield event.message
 
207
  else:
208
  # Format and append non-streaming events
209
  event_md = event.to_markdown()
@@ -240,7 +236,7 @@ def create_demo() -> tuple[gr.ChatInterface, gr.Accordion]:
240
  title="🍆 DeepBoner",
241
  description=(
242
  "*AI-Powered Sexual Health Research Agent — searches PubMed, "
243
- "ClinicalTrials.gov & Europe PMC*\n\n"
244
  "Deep research for sexual wellness, ED treatments, hormone therapy, "
245
  "libido, and reproductive health - for all genders.\n\n"
246
  "---\n"
 
14
  from src.orchestrator_factory import create_orchestrator
15
  from src.tools.clinicaltrials import ClinicalTrialsTool
16
  from src.tools.europepmc import EuropePMCTool
17
+ from src.tools.openalex import OpenAlexTool
18
  from src.tools.pubmed import PubMedTool
19
  from src.tools.search_handler import SearchHandler
20
  from src.utils.config import settings
 
46
 
47
  # Create search tools
48
  search_handler = SearchHandler(
49
+ tools=[PubMedTool(), ClinicalTrialsTool(), EuropePMCTool(), OpenAlexTool()],
50
  timeout=config.search_timeout,
51
  )
52
 
 
177
  # Immediate backend info + loading feedback so user knows something is happening
178
  yield (
179
  f"🧠 **Backend**: {backend_name}\n\n"
180
+ "⏳ **Processing...** Searching PubMed, ClinicalTrials.gov, Europe PMC, OpenAlex...\n"
 
 
 
 
 
 
181
  )
182
 
183
  async for event in orchestrator.run(message):
 
198
 
199
  # Handle complete events specially
200
  if event.type == "complete":
201
+ response_parts.append(event.message)
202
+ yield "\n\n".join(response_parts)
203
  else:
204
  # Format and append non-streaming events
205
  event_md = event.to_markdown()
 
236
  title="🍆 DeepBoner",
237
  description=(
238
  "*AI-Powered Sexual Health Research Agent — searches PubMed, "
239
+ "ClinicalTrials.gov, Europe PMC & OpenAlex*\n\n"
240
  "Deep research for sexual wellness, ED treatments, hormone therapy, "
241
  "libido, and reproductive health - for all genders.\n\n"
242
  "---\n"
src/orchestrator_factory.py CHANGED
@@ -52,6 +52,7 @@ def create_orchestrator(
52
  return orchestrator_cls(
53
  max_rounds=config.max_iterations if config else 10,
54
  api_key=api_key,
 
55
  )
56
 
57
  # Simple mode requires handlers
 
52
  return orchestrator_cls(
53
  max_rounds=config.max_iterations if config else 10,
54
  api_key=api_key,
55
+ timeout_seconds=settings.magentic_timeout,
56
  )
57
 
58
  # Simple mode requires handlers
src/orchestrator_hierarchical.py DELETED
@@ -1,95 +0,0 @@
1
- """Hierarchical orchestrator using middleware and sub-teams."""
2
-
3
- import asyncio
4
- from collections.abc import AsyncGenerator
5
-
6
- import structlog
7
-
8
- from src.agents.judge_agent_llm import LLMSubIterationJudge
9
- from src.agents.magentic_agents import create_search_agent
10
- from src.middleware.sub_iteration import SubIterationMiddleware, SubIterationTeam
11
- from src.services.embeddings import get_embedding_service
12
- from src.state import init_magentic_state
13
- from src.utils.models import AgentEvent
14
-
15
- logger = structlog.get_logger()
16
-
17
-
18
- class ResearchTeam(SubIterationTeam):
19
- """Adapts Magentic ChatAgent to SubIterationTeam protocol."""
20
-
21
- def __init__(self) -> None:
22
- self.agent = create_search_agent()
23
-
24
- async def execute(self, task: str) -> str:
25
- response = await self.agent.run(task)
26
- if response.messages:
27
- for msg in reversed(response.messages):
28
- if msg.role == "assistant" and msg.text:
29
- return str(msg.text)
30
- return "No response from agent."
31
-
32
-
33
- class HierarchicalOrchestrator:
34
- """Orchestrator that uses hierarchical teams and sub-iterations."""
35
-
36
- def __init__(self) -> None:
37
- self.team = ResearchTeam()
38
- self.judge = LLMSubIterationJudge()
39
- self.middleware = SubIterationMiddleware(self.team, self.judge, max_iterations=5)
40
-
41
- async def run(self, query: str) -> AsyncGenerator[AgentEvent, None]:
42
- logger.info("Starting hierarchical orchestrator", query=query)
43
-
44
- try:
45
- service = get_embedding_service()
46
- init_magentic_state(service)
47
- except Exception as e:
48
- logger.warning(
49
- "Embedding service initialization failed, using default state",
50
- error=str(e),
51
- )
52
- init_magentic_state()
53
-
54
- yield AgentEvent(type="started", message=f"Starting research: {query}")
55
-
56
- queue: asyncio.Queue[AgentEvent | None] = asyncio.Queue()
57
-
58
- async def event_callback(event: AgentEvent) -> None:
59
- await queue.put(event)
60
-
61
- task_future = asyncio.create_task(self.middleware.run(query, event_callback))
62
-
63
- while not task_future.done():
64
- get_event = asyncio.create_task(queue.get())
65
- done, _ = await asyncio.wait(
66
- {task_future, get_event}, return_when=asyncio.FIRST_COMPLETED
67
- )
68
-
69
- if get_event in done:
70
- event = get_event.result()
71
- if event:
72
- yield event
73
- else:
74
- get_event.cancel()
75
-
76
- # Process remaining events
77
- while not queue.empty():
78
- ev = queue.get_nowait()
79
- if ev:
80
- yield ev
81
-
82
- try:
83
- result, assessment = await task_future
84
-
85
- assessment_text = assessment.reasoning if assessment else "None"
86
- yield AgentEvent(
87
- type="complete",
88
- message=(
89
- f"Research complete.\n\nResult:\n{result}\n\nAssessment:\n{assessment_text}"
90
- ),
91
- data={"assessment": assessment.model_dump() if assessment else None},
92
- )
93
- except Exception as e:
94
- logger.error("Orchestrator failed", error=str(e))
95
- yield AgentEvent(type="error", message=f"Orchestrator failed: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/orchestrator_magentic.py CHANGED
@@ -45,7 +45,7 @@ class MagenticOrchestrator:
45
  max_rounds: int = 10,
46
  chat_client: OpenAIChatClient | None = None,
47
  api_key: str | None = None,
48
- timeout_seconds: float = 300.0,
49
  ) -> None:
50
  """Initialize orchestrator.
51
 
@@ -53,7 +53,7 @@ class MagenticOrchestrator:
53
  max_rounds: Maximum coordination rounds
54
  chat_client: Optional shared chat client for agents
55
  api_key: Optional OpenAI API key (for BYOK)
56
- timeout_seconds: Maximum workflow duration (default: 5 minutes)
57
  """
58
  # Validate requirements only if no key provided
59
  if not chat_client and not api_key:
 
45
  max_rounds: int = 10,
46
  chat_client: OpenAIChatClient | None = None,
47
  api_key: str | None = None,
48
+ timeout_seconds: float = 600.0,
49
  ) -> None:
50
  """Initialize orchestrator.
51
 
 
53
  max_rounds: Maximum coordination rounds
54
  chat_client: Optional shared chat client for agents
55
  api_key: Optional OpenAI API key (for BYOK)
56
+ timeout_seconds: Maximum workflow duration (default: 10 minutes)
57
  """
58
  # Validate requirements only if no key provided
59
  if not chat_client and not api_key:
src/tools/__init__.py CHANGED
@@ -3,6 +3,7 @@
3
  from src.tools.base import SearchTool
4
  from src.tools.clinicaltrials import ClinicalTrialsTool
5
  from src.tools.europepmc import EuropePMCTool
 
6
  from src.tools.pubmed import PubMedTool
7
  from src.tools.search_handler import SearchHandler
8
 
@@ -10,6 +11,7 @@ from src.tools.search_handler import SearchHandler
10
  __all__ = [
11
  "ClinicalTrialsTool",
12
  "EuropePMCTool",
 
13
  "PubMedTool",
14
  "SearchHandler",
15
  "SearchTool",
 
3
  from src.tools.base import SearchTool
4
  from src.tools.clinicaltrials import ClinicalTrialsTool
5
  from src.tools.europepmc import EuropePMCTool
6
+ from src.tools.openalex import OpenAlexTool
7
  from src.tools.pubmed import PubMedTool
8
  from src.tools.search_handler import SearchHandler
9
 
 
11
  __all__ = [
12
  "ClinicalTrialsTool",
13
  "EuropePMCTool",
14
+ "OpenAlexTool",
15
  "PubMedTool",
16
  "SearchHandler",
17
  "SearchTool",
src/tools/openalex.py ADDED
@@ -0,0 +1,162 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """OpenAlex search tool - citation-aware scholarly search."""
2
+
3
+ from typing import Any
4
+
5
+ import httpx
6
+ from tenacity import retry, stop_after_attempt, wait_exponential
7
+
8
+ from src.utils.exceptions import SearchError
9
+ from src.utils.models import Citation, Evidence
10
+
11
+
12
+ class OpenAlexTool:
13
+ """
14
+ Search OpenAlex for scholarly works with citation metrics.
15
+
16
+ OpenAlex indexes 209M+ works and provides:
17
+ - Citation counts (prioritize influential papers)
18
+ - Concept tagging (hierarchical classification)
19
+ - Open access links (direct PDF URLs)
20
+ - Related works (ML-powered similarity)
21
+
22
+ API Docs: https://docs.openalex.org
23
+ Rate Limits: Polite pool with mailto = 100k/day
24
+ """
25
+
26
+ BASE_URL = "https://api.openalex.org/works"
27
+ POLITE_EMAIL = "deepboner-research@proton.me"
28
+
29
+ @property
30
+ def name(self) -> str:
31
+ return "openalex"
32
+
33
+ @retry(
34
+ stop=stop_after_attempt(3),
35
+ wait=wait_exponential(multiplier=1, min=1, max=10),
36
+ reraise=True,
37
+ )
38
+ async def search(self, query: str, max_results: int = 10) -> list[Evidence]:
39
+ """
40
+ Search OpenAlex, sorted by citation count.
41
+
42
+ Args:
43
+ query: Search terms
44
+ max_results: Maximum results to return
45
+
46
+ Returns:
47
+ List of Evidence objects with citation metadata
48
+ """
49
+ params: dict[str, str | int] = {
50
+ "search": query,
51
+ "filter": "type:article,has_abstract:true", # Only articles with abstracts
52
+ "sort": "cited_by_count:desc", # Most cited first
53
+ "per_page": min(max_results, 100),
54
+ "mailto": self.POLITE_EMAIL,
55
+ }
56
+
57
+ async with httpx.AsyncClient(timeout=30.0) as client:
58
+ try:
59
+ response = await client.get(self.BASE_URL, params=params)
60
+ response.raise_for_status()
61
+
62
+ data = response.json()
63
+ works = data.get("results", [])
64
+
65
+ return [self._to_evidence(work) for work in works[:max_results]]
66
+
67
+ except httpx.HTTPStatusError as e:
68
+ raise SearchError(f"OpenAlex API error: {e}") from e
69
+ except httpx.RequestError as e:
70
+ raise SearchError(f"OpenAlex connection failed: {e}") from e
71
+
72
+ def _to_evidence(self, work: dict[str, Any]) -> Evidence:
73
+ """Convert OpenAlex work to Evidence with rich metadata."""
74
+ # Extract basic fields
75
+ title = work.get("display_name", "Untitled")
76
+ doi = work.get("doi", "")
77
+ year = work.get("publication_year", "Unknown")
78
+ cited_by_count = work.get("cited_by_count", 0)
79
+
80
+ # Reconstruct abstract from inverted index
81
+ abstract = self._reconstruct_abstract(work.get("abstract_inverted_index"))
82
+ if not abstract:
83
+ # Should be caught by filter=has_abstract:true, but defensive coding
84
+ abstract = f"[No abstract available. Cited by {cited_by_count} works.]"
85
+
86
+ # Extract authors (limit to 5)
87
+ authors = self._extract_authors(work.get("authorships", []))
88
+
89
+ # Extract concepts (top 5 by score)
90
+ concepts = self._extract_concepts(work.get("concepts", []))
91
+
92
+ # Open access info
93
+ oa_info = work.get("open_access", {})
94
+ is_oa = oa_info.get("is_oa", False)
95
+
96
+ # Get PDF URL (prefer best_oa_location)
97
+ best_oa = work.get("best_oa_location", {})
98
+ pdf_url = best_oa.get("pdf_url") if best_oa else None
99
+
100
+ # Build URL
101
+ if doi:
102
+ url = doi if doi.startswith("http") else f"https://doi.org/{doi}"
103
+ else:
104
+ openalex_id = work.get("id", "")
105
+ url = openalex_id if openalex_id else "https://openalex.org"
106
+
107
+ # Prepend citation badge to content
108
+ citation_badge = f"[Cited by {cited_by_count}] " if cited_by_count > 0 else ""
109
+ content = f"{citation_badge}{abstract[:1900]}"
110
+
111
+ # Calculate relevance: normalized citation count (capped at 1.0 for 100 citations)
112
+ # 100 citations is a very strong signal in most fields.
113
+ relevance = min(1.0, cited_by_count / 100.0)
114
+
115
+ return Evidence(
116
+ content=content[:2000],
117
+ citation=Citation(
118
+ source="openalex",
119
+ title=title[:500],
120
+ url=url,
121
+ date=str(year),
122
+ authors=authors,
123
+ ),
124
+ relevance=relevance,
125
+ metadata={
126
+ "cited_by_count": cited_by_count,
127
+ "concepts": concepts,
128
+ "is_open_access": is_oa,
129
+ "pdf_url": pdf_url,
130
+ },
131
+ )
132
+
133
+ def _reconstruct_abstract(self, inverted_index: dict[str, list[int]] | None) -> str:
134
+ """Rebuild abstract from {"word": [positions]} format."""
135
+ if not inverted_index:
136
+ return ""
137
+
138
+ position_word: dict[int, str] = {}
139
+ for word, positions in inverted_index.items():
140
+ for pos in positions:
141
+ position_word[pos] = word
142
+
143
+ if not position_word:
144
+ return ""
145
+
146
+ max_pos = max(position_word.keys())
147
+ return " ".join(position_word.get(i, "") for i in range(max_pos + 1))
148
+
149
+ def _extract_authors(self, authorships: list[dict[str, Any]]) -> list[str]:
150
+ """Extract author names from authorships array."""
151
+ authors = []
152
+ for authorship in authorships[:5]:
153
+ author = authorship.get("author", {})
154
+ name = author.get("display_name")
155
+ if name:
156
+ authors.append(name)
157
+ return authors
158
+
159
+ def _extract_concepts(self, concepts: list[dict[str, Any]]) -> list[str]:
160
+ """Extract concept names, sorted by score."""
161
+ sorted_concepts = sorted(concepts, key=lambda c: c.get("score", 0), reverse=True)
162
+ return [c.get("display_name", "") for c in sorted_concepts[:5] if c.get("display_name")]
src/utils/config.py CHANGED
@@ -57,6 +57,10 @@ class Settings(BaseSettings):
57
  # Agent Configuration
58
  max_iterations: int = Field(default=10, ge=1, le=50)
59
  search_timeout: int = Field(default=30, description="Seconds to wait for search")
 
 
 
 
60
 
61
  # Logging
62
  log_level: Literal["DEBUG", "INFO", "WARNING", "ERROR"] = "INFO"
 
57
  # Agent Configuration
58
  max_iterations: int = Field(default=10, ge=1, le=50)
59
  search_timeout: int = Field(default=30, description="Seconds to wait for search")
60
+ magentic_timeout: int = Field(
61
+ default=600,
62
+ description="Timeout for Magentic mode in seconds",
63
+ )
64
 
65
  # Logging
66
  log_level: Literal["DEBUG", "INFO", "WARNING", "ERROR"] = "INFO"
tests/unit/test_app_timeout.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Tests for app timeout and history preservation."""
2
+
3
+ import os
4
+ from unittest.mock import MagicMock, patch
5
+
6
+ import pytest
7
+
8
+ from src.app import research_agent
9
+ from src.utils.models import AgentEvent
10
+
11
+
12
+ async def async_gen(items):
13
+ for item in items:
14
+ yield item
15
+
16
+
17
+ @pytest.mark.asyncio
18
+ async def test_complete_event_preserves_history():
19
+ """
20
+ Verify that a 'complete' event (like timeout) appends to the history
21
+ instead of replacing it.
22
+ """
23
+ # Mock events: Progress -> Progress -> Complete
24
+ mock_events = [
25
+ AgentEvent(type="thinking", message="Step 1: Thinking...", iteration=0),
26
+ AgentEvent(type="search_complete", message="Step 2: Found data", iteration=1),
27
+ AgentEvent(type="complete", message="Timeout: Synthesizing...", iteration=1),
28
+ ]
29
+
30
+ # Create a mock orchestrator that yields these events
31
+ mock_orchestrator = MagicMock()
32
+ # The run method should return an async generator
33
+ mock_orchestrator.run.side_effect = lambda msg: async_gen(mock_events)
34
+
35
+ # Patch configure_orchestrator to return our mock
36
+ with patch("src.app.configure_orchestrator") as mock_config:
37
+ mock_config.return_value = (mock_orchestrator, "Mock Backend")
38
+
39
+ # Run the agent
40
+ results = []
41
+ async for output in research_agent("test query", [], "simple"):
42
+ results.append(output)
43
+
44
+ # The final output should contain the accumulated history AND the timeout message
45
+ final_output = results[-1]
46
+
47
+ # Check for preservation
48
+ assert "Step 1: Thinking..." in final_output
49
+ assert "Step 2: Found data" in final_output
50
+ assert "Timeout: Synthesizing..." in final_output
51
+
52
+
53
+ @pytest.mark.asyncio
54
+ async def test_timeout_configurable():
55
+ """Verify MAGENTIC_TIMEOUT env var is respected."""
56
+ from src.utils.config import Settings
57
+
58
+ with patch.dict(os.environ, {"MAGENTIC_TIMEOUT": "120"}):
59
+ settings = Settings()
60
+ assert settings.magentic_timeout == 120
tests/unit/tools/test_openalex.py ADDED
@@ -0,0 +1,165 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Unit tests for OpenAlex tool."""
2
+
3
+ from unittest.mock import AsyncMock, MagicMock
4
+
5
+ import pytest
6
+
7
+ from src.tools.openalex import OpenAlexTool
8
+ from src.utils.models import Evidence
9
+
10
+ # Sample OpenAlex response
11
+ SAMPLE_OPENALEX_RESPONSE = {
12
+ "results": [
13
+ {
14
+ "id": "https://openalex.org/W12345",
15
+ "doi": "https://doi.org/10.1234/test",
16
+ "display_name": "Metformin in Cancer Treatment",
17
+ "publication_year": 2024,
18
+ "cited_by_count": 150,
19
+ "abstract_inverted_index": {
20
+ "Metformin": [0],
21
+ "shows": [1],
22
+ "promise": [2],
23
+ "in": [3],
24
+ "cancer": [4],
25
+ "treatment": [5],
26
+ },
27
+ "concepts": [
28
+ {"display_name": "Metformin", "score": 0.95, "level": 2},
29
+ {"display_name": "Cancer", "score": 0.88, "level": 1},
30
+ ],
31
+ "authorships": [
32
+ {"author": {"display_name": "John Smith"}},
33
+ {"author": {"display_name": "Jane Doe"}},
34
+ ],
35
+ "open_access": {"is_oa": True, "oa_url": "https://example.com/oa"},
36
+ "best_oa_location": {"pdf_url": "https://example.com/paper.pdf"},
37
+ }
38
+ ]
39
+ }
40
+
41
+
42
+ @pytest.mark.unit
43
+ class TestOpenAlexTool:
44
+ """Tests for OpenAlexTool."""
45
+
46
+ @pytest.fixture
47
+ def tool(self) -> OpenAlexTool:
48
+ return OpenAlexTool()
49
+
50
+ @pytest.fixture
51
+ def mock_client(self, mocker):
52
+ """Create a standardized mock client with context manager support."""
53
+ client = AsyncMock()
54
+ client.__aenter__.return_value = client
55
+ client.__aexit__.return_value = None
56
+
57
+ # Standard response mock
58
+ resp = MagicMock()
59
+ resp.json.return_value = SAMPLE_OPENALEX_RESPONSE
60
+ resp.raise_for_status.return_value = None
61
+ client.get.return_value = resp
62
+
63
+ mocker.patch("httpx.AsyncClient", return_value=client)
64
+ return client
65
+
66
+ def test_tool_name(self, tool: OpenAlexTool) -> None:
67
+ """Tool name should be 'openalex'."""
68
+ assert tool.name == "openalex"
69
+
70
+ @pytest.mark.asyncio
71
+ async def test_search_returns_evidence(self, tool: OpenAlexTool, mock_client) -> None:
72
+ """Search should return Evidence objects."""
73
+ results = await tool.search("metformin cancer", max_results=5)
74
+
75
+ assert len(results) == 1
76
+ assert isinstance(results[0], Evidence)
77
+ assert results[0].citation.source == "openalex"
78
+
79
+ @pytest.mark.asyncio
80
+ async def test_search_includes_citation_count(self, tool: OpenAlexTool, mock_client) -> None:
81
+ """Evidence metadata should include cited_by_count."""
82
+ results = await tool.search("metformin cancer", max_results=5)
83
+ assert results[0].metadata["cited_by_count"] == 150
84
+
85
+ @pytest.mark.asyncio
86
+ async def test_search_calculates_relevance(self, tool: OpenAlexTool, mock_client) -> None:
87
+ """Evidence relevance should be based on citations (capped at 1.0)."""
88
+ results = await tool.search("metformin cancer", max_results=5)
89
+ # 150 citations / 100 = 1.5 -> capped at 1.0
90
+ assert results[0].relevance == 1.0
91
+
92
+ @pytest.mark.asyncio
93
+ async def test_search_includes_concepts(self, tool: OpenAlexTool, mock_client) -> None:
94
+ """Evidence metadata should include concepts."""
95
+ results = await tool.search("metformin cancer", max_results=5)
96
+ assert "Metformin" in results[0].metadata["concepts"]
97
+ assert "Cancer" in results[0].metadata["concepts"]
98
+
99
+ @pytest.mark.asyncio
100
+ async def test_search_includes_open_access_info(self, tool: OpenAlexTool, mock_client) -> None:
101
+ """Evidence metadata should include open access info."""
102
+ results = await tool.search("metformin cancer", max_results=5)
103
+ assert results[0].metadata["is_open_access"] is True
104
+ assert results[0].metadata["pdf_url"] == "https://example.com/paper.pdf"
105
+
106
+ def test_reconstruct_abstract(self, tool: OpenAlexTool) -> None:
107
+ """Abstract reconstruction from inverted index."""
108
+ inverted_index = {
109
+ "Hello": [0],
110
+ "world": [1],
111
+ "this": [2],
112
+ "is": [3],
113
+ "a": [4],
114
+ "test": [5],
115
+ }
116
+ result = tool._reconstruct_abstract(inverted_index)
117
+ assert result == "Hello world this is a test"
118
+
119
+ def test_reconstruct_abstract_empty(self, tool: OpenAlexTool) -> None:
120
+ """Handle None or empty inverted index."""
121
+ assert tool._reconstruct_abstract(None) == ""
122
+ assert tool._reconstruct_abstract({}) == ""
123
+
124
+ @pytest.mark.asyncio
125
+ async def test_search_empty_results(self, tool: OpenAlexTool, mock_client) -> None:
126
+ """Handle empty results gracefully."""
127
+ mock_client.get.return_value.json.return_value = {"results": []}
128
+
129
+ results = await tool.search("xyznonexistent123", max_results=5)
130
+
131
+ assert results == []
132
+
133
+ @pytest.mark.asyncio
134
+ async def test_search_params(self, tool: OpenAlexTool, mock_client) -> None:
135
+ """Verify API call requests citation-sorted results and uses polite pool."""
136
+ mock_client.get.return_value.json.return_value = {"results": []}
137
+
138
+ await tool.search("test query", max_results=5)
139
+
140
+ # Verify call params
141
+ call_args = mock_client.get.call_args
142
+ params = call_args[1]["params"]
143
+ assert params["sort"] == "cited_by_count:desc"
144
+ assert params["mailto"] == tool.POLITE_EMAIL
145
+ assert "type:article" in params["filter"]
146
+ assert "has_abstract:true" in params["filter"]
147
+
148
+
149
+ @pytest.mark.integration
150
+ class TestOpenAlexIntegration:
151
+ """Integration tests with real OpenAlex API."""
152
+
153
+ @pytest.mark.asyncio
154
+ async def test_real_api_returns_results(self) -> None:
155
+ """Test actual API returns relevant results."""
156
+ tool = OpenAlexTool()
157
+ results = await tool.search("metformin cancer treatment", max_results=3)
158
+
159
+ assert len(results) > 0
160
+ # Should have citation counts
161
+ assert results[0].metadata["cited_by_count"] >= 0
162
+ # Should have abstract text
163
+ assert len(results[0].content) > 50
164
+ # Should have concepts
165
+ assert len(results[0].metadata["concepts"]) > 0