Merge pull request #124 from The-Obstacle-Is-The-Way/fix/p1-forced-synthesis
Browse files
SPEC_ARCHITECTURAL_DEBT.md
ADDED
|
@@ -0,0 +1,355 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Architectural Debt Specification
|
| 2 |
+
|
| 3 |
+
> **Status**: IMPLEMENTED (Phase 1 Complete)
|
| 4 |
+
> **Date**: 2025-12-04
|
| 5 |
+
> **Scope**: `src/orchestrators/advanced.py` (Primary), System-Wide (Secondary)
|
| 6 |
+
> **Purpose**: Roadmap for "DeepMind Status" Code Quality
|
| 7 |
+
> **Author**: Claude (Senior Review Incorporated)
|
| 8 |
+
|
| 9 |
+
---
|
| 10 |
+
|
| 11 |
+
## Executive Summary
|
| 12 |
+
|
| 13 |
+
The P1/P2 bug fixes in PR #124 introduced technical debt that must be addressed before the PR is considered "done". This spec documents three immediate priorities (DRY violation, redundant imports, magic strings) and five medium-term system-wide improvements.
|
| 14 |
+
|
| 15 |
+
---
|
| 16 |
+
|
| 17 |
+
## Part 1: Immediate Cleanup (MUST Complete Before PR Merge)
|
| 18 |
+
|
| 19 |
+
### Priority 1: DRY Violation - Synthesis Methods
|
| 20 |
+
|
| 21 |
+
**Problem**: `_handle_timeout()` (lines 201-248) and `_force_synthesis()` (lines 250-297) are **95% identical**.
|
| 22 |
+
|
| 23 |
+
| `_handle_timeout()` | `_force_synthesis()` |
|
| 24 |
+
|---------------------|----------------------|
|
| 25 |
+
| Lines 201-248 (47 lines) | Lines 250-297 (47 lines) |
|
| 26 |
+
| Yields "Workflow timed out. Synthesizing..." | Yields "Synthesizing research findings..." |
|
| 27 |
+
| Error data: `timeout_synthesis` | Error data: `forced_synthesis` |
|
| 28 |
+
| **Everything else is identical** | **Everything else is identical** |
|
| 29 |
+
|
| 30 |
+
**SOLID Violation**: **DRY (Don't Repeat Yourself)**. Changes to synthesis logic must be made in two places. This is a maintenance nightmare and a source of future bugs.
|
| 31 |
+
|
| 32 |
+
**Fix**: Extract unified method `_synthesize_fallback(iteration: int, reason: str)`:
|
| 33 |
+
|
| 34 |
+
```python
|
| 35 |
+
async def _synthesize_fallback(
|
| 36 |
+
self, iteration: int, reason: str
|
| 37 |
+
) -> AsyncGenerator[AgentEvent, None]:
|
| 38 |
+
"""
|
| 39 |
+
Unified fallback synthesis for all termination scenarios.
|
| 40 |
+
|
| 41 |
+
Args:
|
| 42 |
+
iteration: Current workflow iteration
|
| 43 |
+
reason: Why synthesis is being forced ("timeout", "no_reporter", "max_rounds")
|
| 44 |
+
"""
|
| 45 |
+
status_messages = {
|
| 46 |
+
"timeout": "Workflow timed out. Synthesizing available evidence...",
|
| 47 |
+
"no_reporter": "Synthesizing research findings...",
|
| 48 |
+
"max_rounds": "Max rounds reached. Synthesizing findings...",
|
| 49 |
+
}
|
| 50 |
+
|
| 51 |
+
try:
|
| 52 |
+
state = get_magentic_state()
|
| 53 |
+
evidence_summary = await state.memory.get_context_summary()
|
| 54 |
+
report_agent = create_report_agent(self._chat_client, domain=self.domain)
|
| 55 |
+
|
| 56 |
+
yield AgentEvent(
|
| 57 |
+
type="synthesizing",
|
| 58 |
+
message=status_messages.get(reason, "Synthesizing..."),
|
| 59 |
+
iteration=iteration,
|
| 60 |
+
)
|
| 61 |
+
|
| 62 |
+
synthesis_result = await report_agent.run(
|
| 63 |
+
f"Synthesize research report from this evidence. "
|
| 64 |
+
f"If evidence is sparse, say so.\n\n{evidence_summary}"
|
| 65 |
+
)
|
| 66 |
+
|
| 67 |
+
yield AgentEvent(
|
| 68 |
+
type="complete",
|
| 69 |
+
message=synthesis_result.text,
|
| 70 |
+
data={"reason": f"{reason}_synthesis", "iterations": iteration},
|
| 71 |
+
iteration=iteration,
|
| 72 |
+
)
|
| 73 |
+
except Exception as synth_error:
|
| 74 |
+
logger.error(f"{reason} synthesis failed", error=str(synth_error))
|
| 75 |
+
yield AgentEvent(
|
| 76 |
+
type="complete",
|
| 77 |
+
message=f"Research completed. Synthesis failed: {synth_error}",
|
| 78 |
+
data={"reason": f"{reason}_synthesis_failed", "iterations": iteration},
|
| 79 |
+
iteration=iteration,
|
| 80 |
+
)
|
| 81 |
+
```
|
| 82 |
+
|
| 83 |
+
**Call Sites to Update**:
|
| 84 |
+
1. Line 447: `async for event in self._handle_timeout(iteration):` → `self._synthesize_fallback(iteration, "timeout")`
|
| 85 |
+
2. Line 412: `async for synth_event in self._force_synthesis(iteration):` → `self._synthesize_fallback(iteration, "no_reporter")`
|
| 86 |
+
3. Line 432: `async for synth_event in self._force_synthesis(iteration):` → `self._synthesize_fallback(iteration, "max_rounds")`
|
| 87 |
+
|
| 88 |
+
**Delete After Refactor**:
|
| 89 |
+
- `_handle_timeout()` method (lines 201-248)
|
| 90 |
+
- `_force_synthesis()` method (lines 250-297)
|
| 91 |
+
|
| 92 |
+
---
|
| 93 |
+
|
| 94 |
+
### Priority 2: Redundant Imports
|
| 95 |
+
|
| 96 |
+
**Problem**: Imports inside methods that already exist at module level.
|
| 97 |
+
|
| 98 |
+
| Location | Import | Already At |
|
| 99 |
+
|----------|--------|------------|
|
| 100 |
+
| Line 207 | `from src.agents.magentic_agents import create_report_agent` | Line 35 |
|
| 101 |
+
| Line 208 | `from src.agents.state import get_magentic_state` | Missing! |
|
| 102 |
+
| Line 257 | `from src.agents.magentic_agents import create_report_agent` | Line 35 |
|
| 103 |
+
| Line 258 | `from src.agents.state import get_magentic_state` | Missing! |
|
| 104 |
+
|
| 105 |
+
**SOLID Violation**: **SRP (Single Responsibility)**. Import management is scattered across the file instead of centralized at the top.
|
| 106 |
+
|
| 107 |
+
**Fix**:
|
| 108 |
+
1. Add to module-level imports (around line 38):
|
| 109 |
+
```python
|
| 110 |
+
from src.agents.state import get_magentic_state, init_magentic_state
|
| 111 |
+
```
|
| 112 |
+
Note: `init_magentic_state` is already imported at line 38. Add `get_magentic_state` to that import.
|
| 113 |
+
|
| 114 |
+
2. Remove redundant imports from:
|
| 115 |
+
- Lines 207-208 (inside `_handle_timeout`)
|
| 116 |
+
- Lines 257-258 (inside `_force_synthesis`)
|
| 117 |
+
|
| 118 |
+
---
|
| 119 |
+
|
| 120 |
+
### Priority 3: Magic Strings
|
| 121 |
+
|
| 122 |
+
**Problem**: Agent detection relies on string literals that break silently if agents are renamed.
|
| 123 |
+
|
| 124 |
+
**Current Code** (line 385):
|
| 125 |
+
```python
|
| 126 |
+
agent_name = (event.agent_id or "").lower()
|
| 127 |
+
if "report" in agent_name: # FRAGILE: Breaks if agent renamed
|
| 128 |
+
reporter_ran = True
|
| 129 |
+
```
|
| 130 |
+
|
| 131 |
+
**Also in** `_get_event_type_for_agent()` (lines 593-602):
|
| 132 |
+
```python
|
| 133 |
+
if "search" in agent_lower: # Magic string
|
| 134 |
+
if "judge" in agent_lower: # Magic string
|
| 135 |
+
if "hypothes" in agent_lower: # Magic string
|
| 136 |
+
if "report" in agent_lower: # Magic string
|
| 137 |
+
```
|
| 138 |
+
|
| 139 |
+
**SOLID Violation**: **OCP (Open/Closed Principle)**. Renaming an agent requires changes in multiple locations.
|
| 140 |
+
|
| 141 |
+
**Fix Option A** - Constants:
|
| 142 |
+
```python
|
| 143 |
+
# At module level (after imports)
|
| 144 |
+
REPORTER_AGENT_ID = "reporter"
|
| 145 |
+
SEARCHER_AGENT_ID = "searcher"
|
| 146 |
+
JUDGE_AGENT_ID = "judge"
|
| 147 |
+
HYPOTHESIZER_AGENT_ID = "hypothesizer"
|
| 148 |
+
```
|
| 149 |
+
|
| 150 |
+
**Fix Option B** - Agent Name Attribute (Preferred):
|
| 151 |
+
```python
|
| 152 |
+
# In magentic_agents.py, ensure each agent has a .name attribute
|
| 153 |
+
# Then in advanced.py:
|
| 154 |
+
if event.agent_id == report_agent.name:
|
| 155 |
+
reporter_ran = True
|
| 156 |
+
```
|
| 157 |
+
|
| 158 |
+
**Recommendation**: Option A is simpler and doesn't require modifying agent factory. Use constants.
|
| 159 |
+
|
| 160 |
+
---
|
| 161 |
+
|
| 162 |
+
## Part 2: System-Wide Issues (Future PRs)
|
| 163 |
+
|
| 164 |
+
These are valid concerns identified during code review but are NOT blockers for the current PR.
|
| 165 |
+
|
| 166 |
+
### Priority 4: Dead Config
|
| 167 |
+
|
| 168 |
+
**Location**: `src/utils/config.py`
|
| 169 |
+
**Issue**: Zombie configuration values that are never used or raise NotImplemented.
|
| 170 |
+
- `magentic_timeout`: Deprecated, never read
|
| 171 |
+
- `anthropic_api_key`: Config exists but factory raises NotImplemented
|
| 172 |
+
|
| 173 |
+
**Fix**: Audit config.py, remove dead settings, add deprecation warnings for transitional settings.
|
| 174 |
+
|
| 175 |
+
---
|
| 176 |
+
|
| 177 |
+
### Priority 5: Prompt Unification
|
| 178 |
+
|
| 179 |
+
**Location**: `src/prompts/` vs `src/config/domain.py`
|
| 180 |
+
**Issue**: Two sources of truth for prompts. `src/prompts/` files exist but are ignored. System uses hardcoded strings in `domain.py`.
|
| 181 |
+
|
| 182 |
+
**Fix**: Pick ONE source of truth. Recommendation: Delete `src/prompts/` if unused, or migrate `domain.py` prompts there.
|
| 183 |
+
|
| 184 |
+
---
|
| 185 |
+
|
| 186 |
+
### Priority 6: Factory Monolith
|
| 187 |
+
|
| 188 |
+
**Location**: `src/clients/factory.py`
|
| 189 |
+
**Issue**: Hardcoded logic for detecting API key prefixes (`sk-` → OpenAI, `sk-ant-` → Anthropic error).
|
| 190 |
+
|
| 191 |
+
**SOLID Violation**: OCP. Adding a new provider requires modifying the factory.
|
| 192 |
+
|
| 193 |
+
**Fix**: Provider registry pattern with auto-registration, or strategy pattern with key prefix handlers.
|
| 194 |
+
|
| 195 |
+
---
|
| 196 |
+
|
| 197 |
+
### Priority 7: State Class
|
| 198 |
+
|
| 199 |
+
**Location**: `src/orchestrators/advanced.py` `run()` method
|
| 200 |
+
**Issue**: Method manages 6+ loose variables (`iteration`, `reporter_ran`, `buffer`, `current_agent_id`, `last_streamed_length`, `final_event_received`).
|
| 201 |
+
|
| 202 |
+
**Fix**: Extract to `WorkflowState` dataclass:
|
| 203 |
+
```python
|
| 204 |
+
@dataclass
|
| 205 |
+
class WorkflowState:
|
| 206 |
+
iteration: int = 0
|
| 207 |
+
reporter_ran: bool = False
|
| 208 |
+
current_message_buffer: str = ""
|
| 209 |
+
current_agent_id: str | None = None
|
| 210 |
+
last_streamed_length: int = 0
|
| 211 |
+
final_event_received: bool = False
|
| 212 |
+
```
|
| 213 |
+
|
| 214 |
+
---
|
| 215 |
+
|
| 216 |
+
### Priority 8: Real Integration Tests
|
| 217 |
+
|
| 218 |
+
**Location**: `tests/e2e/`
|
| 219 |
+
**Issue**: We deleted flaky integration tests. Now we have ZERO automated tests against real APIs.
|
| 220 |
+
|
| 221 |
+
**Fix**: Create stable `make test-live` suite with:
|
| 222 |
+
- Real HuggingFace Free Tier test
|
| 223 |
+
- Real OpenAI BYOK test
|
| 224 |
+
- Proper timeout handling
|
| 225 |
+
- Skip markers for CI (run manually or on schedule)
|
| 226 |
+
|
| 227 |
+
---
|
| 228 |
+
|
| 229 |
+
## Regression Prevention Strategy
|
| 230 |
+
|
| 231 |
+
**CRITICAL**: Each phase MUST pass smoke tests before merge. Unit tests alone are insufficient.
|
| 232 |
+
|
| 233 |
+
### Smoke Test Infrastructure
|
| 234 |
+
|
| 235 |
+
Add to `Makefile`:
|
| 236 |
+
```makefile
|
| 237 |
+
# Smoke tests - run against real APIs (slow, not for CI)
|
| 238 |
+
smoke-free:
|
| 239 |
+
@echo "Running Free Tier smoke test..."
|
| 240 |
+
uv run python -m pytest tests/e2e/test_smoke.py::test_free_tier_synthesis -v -s --timeout=600
|
| 241 |
+
|
| 242 |
+
smoke-paid:
|
| 243 |
+
@echo "Running Paid Tier smoke test (requires OPENAI_API_KEY)..."
|
| 244 |
+
uv run python -m pytest tests/e2e/test_smoke.py::test_paid_tier_synthesis -v -s --timeout=300
|
| 245 |
+
|
| 246 |
+
smoke: smoke-free # Default to free tier
|
| 247 |
+
```
|
| 248 |
+
|
| 249 |
+
### Smoke Test Implementation
|
| 250 |
+
|
| 251 |
+
Create `tests/e2e/test_smoke.py`:
|
| 252 |
+
```python
|
| 253 |
+
"""
|
| 254 |
+
Smoke tests for regression prevention.
|
| 255 |
+
|
| 256 |
+
These tests run against REAL APIs and verify end-to-end functionality.
|
| 257 |
+
They are slow (2-5 minutes) and should NOT run in CI.
|
| 258 |
+
|
| 259 |
+
Usage:
|
| 260 |
+
make smoke-free # Test Free Tier (HuggingFace)
|
| 261 |
+
make smoke-paid # Test Paid Tier (OpenAI BYOK)
|
| 262 |
+
"""
|
| 263 |
+
import pytest
|
| 264 |
+
from src.orchestrators.advanced import AdvancedOrchestrator
|
| 265 |
+
|
| 266 |
+
@pytest.mark.e2e
|
| 267 |
+
@pytest.mark.timeout(600) # 10 minute timeout for Free Tier
|
| 268 |
+
async def test_free_tier_synthesis():
|
| 269 |
+
"""Verify Free Tier produces actual synthesis (not just 'Research complete.')"""
|
| 270 |
+
orch = AdvancedOrchestrator(max_rounds=2)
|
| 271 |
+
|
| 272 |
+
events = []
|
| 273 |
+
async for event in orch.run("What is libido?"):
|
| 274 |
+
events.append(event)
|
| 275 |
+
|
| 276 |
+
# MUST have a complete event
|
| 277 |
+
complete_events = [e for e in events if e.type == "complete"]
|
| 278 |
+
assert len(complete_events) >= 1, "No complete event received"
|
| 279 |
+
|
| 280 |
+
# Complete event MUST have substantive content (not just signal)
|
| 281 |
+
final = complete_events[-1]
|
| 282 |
+
assert len(final.message) > 100, f"Synthesis too short: {len(final.message)} chars"
|
| 283 |
+
assert "Research complete." not in final.message or len(final.message) > 50, \
|
| 284 |
+
"Got empty synthesis signal instead of actual report"
|
| 285 |
+
|
| 286 |
+
# Should NOT have duplicate content
|
| 287 |
+
messages = [e.message for e in events if e.message]
|
| 288 |
+
# Check for exact duplicates of long content
|
| 289 |
+
long_messages = [m for m in messages if len(m) > 200]
|
| 290 |
+
assert len(long_messages) == len(set(long_messages)), "Duplicate content detected"
|
| 291 |
+
|
| 292 |
+
@pytest.mark.e2e
|
| 293 |
+
@pytest.mark.timeout(300) # 5 minute timeout for Paid Tier
|
| 294 |
+
async def test_paid_tier_synthesis():
|
| 295 |
+
"""Verify Paid Tier (BYOK) produces synthesis."""
|
| 296 |
+
import os
|
| 297 |
+
api_key = os.environ.get("OPENAI_API_KEY")
|
| 298 |
+
if not api_key:
|
| 299 |
+
pytest.skip("OPENAI_API_KEY not set")
|
| 300 |
+
|
| 301 |
+
orch = AdvancedOrchestrator(max_rounds=2, api_key=api_key)
|
| 302 |
+
|
| 303 |
+
events = []
|
| 304 |
+
async for event in orch.run("What is libido?"):
|
| 305 |
+
events.append(event)
|
| 306 |
+
|
| 307 |
+
complete_events = [e for e in events if e.type == "complete"]
|
| 308 |
+
assert len(complete_events) >= 1
|
| 309 |
+
assert len(complete_events[-1].message) > 100
|
| 310 |
+
```
|
| 311 |
+
|
| 312 |
+
### Phase Gate Checklist
|
| 313 |
+
|
| 314 |
+
Before merging ANY refactoring PR:
|
| 315 |
+
|
| 316 |
+
```text
|
| 317 |
+
[ ] make check # All 318+ unit tests pass
|
| 318 |
+
[ ] make smoke-free # Free Tier produces real synthesis
|
| 319 |
+
[ ] make smoke-paid # Paid Tier works (if you have key)
|
| 320 |
+
[ ] CodeRabbit approved # No blocking issues
|
| 321 |
+
```
|
| 322 |
+
|
| 323 |
+
---
|
| 324 |
+
|
| 325 |
+
## Execution Strategy
|
| 326 |
+
|
| 327 |
+
### Phase 1: Current PR (REQUIRED)
|
| 328 |
+
Implement **Priority 1, 2, and 3** before merging PR #124.
|
| 329 |
+
|
| 330 |
+
**Definition of Done**:
|
| 331 |
+
- [x] `_synthesize_fallback(iteration, reason)` implemented
|
| 332 |
+
- [x] `_handle_timeout()` and `_force_synthesis()` deleted
|
| 333 |
+
- [x] All synthesis call sites updated
|
| 334 |
+
- [x] Redundant imports removed
|
| 335 |
+
- [x] `get_magentic_state` added to module-level imports
|
| 336 |
+
- [x] Magic strings replaced with constants
|
| 337 |
+
- [x] All tests pass (`make check`)
|
| 338 |
+
|
| 339 |
+
### Phase 2: Future PRs (Separate Tickets)
|
| 340 |
+
Create GitHub issues for Priority 4-8. Do NOT bloat the current bug fix PR.
|
| 341 |
+
|
| 342 |
+
---
|
| 343 |
+
|
| 344 |
+
## Appendix: Line Number Reference
|
| 345 |
+
|
| 346 |
+
| Item | Current Location |
|
| 347 |
+
|------|------------------|
|
| 348 |
+
| `_handle_timeout()` | Lines 201-248 |
|
| 349 |
+
| `_force_synthesis()` | Lines 250-297 |
|
| 350 |
+
| Redundant imports (timeout) | Lines 207-208 |
|
| 351 |
+
| Redundant imports (force) | Lines 257-258 |
|
| 352 |
+
| Magic string detection | Line 385 |
|
| 353 |
+
| `_get_event_type_for_agent()` | Lines 582-602 |
|
| 354 |
+
| Module imports | Lines 18-48 |
|
| 355 |
+
| `run()` method | Lines 299-456 |
|
docs/bugs/ACTIVE_BUGS.md
CHANGED
|
@@ -57,6 +57,7 @@ All resolved bugs have been moved to `docs/bugs/archive/`. Summary:
|
|
| 57 |
- **P0 Advanced Mode Timeout No Synthesis** - FIXED, actual synthesis on timeout
|
| 58 |
|
| 59 |
### P1 Bugs (All FIXED)
|
|
|
|
| 60 |
- **P1 Free Tier Tool Execution Failure** - FIXED in PR fix/P1-free-tier-tool-execution, removed premature marker
|
| 61 |
- **P1 Gradio Example Click Auto-Submits** - FIXED in PR #120, prevents auto-submit on example click
|
| 62 |
- **P1 HuggingFace Router 401 Hyperbolic** - FIXED, invalid token was root cause
|
|
|
|
| 57 |
- **P0 Advanced Mode Timeout No Synthesis** - FIXED, actual synthesis on timeout
|
| 58 |
|
| 59 |
### P1 Bugs (All FIXED)
|
| 60 |
+
- **P1 No Synthesis Free Tier** - FIXED in PR fix/p1-forced-synthesis, forced synthesis safety net when ReportAgent doesn't run
|
| 61 |
- **P1 Free Tier Tool Execution Failure** - FIXED in PR fix/P1-free-tier-tool-execution, removed premature marker
|
| 62 |
- **P1 Gradio Example Click Auto-Submits** - FIXED in PR #120, prevents auto-submit on example click
|
| 63 |
- **P1 HuggingFace Router 401 Hyperbolic** - FIXED, invalid token was root cause
|
docs/bugs/P1_NO_SYNTHESIS_FREE_TIER.md
ADDED
|
@@ -0,0 +1,165 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# P1 Bug: No Synthesis Report in Free Tier (Premature Workflow Termination)
|
| 2 |
+
|
| 3 |
+
**Date**: 2025-12-04
|
| 4 |
+
**Status**: FIXED (PR fix/p1-forced-synthesis)
|
| 5 |
+
**Severity**: P1 (Critical UX - No usable output from research)
|
| 6 |
+
**Component**: `src/orchestrators/advanced.py`
|
| 7 |
+
**Affects**: Free Tier (HuggingFace) primarily, potentially Paid Tier
|
| 8 |
+
|
| 9 |
+
---
|
| 10 |
+
|
| 11 |
+
## Executive Summary
|
| 12 |
+
|
| 13 |
+
The workflow terminates without the ReportAgent ever producing a synthesis report. Users see search results and hypotheses streaming, but the final output is just "Research complete." with no actual research report. This is caused by the 7B Manager model failing to properly delegate to ReportAgent before workflow termination.
|
| 14 |
+
|
| 15 |
+
---
|
| 16 |
+
|
| 17 |
+
## Symptom
|
| 18 |
+
|
| 19 |
+
```text
|
| 20 |
+
📚 **SEARCH_COMPLETE**: searcher: [search results]
|
| 21 |
+
⏱️ **PROGRESS**: Round 1/5 (~3m 0s remaining)
|
| 22 |
+
🔬 **HYPOTHESIZING**: hypothesizer: [hypotheses]
|
| 23 |
+
⏱️ **PROGRESS**: Round 2/5 (~2m 15s remaining)
|
| 24 |
+
✅ **JUDGE_COMPLETE**: judge: [asks for more evidence]
|
| 25 |
+
⏱️ **PROGRESS**: Round 4/5 (~45s remaining)
|
| 26 |
+
Research complete.
|
| 27 |
+
Research complete. ← NO SYNTHESIS REPORT!
|
| 28 |
+
```
|
| 29 |
+
|
| 30 |
+
The workflow runs through multiple agents (Search, Hypothesis, Judge) but never reaches the ReportAgent. The user receives no usable research report.
|
| 31 |
+
|
| 32 |
+
---
|
| 33 |
+
|
| 34 |
+
## Root Cause Analysis
|
| 35 |
+
|
| 36 |
+
### Primary Issue: Manager Model Failure
|
| 37 |
+
|
| 38 |
+
The `with_standard_manager()` in Microsoft Agent Framework uses the provided chat client (HuggingFace 7B model) to coordinate agents. The 7B model:
|
| 39 |
+
|
| 40 |
+
1. **Cannot follow complex multi-step instructions** - The manager prompt instructs: "When JudgeAgent says SUFFICIENT EVIDENCE → delegate to ReportAgent." The 7B model doesn't reliably follow this.
|
| 41 |
+
|
| 42 |
+
2. **Triggers premature termination** - The framework has `max_stall_count=3` and `max_reset_count=2`. If the manager keeps making the same delegation or gets confused, the workflow terminates.
|
| 43 |
+
|
| 44 |
+
3. **Emits final event without synthesis** - The framework sends `MagenticFinalResultEvent` or `WorkflowOutputEvent` without ReportAgent ever running.
|
| 45 |
+
|
| 46 |
+
### Secondary Issue: Duplicate Complete Events
|
| 47 |
+
|
| 48 |
+
Both `MagenticFinalResultEvent` and `WorkflowOutputEvent` are emitted when the workflow ends. The previous code handled both, yielding "Research complete." twice.
|
| 49 |
+
|
| 50 |
+
---
|
| 51 |
+
|
| 52 |
+
## The Fix
|
| 53 |
+
|
| 54 |
+
### 1. Track ReportAgent Execution (Forced Synthesis)
|
| 55 |
+
|
| 56 |
+
Add a `reporter_ran` flag that tracks whether ReportAgent produced output:
|
| 57 |
+
|
| 58 |
+
```python
|
| 59 |
+
reporter_ran = False # P1 FIX: Track if ReportAgent produced output
|
| 60 |
+
|
| 61 |
+
# In MagenticAgentMessageEvent handler:
|
| 62 |
+
agent_name = (event.agent_id or "").lower()
|
| 63 |
+
if "report" in agent_name:
|
| 64 |
+
reporter_ran = True
|
| 65 |
+
```
|
| 66 |
+
|
| 67 |
+
### 2. Force Synthesis on Final Event
|
| 68 |
+
|
| 69 |
+
If the workflow ends without ReportAgent running, force synthesis:
|
| 70 |
+
|
| 71 |
+
```python
|
| 72 |
+
if isinstance(event, (MagenticFinalResultEvent, WorkflowOutputEvent)):
|
| 73 |
+
if not reporter_ran:
|
| 74 |
+
logger.warning("ReportAgent never ran - forcing synthesis")
|
| 75 |
+
async for synth_event in self._force_synthesis(iteration):
|
| 76 |
+
yield synth_event
|
| 77 |
+
else:
|
| 78 |
+
yield self._handle_final_event(event, iteration, last_streamed_length)
|
| 79 |
+
```
|
| 80 |
+
|
| 81 |
+
### 3. `_force_synthesis()` Method
|
| 82 |
+
|
| 83 |
+
Similar to `_handle_timeout()`, invokes ReportAgent directly:
|
| 84 |
+
|
| 85 |
+
```python
|
| 86 |
+
async def _force_synthesis(self, iteration: int) -> AsyncGenerator[AgentEvent, None]:
|
| 87 |
+
"""Force synthesis when workflow ends without ReportAgent running."""
|
| 88 |
+
state = get_magentic_state()
|
| 89 |
+
evidence_summary = await state.memory.get_context_summary()
|
| 90 |
+
report_agent = create_report_agent(self._chat_client, domain=self.domain)
|
| 91 |
+
|
| 92 |
+
yield AgentEvent(type="synthesizing", message="Synthesizing research findings...")
|
| 93 |
+
|
| 94 |
+
synthesis_result = await report_agent.run(
|
| 95 |
+
f"Synthesize research report from this evidence.\n\n{evidence_summary}"
|
| 96 |
+
)
|
| 97 |
+
|
| 98 |
+
yield AgentEvent(type="complete", message=synthesis_result.text)
|
| 99 |
+
```
|
| 100 |
+
|
| 101 |
+
### 4. Skip Duplicate Final Events
|
| 102 |
+
|
| 103 |
+
Prevent "Research complete." appearing twice:
|
| 104 |
+
|
| 105 |
+
```python
|
| 106 |
+
if isinstance(event, (MagenticFinalResultEvent, WorkflowOutputEvent)):
|
| 107 |
+
if final_event_received:
|
| 108 |
+
continue # Skip duplicate final events
|
| 109 |
+
final_event_received = True
|
| 110 |
+
```
|
| 111 |
+
|
| 112 |
+
---
|
| 113 |
+
|
| 114 |
+
## Why This Is The Correct Architecture
|
| 115 |
+
|
| 116 |
+
| Alternative | Why Wrong |
|
| 117 |
+
|-------------|-----------|
|
| 118 |
+
| Improve manager prompt | 7B models have fundamental reasoning limitations |
|
| 119 |
+
| Use larger model for manager | Defeats "free tier" purpose |
|
| 120 |
+
| Wait for upstream fix | Framework may never change; we control our code |
|
| 121 |
+
| **Forced synthesis safety net** | ✅ Guarantees output regardless of manager behavior |
|
| 122 |
+
|
| 123 |
+
The `_force_synthesis()` pattern is a **defensive architecture**. It guarantees users always get a research report, even if:
|
| 124 |
+
- The manager model fails to delegate properly
|
| 125 |
+
- The workflow hits stall/reset limits
|
| 126 |
+
- Any unexpected termination occurs
|
| 127 |
+
|
| 128 |
+
---
|
| 129 |
+
|
| 130 |
+
## Files Modified
|
| 131 |
+
|
| 132 |
+
| File | Change |
|
| 133 |
+
|------|--------|
|
| 134 |
+
| `src/orchestrators/advanced.py` | Added `reporter_ran` tracking |
|
| 135 |
+
| `src/orchestrators/advanced.py` | Added `_force_synthesis()` method |
|
| 136 |
+
| `src/orchestrators/advanced.py` | Added duplicate final event skipping |
|
| 137 |
+
| `src/orchestrators/advanced.py` | Added forced synthesis in final event handler |
|
| 138 |
+
| `src/orchestrators/advanced.py` | Added forced synthesis in max rounds fallback |
|
| 139 |
+
|
| 140 |
+
---
|
| 141 |
+
|
| 142 |
+
## Test Plan
|
| 143 |
+
|
| 144 |
+
1. **Free Tier**: Run query, verify synthesis report is always generated
|
| 145 |
+
2. **Paid Tier**: Run query, verify no regression in OpenAI behavior
|
| 146 |
+
3. **Timeout**: Verify existing timeout synthesis still works
|
| 147 |
+
4. **Max Rounds**: Verify synthesis happens even at max rounds
|
| 148 |
+
|
| 149 |
+
---
|
| 150 |
+
|
| 151 |
+
## Related
|
| 152 |
+
|
| 153 |
+
- P2 Duplicate Report Bug (separate issue, also fixed in this PR)
|
| 154 |
+
- P2 First Turn Timeout Bug (previously fixed)
|
| 155 |
+
- Manager model limitations are fundamental to 7B models
|
| 156 |
+
- OpenAI tier works because GPT-5 follows instructions better
|
| 157 |
+
|
| 158 |
+
---
|
| 159 |
+
|
| 160 |
+
## Lessons Learned
|
| 161 |
+
|
| 162 |
+
1. **Defensive architecture** - Don't trust upstream components to always behave correctly
|
| 163 |
+
2. **Tracking flags** - Simple boolean flags can enable powerful safety nets
|
| 164 |
+
3. **AI-native challenges** - When using AI models as infrastructure components, build in fallbacks for model failures
|
| 165 |
+
4. **Regression prevention** - This bug was likely introduced when we unified the architecture; comprehensive test coverage is critical
|
src/orchestrators/advanced.py
CHANGED
|
@@ -35,7 +35,7 @@ from src.agents.magentic_agents import (
|
|
| 35 |
create_report_agent,
|
| 36 |
create_search_agent,
|
| 37 |
)
|
| 38 |
-
from src.agents.state import init_magentic_state
|
| 39 |
from src.clients.base import BaseChatClient
|
| 40 |
from src.clients.factory import get_chat_client
|
| 41 |
from src.config.domain import ResearchDomain, get_domain_config
|
|
@@ -49,6 +49,12 @@ if TYPE_CHECKING:
|
|
| 49 |
|
| 50 |
logger = structlog.get_logger()
|
| 51 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
|
| 53 |
class AdvancedOrchestrator(OrchestratorProtocol):
|
| 54 |
"""
|
|
@@ -198,32 +204,39 @@ The final output should be a structured research report."""
|
|
| 198 |
iteration=0,
|
| 199 |
)
|
| 200 |
|
| 201 |
-
async def
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
try:
|
| 207 |
-
from src.agents.magentic_agents import create_report_agent
|
| 208 |
-
from src.agents.state import get_magentic_state
|
| 209 |
|
| 210 |
-
|
| 211 |
-
|
|
|
|
|
|
|
|
|
|
| 212 |
|
| 213 |
-
|
| 214 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 215 |
|
| 216 |
-
|
|
|
|
|
|
|
| 217 |
report_agent = create_report_agent(self._chat_client, domain=self.domain)
|
| 218 |
|
| 219 |
yield AgentEvent(
|
| 220 |
type="synthesizing",
|
| 221 |
-
message=
|
| 222 |
iteration=iteration,
|
| 223 |
)
|
| 224 |
|
| 225 |
-
# Invoke ReportAgent directly
|
| 226 |
-
# Note: ChatAgent.run() returns AgentRunResponse; access text via .text
|
| 227 |
synthesis_result = await report_agent.run(
|
| 228 |
"Synthesize research report from this evidence. "
|
| 229 |
f"If evidence is sparse, say so.\n\n{evidence_summary}"
|
|
@@ -232,22 +245,21 @@ The final output should be a structured research report."""
|
|
| 232 |
yield AgentEvent(
|
| 233 |
type="complete",
|
| 234 |
message=synthesis_result.text,
|
| 235 |
-
data={"reason": "
|
| 236 |
iteration=iteration,
|
| 237 |
)
|
| 238 |
except Exception as synth_error:
|
| 239 |
-
logger.error("
|
| 240 |
yield AgentEvent(
|
| 241 |
type="complete",
|
| 242 |
-
message=
|
| 243 |
-
|
| 244 |
-
f"Evidence gathered but synthesis failed: {synth_error}"
|
| 245 |
-
),
|
| 246 |
-
data={"reason": "timeout_synthesis_failed", "iterations": iteration},
|
| 247 |
iteration=iteration,
|
| 248 |
)
|
| 249 |
|
| 250 |
-
async def run(
|
|
|
|
|
|
|
| 251 |
"""
|
| 252 |
Run the workflow.
|
| 253 |
|
|
@@ -295,6 +307,7 @@ The final output should be a structured research report."""
|
|
| 295 |
|
| 296 |
iteration = 0
|
| 297 |
final_event_received = False
|
|
|
|
| 298 |
|
| 299 |
# ACCUMULATOR PATTERN: Track streaming content to bypass upstream Repr Bug
|
| 300 |
# Upstream bug in _magentic.py flattens message.contents and sets message.text
|
|
@@ -328,6 +341,11 @@ The final output should be a structured research report."""
|
|
| 328 |
if isinstance(event, MagenticAgentMessageEvent):
|
| 329 |
iteration += 1
|
| 330 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 331 |
comp_event, prog_event = self._handle_completion_event(
|
| 332 |
event, current_message_buffer, iteration
|
| 333 |
)
|
|
@@ -340,10 +358,24 @@ The final output should be a structured research report."""
|
|
| 340 |
current_message_buffer = ""
|
| 341 |
continue
|
| 342 |
|
| 343 |
-
# 3. Handle Final Events Inline (P2 Duplicate Report Fix)
|
| 344 |
if isinstance(event, (MagenticFinalResultEvent, WorkflowOutputEvent)):
|
|
|
|
|
|
|
| 345 |
final_event_received = True
|
| 346 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 347 |
continue
|
| 348 |
|
| 349 |
# 4. Handle other events normally
|
|
@@ -358,19 +390,24 @@ The final output should be a structured research report."""
|
|
| 358 |
"Workflow ended without final event",
|
| 359 |
iterations=iteration,
|
| 360 |
)
|
| 361 |
-
|
| 362 |
-
|
| 363 |
-
|
| 364 |
-
|
| 365 |
-
|
| 366 |
-
|
| 367 |
-
|
| 368 |
-
|
| 369 |
-
|
| 370 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 371 |
|
| 372 |
except TimeoutError:
|
| 373 |
-
async for event in self.
|
| 374 |
yield event
|
| 375 |
|
| 376 |
except Exception as e:
|
|
@@ -517,13 +554,13 @@ The final output should be a structured research report."""
|
|
| 517 |
Event type string matching AgentEvent.type Literal
|
| 518 |
"""
|
| 519 |
agent_lower = agent_name.lower()
|
| 520 |
-
if
|
| 521 |
return "search_complete"
|
| 522 |
-
if
|
| 523 |
return "judge_complete"
|
| 524 |
-
if
|
| 525 |
return "hypothesizing"
|
| 526 |
-
if
|
| 527 |
return "synthesizing"
|
| 528 |
return "judging" # Default for unknown agents
|
| 529 |
|
|
|
|
| 35 |
create_report_agent,
|
| 36 |
create_search_agent,
|
| 37 |
)
|
| 38 |
+
from src.agents.state import get_magentic_state, init_magentic_state
|
| 39 |
from src.clients.base import BaseChatClient
|
| 40 |
from src.clients.factory import get_chat_client
|
| 41 |
from src.config.domain import ResearchDomain, get_domain_config
|
|
|
|
| 49 |
|
| 50 |
logger = structlog.get_logger()
|
| 51 |
|
| 52 |
+
# Agent ID constants - prevents silent breakage if agents are renamed
|
| 53 |
+
REPORTER_AGENT_ID = "reporter"
|
| 54 |
+
SEARCHER_AGENT_ID = "searcher"
|
| 55 |
+
JUDGE_AGENT_ID = "judge"
|
| 56 |
+
HYPOTHESIZER_AGENT_ID = "hypothesizer"
|
| 57 |
+
|
| 58 |
|
| 59 |
class AdvancedOrchestrator(OrchestratorProtocol):
|
| 60 |
"""
|
|
|
|
| 204 |
iteration=0,
|
| 205 |
)
|
| 206 |
|
| 207 |
+
async def _synthesize_fallback(
|
| 208 |
+
self, iteration: int, reason: str
|
| 209 |
+
) -> AsyncGenerator[AgentEvent, None]:
|
| 210 |
+
"""
|
| 211 |
+
Unified fallback synthesis for all termination scenarios.
|
|
|
|
|
|
|
|
|
|
| 212 |
|
| 213 |
+
This method handles synthesis when the workflow terminates without
|
| 214 |
+
a proper report from ReportAgent. It's a safety net for:
|
| 215 |
+
- Timeout scenarios
|
| 216 |
+
- Manager model failing to delegate to ReportAgent (7B model limitation)
|
| 217 |
+
- Max rounds reached without synthesis
|
| 218 |
|
| 219 |
+
Args:
|
| 220 |
+
iteration: Current workflow iteration count
|
| 221 |
+
reason: Why synthesis is being forced ("timeout", "no_reporter", "max_rounds")
|
| 222 |
+
"""
|
| 223 |
+
status_messages = {
|
| 224 |
+
"timeout": "Workflow timed out. Synthesizing available evidence...",
|
| 225 |
+
"no_reporter": "Synthesizing research findings...",
|
| 226 |
+
"max_rounds": "Max rounds reached. Synthesizing findings...",
|
| 227 |
+
}
|
| 228 |
|
| 229 |
+
try:
|
| 230 |
+
state = get_magentic_state()
|
| 231 |
+
evidence_summary = await state.memory.get_context_summary()
|
| 232 |
report_agent = create_report_agent(self._chat_client, domain=self.domain)
|
| 233 |
|
| 234 |
yield AgentEvent(
|
| 235 |
type="synthesizing",
|
| 236 |
+
message=status_messages.get(reason, "Synthesizing..."),
|
| 237 |
iteration=iteration,
|
| 238 |
)
|
| 239 |
|
|
|
|
|
|
|
| 240 |
synthesis_result = await report_agent.run(
|
| 241 |
"Synthesize research report from this evidence. "
|
| 242 |
f"If evidence is sparse, say so.\n\n{evidence_summary}"
|
|
|
|
| 245 |
yield AgentEvent(
|
| 246 |
type="complete",
|
| 247 |
message=synthesis_result.text,
|
| 248 |
+
data={"reason": f"{reason}_synthesis", "iterations": iteration},
|
| 249 |
iteration=iteration,
|
| 250 |
)
|
| 251 |
except Exception as synth_error:
|
| 252 |
+
logger.error(f"{reason} synthesis failed", error=str(synth_error))
|
| 253 |
yield AgentEvent(
|
| 254 |
type="complete",
|
| 255 |
+
message=f"Research completed. Synthesis failed: {synth_error}",
|
| 256 |
+
data={"reason": f"{reason}_synthesis_failed", "iterations": iteration},
|
|
|
|
|
|
|
|
|
|
| 257 |
iteration=iteration,
|
| 258 |
)
|
| 259 |
|
| 260 |
+
async def run( # noqa: PLR0915 - Complex but necessary for event stream handling
|
| 261 |
+
self, query: str
|
| 262 |
+
) -> AsyncGenerator[AgentEvent, None]:
|
| 263 |
"""
|
| 264 |
Run the workflow.
|
| 265 |
|
|
|
|
| 307 |
|
| 308 |
iteration = 0
|
| 309 |
final_event_received = False
|
| 310 |
+
reporter_ran = False # P1 FIX: Track if ReportAgent produced output
|
| 311 |
|
| 312 |
# ACCUMULATOR PATTERN: Track streaming content to bypass upstream Repr Bug
|
| 313 |
# Upstream bug in _magentic.py flattens message.contents and sets message.text
|
|
|
|
| 341 |
if isinstance(event, MagenticAgentMessageEvent):
|
| 342 |
iteration += 1
|
| 343 |
|
| 344 |
+
# P1 FIX: Track if ReportAgent produced output
|
| 345 |
+
agent_name = (event.agent_id or "").lower()
|
| 346 |
+
if REPORTER_AGENT_ID in agent_name:
|
| 347 |
+
reporter_ran = True
|
| 348 |
+
|
| 349 |
comp_event, prog_event = self._handle_completion_event(
|
| 350 |
event, current_message_buffer, iteration
|
| 351 |
)
|
|
|
|
| 358 |
current_message_buffer = ""
|
| 359 |
continue
|
| 360 |
|
| 361 |
+
# 3. Handle Final Events Inline (P2 Duplicate Report Fix + P1 Forced Synthesis)
|
| 362 |
if isinstance(event, (MagenticFinalResultEvent, WorkflowOutputEvent)):
|
| 363 |
+
if final_event_received:
|
| 364 |
+
continue # Skip duplicate final events
|
| 365 |
final_event_received = True
|
| 366 |
+
|
| 367 |
+
# P1 FIX: Force synthesis if ReportAgent never ran
|
| 368 |
+
if not reporter_ran:
|
| 369 |
+
logger.warning(
|
| 370 |
+
"ReportAgent never ran - forcing synthesis",
|
| 371 |
+
iterations=iteration,
|
| 372 |
+
)
|
| 373 |
+
async for synth_event in self._synthesize_fallback(
|
| 374 |
+
iteration, "no_reporter"
|
| 375 |
+
):
|
| 376 |
+
yield synth_event
|
| 377 |
+
else:
|
| 378 |
+
yield self._handle_final_event(event, iteration, last_streamed_length)
|
| 379 |
continue
|
| 380 |
|
| 381 |
# 4. Handle other events normally
|
|
|
|
| 390 |
"Workflow ended without final event",
|
| 391 |
iterations=iteration,
|
| 392 |
)
|
| 393 |
+
# P1 FIX: Force synthesis if ReportAgent never ran
|
| 394 |
+
if not reporter_ran:
|
| 395 |
+
async for synth_event in self._synthesize_fallback(iteration, "max_rounds"):
|
| 396 |
+
yield synth_event
|
| 397 |
+
else:
|
| 398 |
+
yield AgentEvent(
|
| 399 |
+
type="complete",
|
| 400 |
+
message=(
|
| 401 |
+
f"Research completed after {iteration} agent rounds. "
|
| 402 |
+
"Max iterations reached - results may be partial. "
|
| 403 |
+
"Try a more specific query for better results."
|
| 404 |
+
),
|
| 405 |
+
data={"iterations": iteration, "reason": "max_rounds_reached"},
|
| 406 |
+
iteration=iteration,
|
| 407 |
+
)
|
| 408 |
|
| 409 |
except TimeoutError:
|
| 410 |
+
async for event in self._synthesize_fallback(iteration, "timeout"):
|
| 411 |
yield event
|
| 412 |
|
| 413 |
except Exception as e:
|
|
|
|
| 554 |
Event type string matching AgentEvent.type Literal
|
| 555 |
"""
|
| 556 |
agent_lower = agent_name.lower()
|
| 557 |
+
if SEARCHER_AGENT_ID in agent_lower:
|
| 558 |
return "search_complete"
|
| 559 |
+
if JUDGE_AGENT_ID in agent_lower:
|
| 560 |
return "judge_complete"
|
| 561 |
+
if HYPOTHESIZER_AGENT_ID in agent_lower:
|
| 562 |
return "hypothesizing"
|
| 563 |
+
if REPORTER_AGENT_ID in agent_lower:
|
| 564 |
return "synthesizing"
|
| 565 |
return "judging" # Default for unknown agents
|
| 566 |
|
tests/unit/orchestrators/test_advanced_timeout.py
CHANGED
|
@@ -27,11 +27,13 @@ async def test_timeout_synthesizes_evidence():
|
|
| 27 |
mock_workflow.run_stream = slow_stream
|
| 28 |
|
| 29 |
# Mock dependencies used inside the timeout block
|
|
|
|
|
|
|
| 30 |
with (
|
| 31 |
patch.object(orchestrator, "_build_workflow", return_value=mock_workflow),
|
| 32 |
patch("src.orchestrators.advanced.init_magentic_state"),
|
| 33 |
-
patch("src.
|
| 34 |
-
patch("src.
|
| 35 |
):
|
| 36 |
# Setup mock state and memory
|
| 37 |
mock_memory = AsyncMock()
|
|
|
|
| 27 |
mock_workflow.run_stream = slow_stream
|
| 28 |
|
| 29 |
# Mock dependencies used inside the timeout block
|
| 30 |
+
# Note: get_magentic_state and create_report_agent are imported at module level in advanced.py
|
| 31 |
+
# so we must patch them in that module's namespace, not their original location
|
| 32 |
with (
|
| 33 |
patch.object(orchestrator, "_build_workflow", return_value=mock_workflow),
|
| 34 |
patch("src.orchestrators.advanced.init_magentic_state"),
|
| 35 |
+
patch("src.orchestrators.advanced.get_magentic_state") as mock_get_state,
|
| 36 |
+
patch("src.orchestrators.advanced.create_report_agent") as mock_create_agent,
|
| 37 |
):
|
| 38 |
# Setup mock state and memory
|
| 39 |
mock_memory = AsyncMock()
|