Merge pull request #107 from The-Obstacle-Is-The-Way/fix/p1-chain-of-thought-interpretability
Browse files
docs/bugs/ACTIVE_BUGS.md
CHANGED
|
@@ -23,16 +23,9 @@ _No active P0 bugs._
|
|
| 23 |
- `Manager (task_ledger): We are working to address...`
|
| 24 |
- `Manager (instruction): Conduct targeted searches on PubMed...`
|
| 25 |
|
| 26 |
-
These are framework-internal bookkeeping truncated at 200 chars, making them uninterpretable.
|
| 27 |
-
|
| 28 |
**Root Cause:** `_process_event()` in `advanced.py` doesn't filter or transform `MagenticOrchestratorMessageEvent` events from `agent-framework-core`.
|
| 29 |
|
| 30 |
-
**
|
| 31 |
-
1. Filter internal events (`user_task`, `task_ledger`, `instruction`)
|
| 32 |
-
2. Transform to user-friendly messages ("Manager assigning search task...")
|
| 33 |
-
3. Add verbose mode for debugging
|
| 34 |
-
|
| 35 |
-
**Status:** Open
|
| 36 |
|
| 37 |
---
|
| 38 |
|
|
|
|
| 23 |
- `Manager (task_ledger): We are working to address...`
|
| 24 |
- `Manager (instruction): Conduct targeted searches on PubMed...`
|
| 25 |
|
|
|
|
|
|
|
| 26 |
**Root Cause:** `_process_event()` in `advanced.py` doesn't filter or transform `MagenticOrchestratorMessageEvent` events from `agent-framework-core`.
|
| 27 |
|
| 28 |
+
**Status:** PR [#107](https://github.com/The-Obstacle-Is-The-Way/DeepBoner/pull/107) open, pending merge.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
|
| 30 |
---
|
| 31 |
|
docs/bugs/P1_ADVANCED_MODE_UNINTERPRETABLE_CHAIN_OF_THOUGHT.md
CHANGED
|
@@ -2,8 +2,9 @@
|
|
| 2 |
|
| 3 |
**Priority**: P1 (UX Degradation)
|
| 4 |
**Component**: `src/orchestrators/advanced.py`
|
| 5 |
-
**Status**:
|
| 6 |
**Issue**: [#106](https://github.com/The-Obstacle-Is-The-Way/DeepBoner/issues/106)
|
|
|
|
| 7 |
**Created**: 2025-12-01
|
| 8 |
|
| 9 |
## Summary
|
|
@@ -15,6 +16,16 @@ The Advanced orchestrator exposes raw internal framework events from `agent-fram
|
|
| 15 |
3. Shown with misleading "JUDGING" event type
|
| 16 |
4. Not meaningful to end users
|
| 17 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
## Example of Bad Output
|
| 19 |
|
| 20 |
```
|
|
|
|
| 2 |
|
| 3 |
**Priority**: P1 (UX Degradation)
|
| 4 |
**Component**: `src/orchestrators/advanced.py`
|
| 5 |
+
**Status**: Fix Ready (PR #107 open)
|
| 6 |
**Issue**: [#106](https://github.com/The-Obstacle-Is-The-Way/DeepBoner/issues/106)
|
| 7 |
+
**PR**: [#107](https://github.com/The-Obstacle-Is-The-Way/DeepBoner/pull/107)
|
| 8 |
**Created**: 2025-12-01
|
| 9 |
|
| 10 |
## Summary
|
|
|
|
| 16 |
3. Shown with misleading "JUDGING" event type
|
| 17 |
4. Not meaningful to end users
|
| 18 |
|
| 19 |
+
## Resolution
|
| 20 |
+
|
| 21 |
+
Implemented "Smart Filter + Transform" logic in `src/orchestrators/advanced.py`:
|
| 22 |
+
|
| 23 |
+
1. **Filtered**: `task_ledger` and `instruction` events are now hidden.
|
| 24 |
+
2. **Transformed**: `user_task` events are mapped to `type="progress"` with the static, user-friendly message "Manager assigning research task to agents..." (the raw task text is no longer shown).
|
| 25 |
+
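3. **Smart Truncation**: Text longer than 200 characters is now cut at the last sentence boundary when one falls in the second half of that limit; otherwise it is cut at the last word boundary and suffixed with `...`, avoiding mid-word cuts wherever the text contains a space.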
|
| 26 |
+
|
| 27 |
+
Verified with new unit tests in `tests/unit/orchestrators/test_advanced_events.py`.
|
| 28 |
+
|
| 29 |
## Example of Bad Output
|
| 30 |
|
| 31 |
```
|
src/orchestrators/advanced.py
CHANGED
|
@@ -358,17 +358,46 @@ The final output should be a structured research report."""
|
|
| 358 |
return "synthesizing"
|
| 359 |
return "judging" # Default for unknown agents
|
| 360 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 361 |
def _process_event(self, event: Any, iteration: int) -> AgentEvent | None:
|
| 362 |
"""Process workflow event into AgentEvent."""
|
| 363 |
if isinstance(event, MagenticOrchestratorMessageEvent):
|
| 364 |
-
|
| 365 |
-
if
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 366 |
return AgentEvent(
|
| 367 |
-
type="
|
| 368 |
-
message=
|
| 369 |
iteration=iteration,
|
| 370 |
)
|
| 371 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 372 |
elif isinstance(event, MagenticAgentMessageEvent):
|
| 373 |
agent_name = event.agent_id or "unknown"
|
| 374 |
text = self._extract_text(event.message)
|
|
@@ -377,7 +406,7 @@ The final output should be a structured research report."""
|
|
| 377 |
# All returned types are valid AgentEvent.type literals
|
| 378 |
return AgentEvent(
|
| 379 |
type=event_type, # type: ignore[arg-type]
|
| 380 |
-
message=f"{agent_name}: {text
|
| 381 |
iteration=iteration + 1,
|
| 382 |
)
|
| 383 |
|
|
|
|
| 358 |
return "synthesizing"
|
| 359 |
return "judging" # Default for unknown agents
|
| 360 |
|
| 361 |
+
def _smart_truncate(self, text: str, max_len: int = 200) -> str:
|
| 362 |
+
"""Truncate at sentence boundary to avoid cutting words."""
|
| 363 |
+
if len(text) <= max_len:
|
| 364 |
+
return text
|
| 365 |
+
# Find last sentence boundary before limit
|
| 366 |
+
truncated = text[:max_len]
|
| 367 |
+
last_period = truncated.rfind(". ")
|
| 368 |
+
if last_period > max_len // 2:
|
| 369 |
+
return truncated[: last_period + 1]
|
| 370 |
+
# Fallback to word boundary
|
| 371 |
+
return truncated.rsplit(" ", 1)[0] + "..."
|
| 372 |
+
|
| 373 |
def _process_event(self, event: Any, iteration: int) -> AgentEvent | None:
|
| 374 |
"""Process workflow event into AgentEvent."""
|
| 375 |
if isinstance(event, MagenticOrchestratorMessageEvent):
|
| 376 |
+
# FILTERING: Skip internal framework bookkeeping
|
| 377 |
+
if event.kind in ("task_ledger", "instruction"):
|
| 378 |
+
return None
|
| 379 |
+
|
| 380 |
+
# TRANSFORMATION: Handle user_task BEFORE text extraction
|
| 381 |
+
# (user_task uses static message, doesn't need text content)
|
| 382 |
+
if event.kind == "user_task":
|
| 383 |
return AgentEvent(
|
| 384 |
+
type="progress",
|
| 385 |
+
message="Manager assigning research task to agents...",
|
| 386 |
iteration=iteration,
|
| 387 |
)
|
| 388 |
|
| 389 |
+
# For other manager events, extract and validate text
|
| 390 |
+
text = self._extract_text(event.message)
|
| 391 |
+
if not text:
|
| 392 |
+
return None
|
| 393 |
+
|
| 394 |
+
# Default fallback for other manager events
|
| 395 |
+
return AgentEvent(
|
| 396 |
+
type="judging",
|
| 397 |
+
message=f"Manager ({event.kind}): {self._smart_truncate(text)}",
|
| 398 |
+
iteration=iteration,
|
| 399 |
+
)
|
| 400 |
+
|
| 401 |
elif isinstance(event, MagenticAgentMessageEvent):
|
| 402 |
agent_name = event.agent_id or "unknown"
|
| 403 |
text = self._extract_text(event.message)
|
|
|
|
| 406 |
# All returned types are valid AgentEvent.type literals
|
| 407 |
return AgentEvent(
|
| 408 |
type=event_type, # type: ignore[arg-type]
|
| 409 |
+
message=f"{agent_name}: {self._smart_truncate(text)}",
|
| 410 |
iteration=iteration + 1,
|
| 411 |
)
|
| 412 |
|
tests/unit/orchestrators/test_advanced_events.py
ADDED
|
@@ -0,0 +1,109 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Test for AdvancedOrchestrator event processing (P1 Bug)."""
|
| 2 |
+
|
| 3 |
+
import pytest
|
| 4 |
+
from agent_framework import MagenticOrchestratorMessageEvent
|
| 5 |
+
|
| 6 |
+
from src.orchestrators.advanced import AdvancedOrchestrator
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
@pytest.mark.unit
class TestAdvancedEventProcessing:
    """Regression tests for manager-event handling in AdvancedOrchestrator (P1 bug)."""

    @pytest.fixture
    def orchestrator(self) -> AdvancedOrchestrator:
        """Create an orchestrator instance without running ``__init__``."""
        # Bypass __init__ logic that requires keys/env vars
        orch = AdvancedOrchestrator.__new__(AdvancedOrchestrator)
        # Minimal setup
        orch._max_rounds = 5
        orch._timeout_seconds = 300.0
        return orch

    def test_filters_internal_task_ledger_events(self, orchestrator: AdvancedOrchestrator) -> None:
        """
        Bug P1: Internal 'task_ledger' events should be filtered out.

        Previous behavior: Returned AgentEvent(type='judging', message='Manager (task_ledger): ...')
        Expected behavior: Returns None (filtered)
        """
        # Create a raw internal framework event
        raw_event = MagenticOrchestratorMessageEvent(
            kind="task_ledger",
            message="We are working to address the following user request: Research sildenafil...",
        )

        # Process the event
        result = orchestrator._process_event(raw_event, iteration=1)

        # FAIL if the event is NOT filtered (i.e., if it returns an event)
        assert result is None, f"Should filter 'task_ledger' events, but got: {result}"

    def test_filters_internal_instruction_events(self, orchestrator: AdvancedOrchestrator) -> None:
        """
        Bug P1: Internal 'instruction' events should be filtered out.

        Previous behavior: Returned AgentEvent(type='judging', message='Manager (instruction): ...')
        Expected behavior: Returns None (filtered)
        """
        raw_event = MagenticOrchestratorMessageEvent(
            kind="instruction", message="Conduct targeted searches on PubMed..."
        )

        result = orchestrator._process_event(raw_event, iteration=1)

        assert result is None, f"Should filter 'instruction' events, but got: {result}"

    def test_transforms_user_task_events(self, orchestrator: AdvancedOrchestrator) -> None:
        """
        Bug P1: 'user_task' events should be transformed to user-friendly messages.

        Previous behavior: 'Manager (user_task): Research...' (truncated, type='judging')
        Expected behavior: 'Manager assigning research task...' (type='progress')
        """
        raw_event = MagenticOrchestratorMessageEvent(
            kind="user_task",
            message="Research sexual health and wellness interventions for: sildenafil mechanism",
        )

        result = orchestrator._process_event(raw_event, iteration=1)

        assert result is not None
        assert result.type == "progress"  # NOT "judging"
        assert "Manager assigning research task" in result.message
        # Should use the generic friendly message, never the raw task text
        assert "sildenafil mechanism" not in result.message

    def test_prevents_mid_sentence_truncation(self, orchestrator: AdvancedOrchestrator) -> None:
        """
        Bug P1: Long messages should be smart-truncated at sentence boundaries.

        Tests _smart_truncate directly to ensure regression protection.
        The function truncates at sentence boundary if period is after halfway point.
        """
        # The first sentence's period sits at index 59, which is > 50 (100 // 2)
        first_sentence = "This is a longer first sentence that ends past the midpoint."
        long_text = (
            "This is a longer first sentence that ends past the midpoint. "
            "Second sentence continues with more text that would be cut."
        )

        # Call the helper directly to test its behavior explicitly
        truncated = orchestrator._smart_truncate(long_text, max_len=100)

        # Should truncate at the end of the first sentence (period > max_len//2)
        assert truncated.endswith("midpoint.")
        assert "Second sentence" not in truncated
        assert len(truncated) <= 100
        # Pin the exact result so a partial or over-eager cut is caught as well
        assert truncated == first_sentence

    def test_smart_truncate_word_boundary_fallback(
        self, orchestrator: AdvancedOrchestrator
    ) -> None:
        """Test that truncation falls back to word boundary when no sentence end."""
        # No sentence ending anywhere in the text; the 50-char cut lands inside "ending"
        long_text = "This is a very long text without any sentence ending in the limit"

        truncated = orchestrator._smart_truncate(long_text, max_len=50)

        # Should end with "..." and not cut mid-word
        assert truncated.endswith("...")
        assert len(truncated) <= 53  # 50 + "..."
        # The partially included word ("endi") must be dropped entirely
        assert truncated == "This is a very long text without any sentence..."
|