VibecoderMcSwaggins committed on
Commit
45e98bc
·
unverified ·
2 Parent(s): 7e1184a fd7948d

Merge pull request #107 from The-Obstacle-Is-The-Way/fix/p1-chain-of-thought-interpretability

Browse files
docs/bugs/ACTIVE_BUGS.md CHANGED
@@ -23,16 +23,9 @@ _No active P0 bugs._
23
  - `Manager (task_ledger): We are working to address...`
24
  - `Manager (instruction): Conduct targeted searches on PubMed...`
25
 
26
- These are framework-internal bookkeeping truncated at 200 chars, making them uninterpretable.
27
-
28
  **Root Cause:** `_process_event()` in `advanced.py` doesn't filter or transform `MagenticOrchestratorMessageEvent` events from `agent-framework-core`.
29
 
30
- **Solution Options:**
31
- 1. Filter internal events (`user_task`, `task_ledger`, `instruction`)
32
- 2. Transform to user-friendly messages ("Manager assigning search task...")
33
- 3. Add verbose mode for debugging
34
-
35
- **Status:** Open
36
 
37
  ---
38
 
 
23
  - `Manager (task_ledger): We are working to address...`
24
  - `Manager (instruction): Conduct targeted searches on PubMed...`
25
 
 
 
26
  **Root Cause:** `_process_event()` in `advanced.py` doesn't filter or transform `MagenticOrchestratorMessageEvent` events from `agent-framework-core`.
27
 
28
+ **Status:** PR [#107](https://github.com/The-Obstacle-Is-The-Way/DeepBoner/pull/107) open, pending merge.
 
 
 
 
 
29
 
30
  ---
31
 
docs/bugs/P1_ADVANCED_MODE_UNINTERPRETABLE_CHAIN_OF_THOUGHT.md CHANGED
@@ -2,8 +2,9 @@
2
 
3
  **Priority**: P1 (UX Degradation)
4
  **Component**: `src/orchestrators/advanced.py`
5
- **Status**: Open
6
  **Issue**: [#106](https://github.com/The-Obstacle-Is-The-Way/DeepBoner/issues/106)
 
7
  **Created**: 2025-12-01
8
 
9
  ## Summary
@@ -15,6 +16,16 @@ The Advanced orchestrator exposes raw internal framework events from `agent-fram
15
  3. Shown with misleading "JUDGING" event type
16
  4. Not meaningful to end users
17
 
 
 
 
 
 
 
 
 
 
 
18
  ## Example of Bad Output
19
 
20
  ```
 
2
 
3
  **Priority**: P1 (UX Degradation)
4
  **Component**: `src/orchestrators/advanced.py`
5
+ **Status**: Fix Ready (PR #107 open)
6
  **Issue**: [#106](https://github.com/The-Obstacle-Is-The-Way/DeepBoner/issues/106)
7
+ **PR**: [#107](https://github.com/The-Obstacle-Is-The-Way/DeepBoner/pull/107)
8
  **Created**: 2025-12-01
9
 
10
  ## Summary
 
16
  3. Shown with misleading "JUDGING" event type
17
  4. Not meaningful to end users
18
 
19
+ ## Resolution
20
+
21
+ Implemented "Smart Filter + Transform" logic in `src/orchestrators/advanced.py`:
22
+
23
+ 1. **Filtered**: `task_ledger` and `instruction` events are now hidden.
24
+ 2. **Transformed**: `user_task` events are mapped to `type="progress"` with a friendly "Manager assigning research task..." message.
25
+ 3. **Smart Truncation**: Text is now truncated at sentence boundaries or word boundaries, preventing mid-word cuts.
26
+
27
+ Verified with new unit tests in `tests/unit/orchestrators/test_advanced_events.py`.
28
+
29
  ## Example of Bad Output
30
 
31
  ```
src/orchestrators/advanced.py CHANGED
@@ -358,17 +358,46 @@ The final output should be a structured research report."""
358
  return "synthesizing"
359
  return "judging" # Default for unknown agents
360
 
 
 
 
 
 
 
 
 
 
 
 
 
361
  def _process_event(self, event: Any, iteration: int) -> AgentEvent | None:
362
  """Process workflow event into AgentEvent."""
363
  if isinstance(event, MagenticOrchestratorMessageEvent):
364
- text = self._extract_text(event.message)
365
- if text:
 
 
 
 
 
366
  return AgentEvent(
367
- type="judging",
368
- message=f"Manager ({event.kind}): {text[:200]}...",
369
  iteration=iteration,
370
  )
371
 
 
 
 
 
 
 
 
 
 
 
 
 
372
  elif isinstance(event, MagenticAgentMessageEvent):
373
  agent_name = event.agent_id or "unknown"
374
  text = self._extract_text(event.message)
@@ -377,7 +406,7 @@ The final output should be a structured research report."""
377
  # All returned types are valid AgentEvent.type literals
378
  return AgentEvent(
379
  type=event_type, # type: ignore[arg-type]
380
- message=f"{agent_name}: {text[:200]}...",
381
  iteration=iteration + 1,
382
  )
383
 
 
358
  return "synthesizing"
359
  return "judging" # Default for unknown agents
360
 
361
+ def _smart_truncate(self, text: str, max_len: int = 200) -> str:
362
+ """Truncate at sentence boundary to avoid cutting words."""
363
+ if len(text) <= max_len:
364
+ return text
365
+ # Find last sentence boundary before limit
366
+ truncated = text[:max_len]
367
+ last_period = truncated.rfind(". ")
368
+ if last_period > max_len // 2:
369
+ return truncated[: last_period + 1]
370
+ # Fallback to word boundary
371
+ return truncated.rsplit(" ", 1)[0] + "..."
372
+
373
  def _process_event(self, event: Any, iteration: int) -> AgentEvent | None:
374
  """Process workflow event into AgentEvent."""
375
  if isinstance(event, MagenticOrchestratorMessageEvent):
376
+ # FILTERING: Skip internal framework bookkeeping
377
+ if event.kind in ("task_ledger", "instruction"):
378
+ return None
379
+
380
+ # TRANSFORMATION: Handle user_task BEFORE text extraction
381
+ # (user_task uses static message, doesn't need text content)
382
+ if event.kind == "user_task":
383
  return AgentEvent(
384
+ type="progress",
385
+ message="Manager assigning research task to agents...",
386
  iteration=iteration,
387
  )
388
 
389
+ # For other manager events, extract and validate text
390
+ text = self._extract_text(event.message)
391
+ if not text:
392
+ return None
393
+
394
+ # Default fallback for other manager events
395
+ return AgentEvent(
396
+ type="judging",
397
+ message=f"Manager ({event.kind}): {self._smart_truncate(text)}",
398
+ iteration=iteration,
399
+ )
400
+
401
  elif isinstance(event, MagenticAgentMessageEvent):
402
  agent_name = event.agent_id or "unknown"
403
  text = self._extract_text(event.message)
 
406
  # All returned types are valid AgentEvent.type literals
407
  return AgentEvent(
408
  type=event_type, # type: ignore[arg-type]
409
+ message=f"{agent_name}: {self._smart_truncate(text)}",
410
  iteration=iteration + 1,
411
  )
412
 
tests/unit/orchestrators/test_advanced_events.py ADDED
@@ -0,0 +1,109 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Test for AdvancedOrchestrator event processing (P1 Bug)."""
2
+
3
+ import pytest
4
+ from agent_framework import MagenticOrchestratorMessageEvent
5
+
6
+ from src.orchestrators.advanced import AdvancedOrchestrator
7
+
8
+
9
+ @pytest.mark.unit
10
+ class TestAdvancedEventProcessing:
11
+ """Test event processing logic in AdvancedOrchestrator."""
12
+
13
+ @pytest.fixture
14
+ def orchestrator(self) -> AdvancedOrchestrator:
15
+ """Create an orchestrator instance with mocks."""
16
+ # Bypass __init__ logic that requires keys/env vars
17
+ orch = AdvancedOrchestrator.__new__(AdvancedOrchestrator)
18
+ # Minimal setup
19
+ orch._max_rounds = 5
20
+ orch._timeout_seconds = 300.0
21
+ return orch
22
+
23
+ def test_filters_internal_task_ledger_events(self, orchestrator: AdvancedOrchestrator) -> None:
24
+ """
25
+ Bug P1: Internal 'task_ledger' events should be filtered out.
26
+
27
+ Current behavior: Returns AgentEvent(type='judging', message='Manager (task_ledger): ...')
28
+ Desired behavior: Returns None (filtered)
29
+ """
30
+ # Create a raw internal framework event
31
+ raw_event = MagenticOrchestratorMessageEvent(
32
+ kind="task_ledger",
33
+ message="We are working to address the following user request: Research sildenafil...",
34
+ )
35
+
36
+ # Process the event
37
+ result = orchestrator._process_event(raw_event, iteration=1)
38
+
39
+ # FAIL if the event is NOT filtered (i.e., if it returns an event)
40
+ assert result is None, f"Should filter 'task_ledger' events, but got: {result}"
41
+
42
+ def test_filters_internal_instruction_events(self, orchestrator: AdvancedOrchestrator) -> None:
43
+ """
44
+ Bug P1: Internal 'instruction' events should be filtered out.
45
+
46
+ Current behavior: Returns AgentEvent(type='judging', message='Manager (instruction): ...')
47
+ Desired behavior: Returns None (filtered)
48
+ """
49
+ raw_event = MagenticOrchestratorMessageEvent(
50
+ kind="instruction", message="Conduct targeted searches on PubMed..."
51
+ )
52
+
53
+ result = orchestrator._process_event(raw_event, iteration=1)
54
+
55
+ assert result is None, f"Should filter 'instruction' events, but got: {result}"
56
+
57
+ def test_transforms_user_task_events(self, orchestrator: AdvancedOrchestrator) -> None:
58
+ """
59
+ Bug P1: 'user_task' events should be transformed to user-friendly messages.
60
+
61
+ Current behavior: 'Manager (user_task): Research...' (truncated, type='judging')
62
+ Desired behavior: 'Manager assigning research task...' (type='progress')
63
+ """
64
+ raw_event = MagenticOrchestratorMessageEvent(
65
+ kind="user_task",
66
+ message="Research sexual health and wellness interventions for: sildenafil mechanism",
67
+ )
68
+
69
+ result = orchestrator._process_event(raw_event, iteration=1)
70
+
71
+ assert result is not None
72
+ assert result.type == "progress" # NOT "judging"
73
+ assert "Manager assigning research task" in result.message
74
+ # Should use the generic friendly message
75
+ assert "sildenafil mechanism" not in result.message
76
+
77
+ def test_prevents_mid_sentence_truncation(self, orchestrator: AdvancedOrchestrator) -> None:
78
+ """
79
+ Bug P1: Long messages should be smart-truncated at sentence boundaries.
80
+
81
+ Tests _smart_truncate directly to ensure regression protection.
82
+ The function truncates at sentence boundary if period is after halfway point.
83
+ """
84
+ # First sentence's period is at index 59, which is > 50 (100//2)
85
+ long_text = (
86
+ "This is a longer first sentence that ends past the midpoint. "
87
+ "Second sentence continues with more text that would be cut."
88
+ )
89
+
90
+ # Call the helper directly to test its behavior explicitly
91
+ truncated = orchestrator._smart_truncate(long_text, max_len=100)
92
+
93
+ # Should truncate at the end of the first sentence (period > max_len//2)
94
+ assert truncated.endswith("midpoint.")
95
+ assert "Second sentence" not in truncated
96
+ assert len(truncated) <= 100
97
+
98
+ def test_smart_truncate_word_boundary_fallback(
99
+ self, orchestrator: AdvancedOrchestrator
100
+ ) -> None:
101
+ """Test that truncation falls back to word boundary when no sentence end."""
102
+ # No sentence ending within the first 50 chars (the max_len used below)
103
+ long_text = "This is a very long text without any sentence ending in the limit"
104
+
105
+ truncated = orchestrator._smart_truncate(long_text, max_len=50)
106
+
107
+ # Should end with "..." and not cut mid-word
108
+ assert truncated.endswith("...")
109
+ assert len(truncated) <= 53 # 50 + "..."