jim-bo Claude Opus 4.6 (1M context) committed
Commit 2509530 · 1 parent: c0cf2a6

feat: thinking transparency layer with /verbose command


Switch pipeline from stream_text() to stream_responses() to capture
thinking/reasoning tokens from models like Claude. Render thinking in
collapsible TUI panels, collapsed by default, expandable via /verbose.

New events: AgentThinkingChunk, AgentThinkingComplete
New command: /verbose (toggles thinking visibility)
New tests: 5, covering the pipeline, TUI rendering, and the /verbose toggle

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
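
The two new events are plain dataclasses pushed onto the pipeline's event queue. A minimal standalone sketch of the intended consumption contract (the dataclass shapes mirror the chat_events.py additions below; the queue wiring here is simplified and illustrative, not the real pipeline):

import asyncio
from dataclasses import dataclass

@dataclass
class AgentThinkingChunk:
    text: str

@dataclass
class AgentThinkingComplete:
    full_text: str

async def consume(queue: asyncio.Queue) -> None:
    # Reasoning deltas stream first; a single completion event closes the panel.
    while True:
        event = await queue.get()
        if isinstance(event, AgentThinkingChunk):
            print(f"[thinking] {event.text}")
        elif isinstance(event, AgentThinkingComplete):
            print(f"[thinking done] {len(event.full_text)} chars")
            break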

src/cli_textual/agents/manager.py CHANGED
@@ -2,9 +2,12 @@ import asyncio
 from typing import AsyncGenerator, List, Any
 from pydantic_ai import Agent, RunContext
 
+from pydantic_ai.messages import ThinkingPart, TextPart
+
 from cli_textual.core.chat_events import (
     ChatEvent, AgentThinking, AgentToolStart, AgentToolEnd, AgentToolOutput,
-    AgentStreamChunk, AgentComplete, AgentRequiresUserInput, ChatDeps, AgentExecuteCommand
+    AgentStreamChunk, AgentComplete, AgentRequiresUserInput, ChatDeps, AgentExecuteCommand,
+    AgentThinkingChunk, AgentThinkingComplete,
 )
 from cli_textual.agents.model import model
 from cli_textual.tools.bash import bash_exec as pure_bash_exec
@@ -129,12 +132,40 @@ async def run_manager_pipeline(
     async def run_agent():
         try:
             async with manager_agent.run_stream(prompt, deps=deps, message_history=message_history) as result:
-                last_length = 0
-                async for text in result.stream_text():
-                    new_part = text[last_length:]
-                    if new_part:
-                        await event_queue.put(AgentStreamChunk(text=new_part))
-                    last_length = len(text)
+                last_thinking_len = 0
+                last_text_len = 0
+                thinking_complete = False
+
+                async for response, is_last in result.stream_responses():
+                    # Accumulate thinking and text from all parts
+                    thinking_text = ""
+                    text_text = ""
+                    for part in response.parts:
+                        if isinstance(part, ThinkingPart):
+                            thinking_text += part.content
+                        elif isinstance(part, TextPart):
+                            text_text += part.content
+
+                    # Emit thinking deltas
+                    if len(thinking_text) > last_thinking_len:
+                        new_thinking = thinking_text[last_thinking_len:]
+                        await event_queue.put(AgentThinkingChunk(text=new_thinking))
+                        last_thinking_len = len(thinking_text)
+
+                    # Signal thinking done when text starts
+                    if text_text and not thinking_complete and last_thinking_len > 0:
+                        await event_queue.put(AgentThinkingComplete(full_text=thinking_text))
+                        thinking_complete = True
+
+                    # Emit text deltas
+                    if len(text_text) > last_text_len:
+                        new_text = text_text[last_text_len:]
+                        await event_queue.put(AgentStreamChunk(text=new_text))
+                        last_text_len = len(text_text)
+
+                # If thinking was emitted but no text followed, still signal complete
+                if last_thinking_len > 0 and not thinking_complete:
+                    await event_queue.put(AgentThinkingComplete(full_text=thinking_text))
 
                 await event_queue.put(AgentComplete(new_history=result.new_messages()))
         except Exception as e:
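
The handler above assumes stream_responses() yields cumulative response snapshots rather than deltas, hence the last_thinking_len/last_text_len bookkeeping. A standalone sketch of that slicing logic (illustrative data, not the real stream):

# Each snapshot contains the full text so far; only the unseen suffix is emitted.
snapshots = ["Let me", "Let me think", "Let me think this through."]
last_len = 0
for snapshot in snapshots:
    delta = snapshot[last_len:]
    if delta:
        print(repr(delta))
    last_len = len(snapshot)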
src/cli_textual/app.py CHANGED
@@ -8,8 +8,8 @@ from textual import on, events
 from textual.app import App, ComposeResult
 from textual.containers import Container, VerticalScroll, Horizontal
 from textual.widgets import (
-    Header, Footer, Static, Markdown, Label, OptionList,
-    TabbedContent, DirectoryTree, DataTable
+    Header, Footer, Static, Markdown, Label, OptionList,
+    TabbedContent, DirectoryTree, DataTable, Collapsible
 )
 from textual.widgets.option_list import Option
 from textual.binding import Binding
@@ -20,7 +20,8 @@ from cli_textual.core.permissions import PermissionManager
 from cli_textual.core.command import CommandManager
 from cli_textual.core.chat_events import (
     ChatEvent, AgentThinking, AgentToolStart, AgentToolEnd, AgentToolOutput,
-    AgentStreamChunk, AgentComplete, AgentRequiresUserInput, AgentExecuteCommand
+    AgentStreamChunk, AgentComplete, AgentRequiresUserInput, AgentExecuteCommand,
+    AgentThinkingChunk, AgentThinkingComplete,
 )
 
 # Pydantic AI Orchestrators
@@ -53,12 +54,13 @@ class ChatApp(App):
         self.chat_mode = os.getenv("CHAT_MODE", "manager")
         self.message_history = []  # For LLM context memory
         self.interactive_input_queue = asyncio.Queue()
+        self.verbose_mode = False
 
 
         # Initialize Core Managers
         self.workspace_root = Path.cwd().resolve()
         self.fs_manager = FSManager(self.workspace_root)
-        self.permission_manager = PermissionManager(self.workspace_root / ".cbio" / "settings.json")
+        self.permission_manager = PermissionManager(self.workspace_root / ".agents" / "settings.json")
         self.command_manager = CommandManager()
 
         # Register Commands via Auto-Discovery
@@ -176,9 +178,30 @@ class ChatApp(App):
 
             markdown_widget = None
             full_text = ""
+            thinking_collapsible = None
+            thinking_widget = None
+            thinking_text = ""
 
             async for event in generator:
-                if isinstance(event, AgentThinking):
+                if isinstance(event, AgentThinkingChunk):
+                    if not thinking_collapsible:
+                        thinking_collapsible = Collapsible(
+                            Static("", classes="thinking-content"),
+                            title="Reasoning",
+                            collapsed=not self.verbose_mode,
+                            classes="thinking-block",
+                        )
+                        await history.mount(thinking_collapsible)
+                        thinking_widget = thinking_collapsible.query_one(".thinking-content")
+                    thinking_text += event.text
+                    thinking_widget.update(thinking_text)
+                    history.scroll_end(animate=False)
+
+                elif isinstance(event, AgentThinkingComplete):
+                    if thinking_widget:
+                        thinking_widget.update(event.full_text)
+
+                elif isinstance(event, AgentThinking):
                     task_label.update(event.message)
 
                 elif isinstance(event, AgentRequiresUserInput):
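
The thinking panel is Textual's stock Collapsible widget wrapping a Static child. A minimal self-contained demo of the same collapsed-by-default construction (ThinkingDemo is an illustrative name, not part of this codebase):

from textual.app import App, ComposeResult
from textual.widgets import Collapsible, Static

class ThinkingDemo(App):
    def compose(self) -> ComposeResult:
        # collapsed=True hides the body until the title row is toggled,
        # matching the non-verbose default above.
        yield Collapsible(
            Static("model reasoning goes here", classes="thinking-content"),
            title="Reasoning",
            collapsed=True,
        )

if __name__ == "__main__":
    ThinkingDemo().run()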
src/cli_textual/app.tcss CHANGED
@@ -240,3 +240,20 @@ DirectoryTree {
     padding: 0 1;
     margin: 0 0 1 0;
 }
+
+.thinking-block {
+    margin: 0 0 1 0;
+    border-left: solid #555555;
+    padding: 0;
+}
+
+.thinking-block CollapsibleTitle {
+    color: #888888;
+    text-style: italic;
+}
+
+.thinking-content {
+    color: #777777;
+    padding: 0 1;
+    background: #1A1A1A;
+}
src/cli_textual/core/chat_events.py CHANGED
@@ -50,6 +50,16 @@ class AgentToolOutput(ChatEvent):
     content: str
     is_error: bool = False
 
+@dataclass
+class AgentThinkingChunk(ChatEvent):
+    """A partial chunk of the model's reasoning/thinking tokens."""
+    text: str
+
+@dataclass
+class AgentThinkingComplete(ChatEvent):
+    """The model has finished emitting thinking tokens for this turn."""
+    full_text: str
+
 @dataclass
 class AgentStreamChunk(ChatEvent):
     """A partial chunk of the final text response."""
src/cli_textual/plugins/commands/verbose.py ADDED
@@ -0,0 +1,19 @@
+from typing import List
+from cli_textual.core.command import SlashCommand
+
+
+class VerboseCommand(SlashCommand):
+    """Toggle verbose mode to show agent thinking by default."""
+
+    @property
+    def name(self) -> str:
+        return "/verbose"
+
+    @property
+    def description(self) -> str:
+        return "Toggle verbose mode (show thinking expanded)"
+
+    async def execute(self, app, args: List[str]):
+        app.verbose_mode = not app.verbose_mode
+        state = "ON" if app.verbose_mode else "OFF"
+        app.add_to_history(f"Verbose mode: **{state}**")
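
Because execute() only touches verbose_mode and add_to_history(), the command can be exercised against a stub without spinning up the TUI. A sketch (FakeApp is hypothetical, not part of the codebase):

import asyncio
from cli_textual.plugins.commands.verbose import VerboseCommand

class FakeApp:
    # Stands in for ChatApp: just the two members execute() touches.
    verbose_mode = False
    def add_to_history(self, text: str) -> None:
        print(text)

async def demo() -> None:
    app = FakeApp()
    await VerboseCommand().execute(app, [])
    assert app.verbose_mode is True   # OFF -> ON
    await VerboseCommand().execute(app, [])
    assert app.verbose_mode is False  # ON -> OFF

asyncio.run(demo())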
tests/conftest.py CHANGED
@@ -11,13 +11,13 @@ sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')
 def setup_permissions():
     """Automatically approve all tools before every test."""
     workspace_root = Path.cwd().resolve()
-    settings_dir = workspace_root / ".cbio"
+    settings_dir = workspace_root / ".agents"
     settings_path = settings_dir / "settings.json"
 
     os.makedirs(settings_dir, exist_ok=True)
     with open(settings_path, "w") as f:
         json.dump({
-            "approved_tools": ["/ls", "/head", "/select", "/load", "/survey", "/clear"]
+            "approved_tools": ["/ls", "/head", "/select", "/load", "/survey", "/clear", "/verbose"]
         }, f)
     yield
     # Cleanup if needed
tests/unit/test_thinking.py ADDED
@@ -0,0 +1,113 @@
+"""Tests for thinking/reasoning transparency layer."""
+import asyncio
+import pytest
+from pydantic_ai.models.function import FunctionModel, AgentInfo, DeltaThinkingPart
+from pydantic_ai.messages import ModelMessage
+from textual.widgets import Collapsible
+
+from cli_textual.agents.manager import run_manager_pipeline, manager_agent
+from cli_textual.core.chat_events import (
+    AgentThinkingChunk, AgentThinkingComplete, AgentStreamChunk, AgentComplete,
+)
+from cli_textual.app import ChatApp
+
+
+# ---------------------------------------------------------------------------
+# Pipeline tests
+# ---------------------------------------------------------------------------
+
+@pytest.mark.asyncio
+async def test_pipeline_emits_thinking_chunks():
+    """Thinking tokens surface as AgentThinkingChunk events."""
+    async def thinking_then_text(messages: list[ModelMessage], info: AgentInfo):
+        yield {0: DeltaThinkingPart(content="Let me reason about this.")}
+        yield "Here is my answer."
+
+    input_queue = asyncio.Queue()
+    events = []
+    with manager_agent.override(model=FunctionModel(stream_function=thinking_then_text)):
+        async with asyncio.timeout(5):
+            async for event in run_manager_pipeline("test", input_queue):
+                events.append(event)
+
+    thinking_chunks = [e for e in events if isinstance(e, AgentThinkingChunk)]
+    assert thinking_chunks, "No AgentThinkingChunk events emitted"
+
+    thinking_complete = [e for e in events if isinstance(e, AgentThinkingComplete)]
+    assert thinking_complete, "No AgentThinkingComplete event emitted"
+    assert "reason" in thinking_complete[0].full_text.lower()
+
+    text_chunks = [e for e in events if isinstance(e, AgentStreamChunk)]
+    assert text_chunks, "No text chunks emitted"
+    assert isinstance(events[-1], AgentComplete)
+
+
+@pytest.mark.asyncio
+async def test_pipeline_no_thinking_still_works():
+    """Existing behavior preserved when model produces no thinking."""
+    async def text_only(messages: list[ModelMessage], info: AgentInfo):
+        yield "Just text, no thinking."
+
+    input_queue = asyncio.Queue()
+    events = []
+    with manager_agent.override(model=FunctionModel(stream_function=text_only)):
+        async with asyncio.timeout(5):
+            async for event in run_manager_pipeline("test", input_queue):
+                events.append(event)
+
+    thinking_chunks = [e for e in events if isinstance(e, AgentThinkingChunk)]
+    assert not thinking_chunks, "Unexpected thinking chunks for text-only model"
+    assert any(isinstance(e, AgentStreamChunk) for e in events)
+    assert isinstance(events[-1], AgentComplete)
+
+
+# ---------------------------------------------------------------------------
+# TUI tests
+# ---------------------------------------------------------------------------
+
+@pytest.mark.asyncio
+async def test_thinking_renders_collapsed_by_default():
+    """Thinking appears in a collapsed Collapsible widget."""
+    async def thinking_then_text(messages: list[ModelMessage], info: AgentInfo):
+        yield {0: DeltaThinkingPart(content="Deep thought here")}
+        yield "Final answer."
+
+    app = ChatApp()
+    with manager_agent.override(model=FunctionModel(stream_function=thinking_then_text)):
+        async with app.run_test(size=(120, 40)) as pilot:
+            await pilot.press(*"hello", "enter")
+            await pilot.pause(2.0)
+
+            collapsibles = list(app.query_one("#history-container").query(Collapsible))
+            assert collapsibles, "No Collapsible widget found for thinking"
+            assert collapsibles[0].collapsed is True
+
+
+@pytest.mark.asyncio
+async def test_verbose_mode_expands_thinking():
+    """With verbose_mode=True, thinking is expanded."""
+    async def thinking_then_text(messages: list[ModelMessage], info: AgentInfo):
+        yield {0: DeltaThinkingPart(content="Deep thought here")}
+        yield "Final answer."
+
+    app = ChatApp()
+    app.verbose_mode = True
+    with manager_agent.override(model=FunctionModel(stream_function=thinking_then_text)):
+        async with app.run_test(size=(120, 40)) as pilot:
+            await pilot.press(*"hello", "enter")
+            await pilot.pause(2.0)
+
+            collapsibles = list(app.query_one("#history-container").query(Collapsible))
+            assert collapsibles, "No Collapsible widget found"
+            assert collapsibles[0].collapsed is False
+
+
+@pytest.mark.asyncio
+async def test_verbose_command_toggles():
+    """/verbose toggles app.verbose_mode."""
+    app = ChatApp()
+    async with app.run_test(size=(120, 40)) as pilot:
+        assert app.verbose_mode is False
+        await pilot.press(*"/verbose", "enter")
+        await pilot.pause(0.5)
+        assert app.verbose_mode is True