Spaces:
Sleeping
feat: add native agent tools, /tools command, and fix rendering bugs
Browse files- Add bash_exec, read_file, web_fetch tools to manager_agent with
streaming output via AgentToolOutput events
- Add /tools slash command with self-contained ToolsWidget that
introspects agent tools and shows detail on selection
- Fix blank LLM responses: await Markdown.update(), mount(), remove()
- Fix silent error swallowing in run_manager_pipeline
- Fix OpenRouter model routing for nvidia/model:free style IDs
- Switch default model to nvidia/nemotron-3-super-120b-a12b:free
- Use auto_discover for command registration
- Add AgentToolOutput, AgentExecuteCommand event types
- Add unit tests for tools, /tools command, and TUI rendering
- Add integration tests for tool e2e (requires OPENROUTER_API_KEY)
- Fix test_manager_pipeline_flow timeout (use FunctionModel)
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
- src/cli_textual/agents/orchestrators.py +155 -6
- src/cli_textual/agents/prompts.yaml +33 -10
- src/cli_textual/agents/specialists.py +19 -16
- src/cli_textual/app.py +35 -34
- src/cli_textual/app.tcss +16 -0
- src/cli_textual/core/chat_events.py +13 -0
- src/cli_textual/core/command.py +38 -12
- src/cli_textual/plugins/commands/tools.py +66 -0
- tests/integration/test_interactive_agents.py +9 -8
- tests/integration/test_tool_agents.py +131 -0
- tests/integration/test_ui_full_agent_flow.py +60 -0
- tests/unit/test_agent_tools.py +222 -0
- tests/unit/test_chat_ux.py +50 -0
- tests/unit/test_manager_interaction.py +141 -0
- tests/unit/test_pydantic_agents.py +9 -11
- tests/unit/test_tools_command.py +104 -0
|
@@ -1,14 +1,16 @@
|
|
| 1 |
import os
|
| 2 |
import asyncio
|
|
|
|
| 3 |
from typing import AsyncGenerator, List, Any
|
|
|
|
| 4 |
from pydantic_ai import Agent, RunContext
|
| 5 |
from pydantic_ai.models.openai import OpenAIChatModel
|
| 6 |
from pydantic_ai.models.test import TestModel
|
| 7 |
from pydantic_ai.messages import ModelMessage
|
| 8 |
|
| 9 |
from cli_textual.core.chat_events import (
|
| 10 |
-
ChatEvent, AgentThinking, AgentToolStart, AgentToolEnd,
|
| 11 |
-
AgentRequiresUserInput, ChatDeps
|
| 12 |
)
|
| 13 |
from cli_textual.agents.specialists import model, intent_resolver, data_validator, result_generator
|
| 14 |
from cli_textual.core.agent_schemas import IntentResolution, ValidationResult, StructuredResult
|
|
@@ -70,12 +72,159 @@ manager_agent = Agent(
|
|
| 70 |
)
|
| 71 |
|
| 72 |
@manager_agent.tool
|
| 73 |
-
async def
|
| 74 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 75 |
await ctx.deps.event_queue.put(AgentRequiresUserInput(tool_name="/select", prompt=prompt, options=options))
|
| 76 |
response = await ctx.deps.input_queue.get()
|
| 77 |
return response
|
| 78 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 79 |
@manager_agent.tool
|
| 80 |
async def call_intent_resolver(ctx: RunContext[ChatDeps], query: str) -> str:
|
| 81 |
"""Resolve a user's natural language query to a specific target identifier."""
|
|
@@ -125,11 +274,11 @@ async def run_manager_pipeline(
|
|
| 125 |
if new_part:
|
| 126 |
await event_queue.put(AgentStreamChunk(text=new_part))
|
| 127 |
last_length = len(text)
|
| 128 |
-
|
| 129 |
await event_queue.put(AgentComplete(new_history=result.new_messages()))
|
| 130 |
except Exception as e:
|
|
|
|
| 131 |
await event_queue.put(AgentComplete())
|
| 132 |
-
raise e
|
| 133 |
|
| 134 |
# Run the agent in the background
|
| 135 |
task = asyncio.create_task(run_agent())
|
|
|
|
| 1 |
import os
|
| 2 |
import asyncio
|
| 3 |
+
from pathlib import Path
|
| 4 |
from typing import AsyncGenerator, List, Any
|
| 5 |
+
import httpx
|
| 6 |
from pydantic_ai import Agent, RunContext
|
| 7 |
from pydantic_ai.models.openai import OpenAIChatModel
|
| 8 |
from pydantic_ai.models.test import TestModel
|
| 9 |
from pydantic_ai.messages import ModelMessage
|
| 10 |
|
| 11 |
from cli_textual.core.chat_events import (
|
| 12 |
+
ChatEvent, AgentThinking, AgentToolStart, AgentToolEnd, AgentToolOutput,
|
| 13 |
+
AgentStreamChunk, AgentComplete, AgentRequiresUserInput, ChatDeps, AgentExecuteCommand
|
| 14 |
)
|
| 15 |
from cli_textual.agents.specialists import model, intent_resolver, data_validator, result_generator
|
| 16 |
from cli_textual.core.agent_schemas import IntentResolution, ValidationResult, StructuredResult
|
|
|
|
| 72 |
)
|
| 73 |
|
| 74 |
@manager_agent.tool
|
| 75 |
+
async def ask_user_to_select(ctx: RunContext[ChatDeps], prompt: str, options: List[str]) -> str:
|
| 76 |
+
"""Show a selection menu in the TUI and WAIT for the user's choice before continuing.
|
| 77 |
+
|
| 78 |
+
ALWAYS call this tool when the user's message contains any selection intent:
|
| 79 |
+
- "let me select / choose / pick"
|
| 80 |
+
- "I want to choose / select"
|
| 81 |
+
- "help me pick"
|
| 82 |
+
- "first pick / first choose / first select"
|
| 83 |
+
- any phrasing where the user should decide between options
|
| 84 |
+
|
| 85 |
+
This tool PAUSES the agent and BLOCKS until the user makes a choice in the terminal UI.
|
| 86 |
+
You MUST call this BEFORE writing any response that depends on the user's selection.
|
| 87 |
+
The return value is the user's chosen option — use it in your response.
|
| 88 |
+
|
| 89 |
+
Args:
|
| 90 |
+
prompt: The question shown above the menu (e.g., "Choose a primary color:")
|
| 91 |
+
options: The list of choices to display (e.g., ["Red", "Blue", "Yellow"])
|
| 92 |
+
"""
|
| 93 |
await ctx.deps.event_queue.put(AgentRequiresUserInput(tool_name="/select", prompt=prompt, options=options))
|
| 94 |
response = await ctx.deps.input_queue.get()
|
| 95 |
return response
|
| 96 |
|
| 97 |
+
@manager_agent.tool
|
| 98 |
+
async def execute_slash_command(ctx: RunContext[ChatDeps], command_name: str, args: List[str] = None) -> str:
|
| 99 |
+
"""Execute a TUI slash command (e.g. '/clear', '/ls').
|
| 100 |
+
Use this to trigger UI actions or system tools.
|
| 101 |
+
"""
|
| 102 |
+
if args is None: args = []
|
| 103 |
+
# Ensure command name starts with /
|
| 104 |
+
if not command_name.startswith("/"):
|
| 105 |
+
command_name = f"/{command_name}"
|
| 106 |
+
await ctx.deps.event_queue.put(AgentExecuteCommand(command_name=command_name, args=args))
|
| 107 |
+
return f"Command {command_name} triggered in UI."
|
| 108 |
+
|
| 109 |
+
@manager_agent.tool
|
| 110 |
+
async def bash_exec(ctx: RunContext[ChatDeps], command: str, working_dir: str = ".") -> str:
|
| 111 |
+
"""Execute a shell command and stream its output to the UI in real time.
|
| 112 |
+
|
| 113 |
+
Use this to run scripts, inspect the system, process files, or perform any
|
| 114 |
+
shell operation. stdout and stderr are merged and streamed as they arrive.
|
| 115 |
+
Output is capped at 8 KB; a truncation note is appended when exceeded.
|
| 116 |
+
|
| 117 |
+
Args:
|
| 118 |
+
command: The shell command to run (passed to /bin/sh)
|
| 119 |
+
working_dir: Working directory for the command (default: current directory)
|
| 120 |
+
"""
|
| 121 |
+
await ctx.deps.event_queue.put(AgentToolStart(tool_name="bash_exec", args={"command": command}))
|
| 122 |
+
MAX_OUTPUT = 8192
|
| 123 |
+
output_parts: list[str] = []
|
| 124 |
+
exit_code = 1
|
| 125 |
+
try:
|
| 126 |
+
proc = await asyncio.create_subprocess_shell(
|
| 127 |
+
command,
|
| 128 |
+
stdout=asyncio.subprocess.PIPE,
|
| 129 |
+
stderr=asyncio.subprocess.STDOUT,
|
| 130 |
+
cwd=working_dir,
|
| 131 |
+
)
|
| 132 |
+
assert proc.stdout is not None
|
| 133 |
+
while True:
|
| 134 |
+
chunk = await proc.stdout.read(1024)
|
| 135 |
+
if not chunk:
|
| 136 |
+
break
|
| 137 |
+
text = chunk.decode("utf-8", errors="replace")
|
| 138 |
+
output_parts.append(text)
|
| 139 |
+
await ctx.deps.event_queue.put(AgentToolOutput(tool_name="bash_exec", content=text))
|
| 140 |
+
await proc.wait()
|
| 141 |
+
exit_code = proc.returncode or 0
|
| 142 |
+
except Exception as exc:
|
| 143 |
+
err = f"Error: {exc}"
|
| 144 |
+
await ctx.deps.event_queue.put(AgentToolOutput(tool_name="bash_exec", content=err, is_error=True))
|
| 145 |
+
await ctx.deps.event_queue.put(AgentToolEnd(tool_name="bash_exec", result="error"))
|
| 146 |
+
return err
|
| 147 |
+
|
| 148 |
+
full_output = "".join(output_parts)
|
| 149 |
+
truncated = ""
|
| 150 |
+
if len(full_output) > MAX_OUTPUT:
|
| 151 |
+
full_output = full_output[:MAX_OUTPUT]
|
| 152 |
+
truncated = "\n[output truncated]"
|
| 153 |
+
result = f"Exit code: {exit_code}\n{full_output}{truncated}"
|
| 154 |
+
await ctx.deps.event_queue.put(AgentToolEnd(tool_name="bash_exec", result=f"exit {exit_code}"))
|
| 155 |
+
return result
|
| 156 |
+
|
| 157 |
+
|
| 158 |
+
@manager_agent.tool
|
| 159 |
+
async def read_file(ctx: RunContext[ChatDeps], path: str, start_line: int = 1, end_line: int = None) -> str:
|
| 160 |
+
"""Read the contents of a local file, optionally restricted to a line range.
|
| 161 |
+
|
| 162 |
+
Args:
|
| 163 |
+
path: File path (relative to CWD or absolute)
|
| 164 |
+
start_line: First line to include, 1-indexed (default: 1)
|
| 165 |
+
end_line: Last line to include (default: read all, capped at 200 lines)
|
| 166 |
+
"""
|
| 167 |
+
await ctx.deps.event_queue.put(AgentToolStart(tool_name="read_file", args={"path": path}))
|
| 168 |
+
MAX_CHARS = 8192
|
| 169 |
+
MAX_LINES = 200
|
| 170 |
+
try:
|
| 171 |
+
file_path = Path(path)
|
| 172 |
+
if not file_path.is_absolute():
|
| 173 |
+
file_path = Path.cwd() / file_path
|
| 174 |
+
lines = file_path.read_text(encoding="utf-8", errors="replace").splitlines()
|
| 175 |
+
start = max(0, start_line - 1)
|
| 176 |
+
end = min(len(lines), end_line if end_line is not None else len(lines))
|
| 177 |
+
end = min(end, start + MAX_LINES)
|
| 178 |
+
selected = lines[start:end]
|
| 179 |
+
content = "\n".join(selected)
|
| 180 |
+
truncated = ""
|
| 181 |
+
if len(content) > MAX_CHARS:
|
| 182 |
+
content = content[:MAX_CHARS]
|
| 183 |
+
truncated = "\n[truncated]"
|
| 184 |
+
result = content + truncated
|
| 185 |
+
except Exception as exc:
|
| 186 |
+
result = f"Error reading file: {exc}"
|
| 187 |
+
await ctx.deps.event_queue.put(AgentToolOutput(tool_name="read_file", content=result, is_error=True))
|
| 188 |
+
await ctx.deps.event_queue.put(AgentToolEnd(tool_name="read_file", result="error"))
|
| 189 |
+
return result
|
| 190 |
+
|
| 191 |
+
await ctx.deps.event_queue.put(AgentToolOutput(tool_name="read_file", content=result))
|
| 192 |
+
await ctx.deps.event_queue.put(AgentToolEnd(tool_name="read_file", result=f"{len(selected)} lines"))
|
| 193 |
+
return result
|
| 194 |
+
|
| 195 |
+
|
| 196 |
+
@manager_agent.tool
|
| 197 |
+
async def web_fetch(ctx: RunContext[ChatDeps], url: str) -> str:
|
| 198 |
+
"""Fetch the contents of a URL via HTTP GET and return the response body.
|
| 199 |
+
|
| 200 |
+
Use this for REST APIs, documentation pages, or any web resource.
|
| 201 |
+
Response body is capped at 8 KB; a truncation note is appended when exceeded.
|
| 202 |
+
|
| 203 |
+
Args:
|
| 204 |
+
url: The URL to fetch
|
| 205 |
+
"""
|
| 206 |
+
await ctx.deps.event_queue.put(AgentToolStart(tool_name="web_fetch", args={"url": url}))
|
| 207 |
+
MAX_CHARS = 8192
|
| 208 |
+
try:
|
| 209 |
+
async with httpx.AsyncClient(follow_redirects=True, timeout=30) as client:
|
| 210 |
+
response = await client.get(url)
|
| 211 |
+
body = response.text
|
| 212 |
+
truncated = ""
|
| 213 |
+
if len(body) > MAX_CHARS:
|
| 214 |
+
body = body[:MAX_CHARS]
|
| 215 |
+
truncated = "\n[truncated]"
|
| 216 |
+
result = f"HTTP {response.status_code}\n{body}{truncated}"
|
| 217 |
+
except Exception as exc:
|
| 218 |
+
result = f"Error fetching URL: {exc}"
|
| 219 |
+
await ctx.deps.event_queue.put(AgentToolOutput(tool_name="web_fetch", content=result, is_error=True))
|
| 220 |
+
await ctx.deps.event_queue.put(AgentToolEnd(tool_name="web_fetch", result="error"))
|
| 221 |
+
return result
|
| 222 |
+
|
| 223 |
+
await ctx.deps.event_queue.put(AgentToolOutput(tool_name="web_fetch", content=result))
|
| 224 |
+
await ctx.deps.event_queue.put(AgentToolEnd(tool_name="web_fetch", result=f"HTTP {response.status_code}"))
|
| 225 |
+
return result
|
| 226 |
+
|
| 227 |
+
|
| 228 |
@manager_agent.tool
|
| 229 |
async def call_intent_resolver(ctx: RunContext[ChatDeps], query: str) -> str:
|
| 230 |
"""Resolve a user's natural language query to a specific target identifier."""
|
|
|
|
| 274 |
if new_part:
|
| 275 |
await event_queue.put(AgentStreamChunk(text=new_part))
|
| 276 |
last_length = len(text)
|
| 277 |
+
|
| 278 |
await event_queue.put(AgentComplete(new_history=result.new_messages()))
|
| 279 |
except Exception as e:
|
| 280 |
+
await event_queue.put(AgentStreamChunk(text=f"\n\n**Error:** {e}"))
|
| 281 |
await event_queue.put(AgentComplete())
|
|
|
|
| 282 |
|
| 283 |
# Run the agent in the background
|
| 284 |
task = asyncio.create_task(run_agent())
|
|
@@ -4,16 +4,39 @@ orchestrators:
|
|
| 4 |
manager:
|
| 5 |
name: "Manager Agent"
|
| 6 |
system_prompt: |
|
| 7 |
-
You are a
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
|
| 18 |
specialists:
|
| 19 |
intent_resolver:
|
|
|
|
| 4 |
manager:
|
| 5 |
name: "Manager Agent"
|
| 6 |
system_prompt: |
|
| 7 |
+
You are a TUI Orchestrator.
|
| 8 |
+
|
| 9 |
+
CRITICAL RULE — SELECTION ALWAYS USES THE TOOL:
|
| 10 |
+
If the user expresses ANY selection intent, you MUST call 'ask_user_to_select' IMMEDIATELY
|
| 11 |
+
before writing any response. Trigger phrases include (but are not limited to):
|
| 12 |
+
- "let me select / choose / pick"
|
| 13 |
+
- "I want to choose / select / pick"
|
| 14 |
+
- "help me pick / choose"
|
| 15 |
+
- "first pick / first select / first choose"
|
| 16 |
+
- "you choose for me" / "give me options"
|
| 17 |
+
- any request where the user should decide between a set of options
|
| 18 |
+
|
| 19 |
+
NEVER pick a value on behalf of the user.
|
| 20 |
+
NEVER ask in plain text ("Which color do you prefer?").
|
| 21 |
+
ALWAYS call 'ask_user_to_select' and use the returned value in your response.
|
| 22 |
+
|
| 23 |
+
EXAMPLE:
|
| 24 |
+
User: "Tell me a story about a primary color but first let me select one"
|
| 25 |
+
CORRECT: Call ask_user_to_select(prompt="Choose a primary color:", options=["Red","Blue","Yellow"]),
|
| 26 |
+
then write the story using the user's chosen color.
|
| 27 |
+
WRONG: Picking "red" yourself and writing the story without asking.
|
| 28 |
+
|
| 29 |
+
WORKFLOW TOOLS:
|
| 30 |
+
- 'ask_user_to_select': For ALL choices, menus, and user-driven selections. Call this FIRST.
|
| 31 |
+
- 'bash_exec': Run shell commands or scripts. Output streams to the UI in real time.
|
| 32 |
+
- 'read_file': Read the contents of a local file (supports line ranges).
|
| 33 |
+
- 'web_fetch': HTTP GET a URL and return the response body. Use for REST APIs and web resources.
|
| 34 |
+
- 'call_intent_resolver': To identify subjects.
|
| 35 |
+
- 'call_data_validator': To check details.
|
| 36 |
+
- 'call_result_generator': For final output.
|
| 37 |
+
- 'execute_slash_command': To trigger TUI actions like /clear.
|
| 38 |
+
|
| 39 |
+
Maintain context and be concise.
|
| 40 |
|
| 41 |
specialists:
|
| 42 |
intent_resolver:
|
|
@@ -12,40 +12,43 @@ from cli_textual.agents.prompt_loader import PROMPTS
|
|
| 12 |
|
| 13 |
load_dotenv()
|
| 14 |
|
|
|
|
|
|
|
| 15 |
def get_model():
|
| 16 |
"""Dynamically select model based on environment variables."""
|
| 17 |
-
model_name = os.getenv("PYDANTIC_AI_MODEL", "
|
| 18 |
-
|
| 19 |
if model_name.lower() == "test":
|
| 20 |
return TestModel()
|
| 21 |
|
| 22 |
-
#
|
| 23 |
-
|
|
|
|
| 24 |
name = model_name
|
| 25 |
if ":" in model_name:
|
| 26 |
-
|
|
|
|
|
|
|
| 27 |
|
| 28 |
-
#
|
| 29 |
openrouter_key = os.getenv("OPENROUTER_API_KEY")
|
| 30 |
-
if openrouter_key and
|
| 31 |
return OpenAIChatModel(
|
| 32 |
-
|
| 33 |
provider=OpenAIProvider(
|
| 34 |
base_url="https://openrouter.ai/api/v1",
|
| 35 |
api_key=openrouter_key
|
| 36 |
)
|
| 37 |
)
|
| 38 |
|
| 39 |
-
#
|
| 40 |
-
if provider == "
|
| 41 |
-
return OpenAIChatModel(name)
|
| 42 |
-
elif provider == "anthropic":
|
| 43 |
return AnthropicModel(name)
|
| 44 |
-
|
| 45 |
return GeminiModel(name)
|
| 46 |
-
|
| 47 |
-
#
|
| 48 |
-
return OpenAIChatModel(model_name)
|
| 49 |
|
| 50 |
# Initialize the shared model instance
|
| 51 |
model = get_model()
|
|
|
|
| 12 |
|
| 13 |
load_dotenv()
|
| 14 |
|
| 15 |
+
KNOWN_PROVIDER_PREFIXES = {"anthropic", "openai", "gemini", "google"}
|
| 16 |
+
|
| 17 |
def get_model():
|
| 18 |
"""Dynamically select model based on environment variables."""
|
| 19 |
+
model_name = os.getenv("PYDANTIC_AI_MODEL", "nvidia/nemotron-3-super-120b-a12b:free")
|
| 20 |
+
|
| 21 |
if model_name.lower() == "test":
|
| 22 |
return TestModel()
|
| 23 |
|
| 24 |
+
# Only split "provider:name" on ":" if the left side is a known single-token provider.
|
| 25 |
+
# This prevents "nvidia/model:free" style OpenRouter IDs from being mis-parsed.
|
| 26 |
+
provider = None
|
| 27 |
name = model_name
|
| 28 |
if ":" in model_name:
|
| 29 |
+
left, right = model_name.split(":", 1)
|
| 30 |
+
if left.lower() in KNOWN_PROVIDER_PREFIXES:
|
| 31 |
+
provider, name = left.lower(), right
|
| 32 |
|
| 33 |
+
# Route through OpenRouter when key is available and no explicit native provider was parsed
|
| 34 |
openrouter_key = os.getenv("OPENROUTER_API_KEY")
|
| 35 |
+
if openrouter_key and provider is None:
|
| 36 |
return OpenAIChatModel(
|
| 37 |
+
model_name,
|
| 38 |
provider=OpenAIProvider(
|
| 39 |
base_url="https://openrouter.ai/api/v1",
|
| 40 |
api_key=openrouter_key
|
| 41 |
)
|
| 42 |
)
|
| 43 |
|
| 44 |
+
# Native providers
|
| 45 |
+
if provider == "anthropic":
|
|
|
|
|
|
|
| 46 |
return AnthropicModel(name)
|
| 47 |
+
if provider == "gemini" or provider == "google":
|
| 48 |
return GeminiModel(name)
|
| 49 |
+
|
| 50 |
+
# openai: prefix or bare model name (e.g. "gpt-4o")
|
| 51 |
+
return OpenAIChatModel(name if provider else model_name)
|
| 52 |
|
| 53 |
# Initialize the shared model instance
|
| 54 |
model = get_model()
|
|
@@ -20,8 +20,8 @@ from cli_textual.core.permissions import PermissionManager
|
|
| 20 |
from cli_textual.core.command import CommandManager
|
| 21 |
from cli_textual.core.dummy_agent import DummyAgent
|
| 22 |
from cli_textual.core.chat_events import (
|
| 23 |
-
ChatEvent, AgentThinking, AgentToolStart, AgentToolEnd,
|
| 24 |
-
AgentRequiresUserInput
|
| 25 |
)
|
| 26 |
|
| 27 |
# Pydantic AI Orchestrators
|
|
@@ -33,16 +33,6 @@ from cli_textual.ui.widgets.dna_spinner import DNASpinner
|
|
| 33 |
from cli_textual.ui.screens.permission_screen import PermissionScreen
|
| 34 |
from cli_textual.ui.widgets.landing_page import LandingPage
|
| 35 |
|
| 36 |
-
# Plugin Imports (Simulated auto-discovery for now)
|
| 37 |
-
from cli_textual.plugins.commands.ls import ListDirectoryCommand
|
| 38 |
-
from cli_textual.plugins.commands.head import HeadCommand
|
| 39 |
-
from cli_textual.plugins.commands.clear import ClearCommand
|
| 40 |
-
from cli_textual.plugins.commands.load import LoadCommand
|
| 41 |
-
from cli_textual.plugins.commands.select import SelectCommand
|
| 42 |
-
from cli_textual.plugins.commands.survey import SurveyCommand
|
| 43 |
-
from cli_textual.plugins.commands.help import HelpCommand
|
| 44 |
-
from cli_textual.plugins.commands.mode import ModeCommand
|
| 45 |
-
|
| 46 |
class ChatApp(App):
|
| 47 |
"""Refactored ChatApp using modular architecture."""
|
| 48 |
|
|
@@ -61,8 +51,10 @@ class ChatApp(App):
|
|
| 61 |
self.last_ctrl_d_time = 0
|
| 62 |
self.survey_answers = {}
|
| 63 |
# Allow setting default mode via environment variable
|
| 64 |
-
self.chat_mode = os.getenv("CHAT_MODE", "
|
| 65 |
self.message_history = [] # For LLM context memory
|
|
|
|
|
|
|
| 66 |
|
| 67 |
# Initialize Core Managers
|
| 68 |
self.workspace_root = Path.cwd().resolve()
|
|
@@ -71,18 +63,8 @@ class ChatApp(App):
|
|
| 71 |
self.command_manager = CommandManager()
|
| 72 |
self.agent = DummyAgent()
|
| 73 |
|
| 74 |
-
# Register Commands
|
| 75 |
-
self.
|
| 76 |
-
|
| 77 |
-
def _init_commands(self):
|
| 78 |
-
self.command_manager.register_command(ListDirectoryCommand())
|
| 79 |
-
self.command_manager.register_command(HeadCommand())
|
| 80 |
-
self.command_manager.register_command(ClearCommand())
|
| 81 |
-
self.command_manager.register_command(LoadCommand())
|
| 82 |
-
self.command_manager.register_command(SelectCommand())
|
| 83 |
-
self.command_manager.register_command(SurveyCommand())
|
| 84 |
-
self.command_manager.register_command(HelpCommand())
|
| 85 |
-
self.command_manager.register_command(ModeCommand())
|
| 86 |
|
| 87 |
def compose(self) -> ComposeResult:
|
| 88 |
yield Header(show_clock=True)
|
|
@@ -95,6 +77,9 @@ class ChatApp(App):
|
|
| 95 |
with Horizontal(id="status-bar"):
|
| 96 |
yield Label("workspace (/directory)", classes="status-info")
|
| 97 |
yield Label(f"mode: {self.chat_mode}", classes="status-info mode-info")
|
|
|
|
|
|
|
|
|
|
| 98 |
yield Label(str(self.workspace_root), classes="path-info")
|
| 99 |
yield Footer()
|
| 100 |
|
|
@@ -134,6 +119,9 @@ class ChatApp(App):
|
|
| 134 |
|
| 135 |
# Resume the agent by pushing the selection into the queue
|
| 136 |
if hasattr(self, "interactive_input_queue"):
|
|
|
|
|
|
|
|
|
|
| 137 |
self.interactive_input_queue.put_nowait(selection)
|
| 138 |
|
| 139 |
# Refocus main input
|
|
@@ -168,8 +156,6 @@ class ChatApp(App):
|
|
| 168 |
await self.process_command(user_input)
|
| 169 |
else:
|
| 170 |
# Select orchestrator based on chat_mode
|
| 171 |
-
self.interactive_input_queue = asyncio.Queue()
|
| 172 |
-
|
| 173 |
if self.chat_mode == "procedural":
|
| 174 |
generator = run_procedural_pipeline(user_input, message_history=self.message_history)
|
| 175 |
elif self.chat_mode == "manager":
|
|
@@ -219,28 +205,43 @@ class ChatApp(App):
|
|
| 219 |
|
| 220 |
history.scroll_end(animate=False)
|
| 221 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 222 |
elif isinstance(event, AgentToolStart):
|
| 223 |
task_label.update(f"Running tool: [bold cyan]{event.tool_name}[/]")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 224 |
|
| 225 |
elif isinstance(event, AgentStreamChunk):
|
| 226 |
# If we're starting to stream, remove the spinner and create the Markdown widget
|
| 227 |
if not markdown_widget:
|
| 228 |
-
progress.remove()
|
| 229 |
markdown_widget = Markdown("", classes="ai-msg")
|
| 230 |
-
history.mount(markdown_widget)
|
| 231 |
-
|
| 232 |
full_text += event.text
|
| 233 |
-
markdown_widget.update(full_text)
|
| 234 |
history.scroll_end(animate=False)
|
| 235 |
-
|
| 236 |
elif isinstance(event, AgentComplete):
|
| 237 |
# Save new history for context memory
|
| 238 |
if event.new_history:
|
| 239 |
self.message_history.extend(event.new_history)
|
| 240 |
-
|
| 241 |
# If we never got a stream (e.g. only tool calls), remove progress
|
| 242 |
if "agent-progress" in [c.id for c in history.children]:
|
| 243 |
-
progress.remove()
|
| 244 |
history.scroll_end(animate=False)
|
| 245 |
|
| 246 |
async def process_command(self, cmd_str: str):
|
|
|
|
| 20 |
from cli_textual.core.command import CommandManager
|
| 21 |
from cli_textual.core.dummy_agent import DummyAgent
|
| 22 |
from cli_textual.core.chat_events import (
|
| 23 |
+
ChatEvent, AgentThinking, AgentToolStart, AgentToolEnd, AgentToolOutput,
|
| 24 |
+
AgentStreamChunk, AgentComplete, AgentRequiresUserInput, AgentExecuteCommand
|
| 25 |
)
|
| 26 |
|
| 27 |
# Pydantic AI Orchestrators
|
|
|
|
| 33 |
from cli_textual.ui.screens.permission_screen import PermissionScreen
|
| 34 |
from cli_textual.ui.widgets.landing_page import LandingPage
|
| 35 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
class ChatApp(App):
|
| 37 |
"""Refactored ChatApp using modular architecture."""
|
| 38 |
|
|
|
|
| 51 |
self.last_ctrl_d_time = 0
|
| 52 |
self.survey_answers = {}
|
| 53 |
# Allow setting default mode via environment variable
|
| 54 |
+
self.chat_mode = os.getenv("CHAT_MODE", "manager")
|
| 55 |
self.message_history = [] # For LLM context memory
|
| 56 |
+
self.interactive_input_queue = asyncio.Queue()
|
| 57 |
+
|
| 58 |
|
| 59 |
# Initialize Core Managers
|
| 60 |
self.workspace_root = Path.cwd().resolve()
|
|
|
|
| 63 |
self.command_manager = CommandManager()
|
| 64 |
self.agent = DummyAgent()
|
| 65 |
|
| 66 |
+
# Register Commands via Auto-Discovery
|
| 67 |
+
self.command_manager.auto_discover("cli_textual.plugins.commands")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 68 |
|
| 69 |
def compose(self) -> ComposeResult:
|
| 70 |
yield Header(show_clock=True)
|
|
|
|
| 77 |
with Horizontal(id="status-bar"):
|
| 78 |
yield Label("workspace (/directory)", classes="status-info")
|
| 79 |
yield Label(f"mode: {self.chat_mode}", classes="status-info mode-info")
|
| 80 |
+
from cli_textual.agents.specialists import model
|
| 81 |
+
model_name = getattr(model, "model_name", "test-mock")
|
| 82 |
+
yield Label(f"model: {model_name}", classes="status-info model-info")
|
| 83 |
yield Label(str(self.workspace_root), classes="path-info")
|
| 84 |
yield Footer()
|
| 85 |
|
|
|
|
| 119 |
|
| 120 |
# Resume the agent by pushing the selection into the queue
|
| 121 |
if hasattr(self, "interactive_input_queue"):
|
| 122 |
+
# Drain any stale entries (safety measure)
|
| 123 |
+
while not self.interactive_input_queue.empty():
|
| 124 |
+
self.interactive_input_queue.get_nowait()
|
| 125 |
self.interactive_input_queue.put_nowait(selection)
|
| 126 |
|
| 127 |
# Refocus main input
|
|
|
|
| 156 |
await self.process_command(user_input)
|
| 157 |
else:
|
| 158 |
# Select orchestrator based on chat_mode
|
|
|
|
|
|
|
| 159 |
if self.chat_mode == "procedural":
|
| 160 |
generator = run_procedural_pipeline(user_input, message_history=self.message_history)
|
| 161 |
elif self.chat_mode == "manager":
|
|
|
|
| 205 |
|
| 206 |
history.scroll_end(animate=False)
|
| 207 |
|
| 208 |
+
elif isinstance(event, AgentExecuteCommand):
|
| 209 |
+
# Proactively execute a TUI command
|
| 210 |
+
full_cmd = event.command_name
|
| 211 |
+
if event.args:
|
| 212 |
+
full_cmd += " " + " ".join(event.args)
|
| 213 |
+
await self.process_command(full_cmd)
|
| 214 |
+
|
| 215 |
elif isinstance(event, AgentToolStart):
|
| 216 |
task_label.update(f"Running tool: [bold cyan]{event.tool_name}[/]")
|
| 217 |
+
|
| 218 |
+
elif isinstance(event, AgentToolOutput):
|
| 219 |
+
style_class = "tool-output-error" if event.is_error else "tool-output"
|
| 220 |
+
history.mount(Static(event.content, classes=style_class))
|
| 221 |
+
history.scroll_end(animate=False)
|
| 222 |
+
|
| 223 |
+
elif isinstance(event, AgentToolEnd):
|
| 224 |
+
task_label.update(f"Tool complete: [bold green]{event.tool_name}[/]")
|
| 225 |
|
| 226 |
elif isinstance(event, AgentStreamChunk):
|
| 227 |
# If we're starting to stream, remove the spinner and create the Markdown widget
|
| 228 |
if not markdown_widget:
|
| 229 |
+
await progress.remove()
|
| 230 |
markdown_widget = Markdown("", classes="ai-msg")
|
| 231 |
+
await history.mount(markdown_widget)
|
| 232 |
+
|
| 233 |
full_text += event.text
|
| 234 |
+
await markdown_widget.update(full_text)
|
| 235 |
history.scroll_end(animate=False)
|
| 236 |
+
|
| 237 |
elif isinstance(event, AgentComplete):
|
| 238 |
# Save new history for context memory
|
| 239 |
if event.new_history:
|
| 240 |
self.message_history.extend(event.new_history)
|
| 241 |
+
|
| 242 |
# If we never got a stream (e.g. only tool calls), remove progress
|
| 243 |
if "agent-progress" in [c.id for c in history.children]:
|
| 244 |
+
await progress.remove()
|
| 245 |
history.scroll_end(animate=False)
|
| 246 |
|
| 247 |
async def process_command(self, cmd_str: str):
|
|
@@ -224,3 +224,19 @@ DirectoryTree {
|
|
| 224 |
overflow-y: scroll;
|
| 225 |
border: solid #333333;
|
| 226 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 224 |
overflow-y: scroll;
|
| 225 |
border: solid #333333;
|
| 226 |
}
|
| 227 |
+
|
| 228 |
+
.tool-output {
|
| 229 |
+
background: #0D1117;
|
| 230 |
+
color: #C9D1D9;
|
| 231 |
+
border-left: solid #00AAFF;
|
| 232 |
+
padding: 0 1;
|
| 233 |
+
margin: 0 0 1 0;
|
| 234 |
+
}
|
| 235 |
+
|
| 236 |
+
.tool-output-error {
|
| 237 |
+
background: #1A0000;
|
| 238 |
+
color: #FF6B6B;
|
| 239 |
+
border-left: solid #FF4444;
|
| 240 |
+
padding: 0 1;
|
| 241 |
+
margin: 0 0 1 0;
|
| 242 |
+
}
|
|
@@ -20,6 +20,12 @@ class AgentRequiresUserInput(ChatEvent):
|
|
| 20 |
prompt: str
|
| 21 |
options: List[str]
|
| 22 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
@dataclass
|
| 24 |
class AgentThinking(ChatEvent):
|
| 25 |
"""The agent is processing or waiting for a response."""
|
|
@@ -37,6 +43,13 @@ class AgentToolEnd(ChatEvent):
|
|
| 37 |
tool_name: str
|
| 38 |
result: str
|
| 39 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
@dataclass
|
| 41 |
class AgentStreamChunk(ChatEvent):
|
| 42 |
"""A partial chunk of the final text response."""
|
|
|
|
| 20 |
prompt: str
|
| 21 |
options: List[str]
|
| 22 |
|
| 23 |
+
@dataclass
|
| 24 |
+
class AgentExecuteCommand(ChatEvent):
|
| 25 |
+
"""The agent wants to execute a TUI slash command."""
|
| 26 |
+
command_name: str
|
| 27 |
+
args: List[str]
|
| 28 |
+
|
| 29 |
@dataclass
|
| 30 |
class AgentThinking(ChatEvent):
|
| 31 |
"""The agent is processing or waiting for a response."""
|
|
|
|
| 43 |
tool_name: str
|
| 44 |
result: str
|
| 45 |
|
| 46 |
+
@dataclass
|
| 47 |
+
class AgentToolOutput(ChatEvent):
|
| 48 |
+
"""Streaming output from a running tool (e.g., bash stdout, file contents)."""
|
| 49 |
+
tool_name: str
|
| 50 |
+
content: str
|
| 51 |
+
is_error: bool = False
|
| 52 |
+
|
| 53 |
@dataclass
|
| 54 |
class AgentStreamChunk(ChatEvent):
|
| 55 |
"""A partial chunk of the final text response."""
|
|
@@ -1,42 +1,67 @@
|
|
|
|
|
|
|
|
|
|
|
| 1 |
from abc import ABC, abstractmethod
|
| 2 |
-
from typing import List
|
| 3 |
|
| 4 |
class SlashCommand(ABC):
|
| 5 |
"""Base class for all slash commands."""
|
| 6 |
-
|
| 7 |
@property
|
| 8 |
@abstractmethod
|
| 9 |
def name(self) -> str:
|
| 10 |
-
"""The command string
|
| 11 |
pass
|
| 12 |
|
| 13 |
@property
|
| 14 |
@abstractmethod
|
| 15 |
def description(self) -> str:
|
| 16 |
-
"""
|
| 17 |
pass
|
| 18 |
|
| 19 |
@property
|
| 20 |
def requires_permission(self) -> bool:
|
| 21 |
-
"""
|
| 22 |
return False
|
| 23 |
|
| 24 |
@abstractmethod
|
| 25 |
async def execute(self, app, args: List[str]):
|
| 26 |
-
"""The
|
| 27 |
pass
|
| 28 |
|
| 29 |
class CommandManager:
|
| 30 |
-
"""Registry and
|
| 31 |
-
|
| 32 |
def __init__(self):
|
| 33 |
-
self.commands = {}
|
| 34 |
|
| 35 |
def register_command(self, cmd: SlashCommand):
|
| 36 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
|
| 38 |
-
|
| 39 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
|
| 41 |
def get_all_help(self) -> str:
|
| 42 |
help_text = "### Commands\n"
|
|
@@ -44,3 +69,4 @@ class CommandManager:
|
|
| 44 |
cmd = self.commands[name]
|
| 45 |
help_text += f"- {name.ljust(15)} {cmd.description}\n"
|
| 46 |
return help_text
|
|
|
|
|
|
| 1 |
+
import importlib
|
| 2 |
+
import pkgutil
|
| 3 |
+
import inspect
|
| 4 |
from abc import ABC, abstractmethod
|
| 5 |
+
from typing import List, Dict, Type
|
| 6 |
|
| 7 |
class SlashCommand(ABC):
|
| 8 |
"""Base class for all slash commands."""
|
| 9 |
+
|
| 10 |
@property
|
| 11 |
@abstractmethod
|
| 12 |
def name(self) -> str:
|
| 13 |
+
"""The command string, e.g., '/help'."""
|
| 14 |
pass
|
| 15 |
|
| 16 |
@property
|
| 17 |
@abstractmethod
|
| 18 |
def description(self) -> str:
|
| 19 |
+
"""Brief summary of what the command does."""
|
| 20 |
pass
|
| 21 |
|
| 22 |
@property
|
| 23 |
def requires_permission(self) -> bool:
|
| 24 |
+
"""True if this command needs explicit user approval before running."""
|
| 25 |
return False
|
| 26 |
|
| 27 |
@abstractmethod
|
| 28 |
async def execute(self, app, args: List[str]):
|
| 29 |
+
"""The logic to run when the command is invoked."""
|
| 30 |
pass
|
| 31 |
|
| 32 |
class CommandManager:
|
| 33 |
+
"""Registry and executor for slash commands."""
|
| 34 |
+
|
| 35 |
def __init__(self):
|
| 36 |
+
self.commands: Dict[str, SlashCommand] = {}
|
| 37 |
|
| 38 |
def register_command(self, cmd: SlashCommand):
|
| 39 |
+
"""Manually register a command instance."""
|
| 40 |
+
self.commands[cmd.name.lower()] = cmd
|
| 41 |
+
|
| 42 |
+
def auto_discover(self, package_path: str):
|
| 43 |
+
"""
|
| 44 |
+
Dynamically discover and register SlashCommand classes in a package.
|
| 45 |
+
e.g., auto_discover('cli_textual.plugins.commands')
|
| 46 |
+
"""
|
| 47 |
+
try:
|
| 48 |
+
package = importlib.import_module(package_path)
|
| 49 |
+
for _, name, is_pkg in pkgutil.iter_modules(package.__path__):
|
| 50 |
+
full_module_name = f"{package_path}.{name}"
|
| 51 |
+
module = importlib.import_module(full_module_name)
|
| 52 |
|
| 53 |
+
for _, obj in inspect.getmembers(module):
|
| 54 |
+
if (inspect.isclass(obj) and
|
| 55 |
+
issubclass(obj, SlashCommand) and
|
| 56 |
+
obj is not SlashCommand):
|
| 57 |
+
# Instantiate and register
|
| 58 |
+
instance = obj()
|
| 59 |
+
self.register_command(instance)
|
| 60 |
+
except Exception as e:
|
| 61 |
+
print(f"Error during command discovery: {e}")
|
| 62 |
+
|
| 63 |
+
def get_command(self, name: str) -> SlashCommand:
|
| 64 |
+
return self.commands.get(name.lower())
|
| 65 |
|
| 66 |
def get_all_help(self) -> str:
|
| 67 |
help_text = "### Commands\n"
|
|
|
|
| 69 |
cmd = self.commands[name]
|
| 70 |
help_text += f"- {name.ljust(15)} {cmd.description}\n"
|
| 71 |
return help_text
|
| 72 |
+
|
|
@@ -0,0 +1,66 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import List
|
| 2 |
+
from textual import on
|
| 3 |
+
from textual.app import ComposeResult
|
| 4 |
+
from textual.widgets import Label, OptionList, Static
|
| 5 |
+
from textual.widget import Widget
|
| 6 |
+
from cli_textual.core.command import SlashCommand
|
| 7 |
+
from cli_textual.agents.orchestrators import manager_agent
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
def _first_line(text: str) -> str:
|
| 11 |
+
"""Return the first non-empty line of a docstring."""
|
| 12 |
+
for line in (text or "").splitlines():
|
| 13 |
+
stripped = line.strip()
|
| 14 |
+
if stripped:
|
| 15 |
+
return stripped
|
| 16 |
+
return ""
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
class ToolsWidget(Widget):
|
| 20 |
+
"""Self-contained widget: shows tool list, then full description on selection."""
|
| 21 |
+
|
| 22 |
+
DEFAULT_CSS = """
|
| 23 |
+
ToolsWidget {
|
| 24 |
+
height: auto;
|
| 25 |
+
padding: 0 1;
|
| 26 |
+
}
|
| 27 |
+
ToolsWidget .tool-detail {
|
| 28 |
+
padding: 1;
|
| 29 |
+
color: $text-muted;
|
| 30 |
+
}
|
| 31 |
+
"""
|
| 32 |
+
|
| 33 |
+
def compose(self) -> ComposeResult:
|
| 34 |
+
yield Label("Agent tools (Enter to inspect, Esc to close)")
|
| 35 |
+
tools = manager_agent._function_toolset.tools
|
| 36 |
+
items = [
|
| 37 |
+
f"{name:<22} {_first_line(tool.description)}"
|
| 38 |
+
for name, tool in tools.items()
|
| 39 |
+
]
|
| 40 |
+
yield OptionList(*items, id="tools-option-list")
|
| 41 |
+
|
| 42 |
+
@on(OptionList.OptionSelected, "#tools-option-list")
|
| 43 |
+
def show_detail(self, event: OptionList.OptionSelected) -> None:
|
| 44 |
+
tool_name = str(event.option.prompt).split()[0]
|
| 45 |
+
tools = manager_agent._function_toolset.tools
|
| 46 |
+
tool = tools.get(tool_name)
|
| 47 |
+
description = tool.description if tool else "(no description)"
|
| 48 |
+
|
| 49 |
+
self.query("*").remove()
|
| 50 |
+
self.mount(Label(f"[bold]{tool_name}[/bold] (Esc to close)"))
|
| 51 |
+
self.mount(Static(description, classes="tool-detail"))
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
class ToolsCommand(SlashCommand):
|
| 55 |
+
name = "/tools"
|
| 56 |
+
description = "List available agent tools"
|
| 57 |
+
|
| 58 |
+
async def execute(self, app, args: List[str]):
|
| 59 |
+
container = app.query_one("#interaction-container")
|
| 60 |
+
container.add_class("visible")
|
| 61 |
+
container.query("*").remove()
|
| 62 |
+
widget = ToolsWidget()
|
| 63 |
+
container.mount(widget)
|
| 64 |
+
app.call_after_refresh(
|
| 65 |
+
lambda: widget.query_one("#tools-option-list").focus()
|
| 66 |
+
)
|
|
@@ -11,8 +11,8 @@ async def test_manager_interactive_mock_backend():
|
|
| 11 |
"""Test the manager pipeline using a mock TestModel that forces a tool call."""
|
| 12 |
input_queue = asyncio.Queue()
|
| 13 |
|
| 14 |
-
# We force the TestModel to call the
|
| 15 |
-
mock_model = TestModel(call_tools=['
|
| 16 |
|
| 17 |
events = []
|
| 18 |
with manager_agent.override(model=mock_model):
|
|
@@ -46,13 +46,14 @@ async def test_manager_integration_backend():
|
|
| 46 |
|
| 47 |
events = []
|
| 48 |
with manager_agent.override(model=real_model):
|
| 49 |
-
#
|
| 50 |
-
prompt = "
|
| 51 |
pipeline = run_manager_pipeline(prompt, input_queue)
|
| 52 |
|
| 53 |
async for event in pipeline:
|
| 54 |
events.append(event)
|
| 55 |
if isinstance(event, AgentRequiresUserInput):
|
|
|
|
| 56 |
await input_queue.put("Neon Pink")
|
| 57 |
|
| 58 |
# Verify the LLM called the tool
|
|
@@ -77,17 +78,17 @@ async def test_manager_multi_turn_memory():
|
|
| 77 |
|
| 78 |
history = []
|
| 79 |
|
| 80 |
-
# Turn 1: Tell the agent something
|
| 81 |
with manager_agent.override(model=real_model):
|
| 82 |
-
async for event in run_manager_pipeline("My
|
| 83 |
if isinstance(event, AgentComplete):
|
| 84 |
history.extend(event.new_history)
|
| 85 |
|
| 86 |
# Turn 2: Ask the agent to recall it
|
| 87 |
events = []
|
| 88 |
with manager_agent.override(model=real_model):
|
| 89 |
-
async for event in run_manager_pipeline("What was my
|
| 90 |
events.append(event)
|
| 91 |
|
| 92 |
full_text = "".join([e.text for e in events if isinstance(e, AgentStreamChunk)])
|
| 93 |
-
assert "
|
|
|
|
| 11 |
"""Test the manager pipeline using a mock TestModel that forces a tool call."""
|
| 12 |
input_queue = asyncio.Queue()
|
| 13 |
|
| 14 |
+
# We force the TestModel to call the ask_user_to_select tool before finishing
|
| 15 |
+
mock_model = TestModel(call_tools=['ask_user_to_select'])
|
| 16 |
|
| 17 |
events = []
|
| 18 |
with manager_agent.override(model=mock_model):
|
|
|
|
| 46 |
|
| 47 |
events = []
|
| 48 |
with manager_agent.override(model=real_model):
|
| 49 |
+
# Natural prompt — the system prompt and tool description should be compelling enough
|
| 50 |
+
prompt = "Tell me a story about a primary color but first let me select a color"
|
| 51 |
pipeline = run_manager_pipeline(prompt, input_queue)
|
| 52 |
|
| 53 |
async for event in pipeline:
|
| 54 |
events.append(event)
|
| 55 |
if isinstance(event, AgentRequiresUserInput):
|
| 56 |
+
# The LLM successfully paused and invoked the TUI tool!
|
| 57 |
await input_queue.put("Neon Pink")
|
| 58 |
|
| 59 |
# Verify the LLM called the tool
|
|
|
|
| 78 |
|
| 79 |
history = []
|
| 80 |
|
| 81 |
+
# Turn 1: Tell the agent something non-sensitive
|
| 82 |
with manager_agent.override(model=real_model):
|
| 83 |
+
async for event in run_manager_pipeline("My favorite fruit is 'MANGO'. Remember it.", input_queue, history):
|
| 84 |
if isinstance(event, AgentComplete):
|
| 85 |
history.extend(event.new_history)
|
| 86 |
|
| 87 |
# Turn 2: Ask the agent to recall it
|
| 88 |
events = []
|
| 89 |
with manager_agent.override(model=real_model):
|
| 90 |
+
async for event in run_manager_pipeline("What was my favorite fruit?", input_queue, history):
|
| 91 |
events.append(event)
|
| 92 |
|
| 93 |
full_text = "".join([e.text for e in events if isinstance(e, AgentStreamChunk)])
|
| 94 |
+
assert "MANGO" in full_text.upper(), f"The LLM forgot the fruit. Output was: {full_text}"
|
|
@@ -0,0 +1,131 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Integration tests for native agent tools using a real LLM via OpenRouter.
|
| 2 |
+
|
| 3 |
+
All tests are skipped unless OPENROUTER_API_KEY is set. They exercise the full
|
| 4 |
+
run_manager_pipeline path — real model, real tool execution, real event stream —
|
| 5 |
+
and assert that the LLM correctly invokes tools and incorporates their output.
|
| 6 |
+
"""
|
| 7 |
+
import os
|
| 8 |
+
import asyncio
|
| 9 |
+
import pytest
|
| 10 |
+
|
| 11 |
+
from cli_textual.agents.orchestrators import manager_agent, run_manager_pipeline
|
| 12 |
+
from cli_textual.core.chat_events import (
|
| 13 |
+
AgentToolStart, AgentToolOutput, AgentStreamChunk, AgentComplete,
|
| 14 |
+
AgentRequiresUserInput,
|
| 15 |
+
)
|
| 16 |
+
|
| 17 |
+
# ---------------------------------------------------------------------------
|
| 18 |
+
# Helpers
|
| 19 |
+
# ---------------------------------------------------------------------------
|
| 20 |
+
|
| 21 |
+
SKIP_NO_KEY = pytest.mark.skipif(
|
| 22 |
+
not os.getenv("OPENROUTER_API_KEY"),
|
| 23 |
+
reason="OPENROUTER_API_KEY required for integration tests",
|
| 24 |
+
)
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
async def collect_pipeline(pipeline, input_queue, auto_respond=None) -> list:
|
| 28 |
+
"""Drain pipeline events, optionally responding to AgentRequiresUserInput."""
|
| 29 |
+
events = []
|
| 30 |
+
async for event in pipeline:
|
| 31 |
+
events.append(event)
|
| 32 |
+
if isinstance(event, AgentRequiresUserInput) and auto_respond is not None:
|
| 33 |
+
await input_queue.put(auto_respond)
|
| 34 |
+
return events
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
def text_from(events) -> str:
|
| 38 |
+
return "".join(e.text for e in events if isinstance(e, AgentStreamChunk))
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
def tool_started(events, name: str) -> bool:
|
| 42 |
+
return any(isinstance(e, AgentToolStart) and e.tool_name == name for e in events)
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
def tool_output_contains(events, name: str, substring: str) -> bool:
|
| 46 |
+
return any(
|
| 47 |
+
isinstance(e, AgentToolOutput) and e.tool_name == name and substring in e.content
|
| 48 |
+
for e in events
|
| 49 |
+
)
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
# ---------------------------------------------------------------------------
|
| 53 |
+
# Tests
|
| 54 |
+
# ---------------------------------------------------------------------------
|
| 55 |
+
|
| 56 |
+
@SKIP_NO_KEY
|
| 57 |
+
@pytest.mark.asyncio
|
| 58 |
+
async def test_bash_exec_e2e():
|
| 59 |
+
"""LLM should call bash_exec and incorporate the output in its response."""
|
| 60 |
+
input_queue = asyncio.Queue()
|
| 61 |
+
pipeline = run_manager_pipeline(
|
| 62 |
+
"Run the shell command 'echo hello world' and tell me what it outputs.",
|
| 63 |
+
input_queue,
|
| 64 |
+
)
|
| 65 |
+
events = await collect_pipeline(pipeline, input_queue)
|
| 66 |
+
|
| 67 |
+
assert tool_started(events, "bash_exec"), "Expected bash_exec tool call"
|
| 68 |
+
assert tool_output_contains(events, "bash_exec", "hello world"), \
|
| 69 |
+
"Expected 'hello world' in bash_exec output"
|
| 70 |
+
full_text = text_from(events)
|
| 71 |
+
assert "hello world" in full_text.lower(), \
|
| 72 |
+
f"LLM response did not mention 'hello world'. Got: {full_text[:300]}"
|
| 73 |
+
assert isinstance(events[-1], AgentComplete)
|
| 74 |
+
|
| 75 |
+
|
| 76 |
+
@SKIP_NO_KEY
|
| 77 |
+
@pytest.mark.asyncio
|
| 78 |
+
async def test_read_file_e2e():
|
| 79 |
+
"""LLM should call read_file when asked to inspect a file."""
|
| 80 |
+
input_queue = asyncio.Queue()
|
| 81 |
+
pipeline = run_manager_pipeline(
|
| 82 |
+
"Read the file src/cli_textual/core/chat_events.py and tell me what events are defined.",
|
| 83 |
+
input_queue,
|
| 84 |
+
)
|
| 85 |
+
events = await collect_pipeline(pipeline, input_queue)
|
| 86 |
+
|
| 87 |
+
assert tool_started(events, "read_file"), "Expected read_file tool call"
|
| 88 |
+
full_text = text_from(events)
|
| 89 |
+
# The file contains ChatEvent / AgentComplete — the LLM should mention at least one
|
| 90 |
+
assert any(keyword in full_text for keyword in ["ChatEvent", "AgentComplete", "event"]), \
|
| 91 |
+
f"LLM response didn't mention events. Got: {full_text[:300]}"
|
| 92 |
+
assert isinstance(events[-1], AgentComplete)
|
| 93 |
+
|
| 94 |
+
|
| 95 |
+
@SKIP_NO_KEY
|
| 96 |
+
@pytest.mark.asyncio
|
| 97 |
+
async def test_web_fetch_e2e():
|
| 98 |
+
"""LLM should call web_fetch when asked to retrieve a URL."""
|
| 99 |
+
input_queue = asyncio.Queue()
|
| 100 |
+
pipeline = run_manager_pipeline(
|
| 101 |
+
"Fetch the URL https://httpbin.org/json and tell me what the JSON contains.",
|
| 102 |
+
input_queue,
|
| 103 |
+
)
|
| 104 |
+
events = await collect_pipeline(pipeline, input_queue)
|
| 105 |
+
|
| 106 |
+
assert tool_started(events, "web_fetch"), "Expected web_fetch tool call"
|
| 107 |
+
# httpbin.org/json returns {"slideshow": ...}
|
| 108 |
+
assert tool_output_contains(events, "web_fetch", "slideshow") or \
|
| 109 |
+
tool_output_contains(events, "web_fetch", "200"), \
|
| 110 |
+
"Expected HTTP response in web_fetch output"
|
| 111 |
+
assert isinstance(events[-1], AgentComplete)
|
| 112 |
+
|
| 113 |
+
|
| 114 |
+
@SKIP_NO_KEY
|
| 115 |
+
@pytest.mark.asyncio
|
| 116 |
+
async def test_select_then_bash_e2e():
|
| 117 |
+
"""LLM should use ask_user_to_select first, then run bash_exec with the chosen command."""
|
| 118 |
+
input_queue = asyncio.Queue()
|
| 119 |
+
pipeline = run_manager_pipeline(
|
| 120 |
+
"Let me pick a shell command from a list, then run it and show me the output.",
|
| 121 |
+
input_queue,
|
| 122 |
+
)
|
| 123 |
+
events = await collect_pipeline(pipeline, input_queue, auto_respond="echo chosen_value")
|
| 124 |
+
|
| 125 |
+
assert any(isinstance(e, AgentRequiresUserInput) for e in events), \
|
| 126 |
+
"Expected a selection prompt"
|
| 127 |
+
assert tool_started(events, "bash_exec"), \
|
| 128 |
+
"Expected bash_exec to be called after selection"
|
| 129 |
+
full_text = text_from(events)
|
| 130 |
+
assert full_text, "Expected some text response"
|
| 131 |
+
assert isinstance(events[-1], AgentComplete)
|
|
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pytest
|
| 2 |
+
import asyncio
|
| 3 |
+
from textual.widgets import OptionList, Markdown, Label
|
| 4 |
+
from pydantic_ai.models.test import TestModel
|
| 5 |
+
|
| 6 |
+
from cli_textual.app import ChatApp
|
| 7 |
+
from cli_textual.agents.orchestrators import manager_agent
|
| 8 |
+
|
| 9 |
+
@pytest.mark.asyncio
|
| 10 |
+
async def test_full_ui_interaction_round_trip():
|
| 11 |
+
"""
|
| 12 |
+
Verify the full loop:
|
| 13 |
+
1. User sends message
|
| 14 |
+
2. Agent triggers selection UI
|
| 15 |
+
3. User makes selection
|
| 16 |
+
4. Agent completes with final output
|
| 17 |
+
"""
|
| 18 |
+
app = ChatApp()
|
| 19 |
+
app.chat_mode = "manager"
|
| 20 |
+
|
| 21 |
+
# We force the TestModel to call the selection tool
|
| 22 |
+
mock_model = TestModel(call_tools=['ask_user_to_select'])
|
| 23 |
+
|
| 24 |
+
async with app.run_test() as pilot:
|
| 25 |
+
with manager_agent.override(model=mock_model):
|
| 26 |
+
# 1. Type message and submit
|
| 27 |
+
await pilot.press(*"tell me a story about a color", "enter")
|
| 28 |
+
|
| 29 |
+
# 2. Wait for the interaction container to become visible
|
| 30 |
+
# We use a loop to poll since agent responses are async
|
| 31 |
+
for _ in range(20):
|
| 32 |
+
interaction = app.query_one("#interaction-container")
|
| 33 |
+
if interaction.has_class("visible") and app.query("OptionList#agent-select-tool"):
|
| 34 |
+
break
|
| 35 |
+
await pilot.pause(0.1)
|
| 36 |
+
else:
|
| 37 |
+
pytest.fail("Interaction UI never appeared")
|
| 38 |
+
|
| 39 |
+
# 3. Verify the selection list has options
|
| 40 |
+
option_list = app.query_one("#agent-select-tool", OptionList)
|
| 41 |
+
assert option_list.option_count > 0
|
| 42 |
+
|
| 43 |
+
# 4. Select the first option (Red) and press enter
|
| 44 |
+
await pilot.press("enter")
|
| 45 |
+
|
| 46 |
+
# 5. Wait for the agent to finish and the interaction UI to close
|
| 47 |
+
for _ in range(20):
|
| 48 |
+
if not interaction.has_class("visible"):
|
| 49 |
+
break
|
| 50 |
+
await pilot.pause(0.1)
|
| 51 |
+
else:
|
| 52 |
+
pytest.fail("Interaction UI never closed after selection")
|
| 53 |
+
|
| 54 |
+
# 6. Verify the final AI message was mounted in history
|
| 55 |
+
history = app.query_one("#history-container")
|
| 56 |
+
# The TestModel response usually contains the tool call result or mock text
|
| 57 |
+
assert len(history.query(Markdown)) >= 1
|
| 58 |
+
|
| 59 |
+
# Final check that focus returned to main input
|
| 60 |
+
assert app.focused.id == "main-input"
|
|
@@ -0,0 +1,222 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Unit tests for the native manager_agent tools (bash_exec, read_file, web_fetch).
|
| 2 |
+
|
| 3 |
+
Tools are called directly — the @agent.tool decorator registers them but returns
|
| 4 |
+
the original function unchanged, so they can be invoked as plain async functions.
|
| 5 |
+
A minimal mock RunContext carrying real asyncio.Queues stands in for the live
|
| 6 |
+
pydantic-ai context.
|
| 7 |
+
"""
|
| 8 |
+
import asyncio
|
| 9 |
+
import tempfile
|
| 10 |
+
import os
|
| 11 |
+
import pytest
|
| 12 |
+
from unittest.mock import MagicMock, patch, AsyncMock
|
| 13 |
+
|
| 14 |
+
from cli_textual.core.chat_events import (
|
| 15 |
+
ChatDeps, AgentToolStart, AgentToolEnd, AgentToolOutput,
|
| 16 |
+
)
|
| 17 |
+
from cli_textual.agents.orchestrators import bash_exec, read_file, web_fetch
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
# ---------------------------------------------------------------------------
|
| 21 |
+
# Helpers
|
| 22 |
+
# ---------------------------------------------------------------------------
|
| 23 |
+
|
| 24 |
+
def make_ctx() -> tuple:
|
| 25 |
+
"""Return (ctx, event_queue) backed by real asyncio.Queues."""
|
| 26 |
+
event_queue: asyncio.Queue = asyncio.Queue()
|
| 27 |
+
input_queue: asyncio.Queue = asyncio.Queue()
|
| 28 |
+
deps = ChatDeps(event_queue=event_queue, input_queue=input_queue)
|
| 29 |
+
ctx = MagicMock()
|
| 30 |
+
ctx.deps = deps
|
| 31 |
+
return ctx, event_queue
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
async def drain(q: asyncio.Queue) -> list:
|
| 35 |
+
"""Return all items currently in the queue without blocking."""
|
| 36 |
+
items = []
|
| 37 |
+
while not q.empty():
|
| 38 |
+
items.append(q.get_nowait())
|
| 39 |
+
return items
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
# ---------------------------------------------------------------------------
|
| 43 |
+
# bash_exec
|
| 44 |
+
# ---------------------------------------------------------------------------
|
| 45 |
+
|
| 46 |
+
@pytest.mark.asyncio
|
| 47 |
+
async def test_bash_exec_captures_output():
|
| 48 |
+
ctx, event_queue = make_ctx()
|
| 49 |
+
result = await bash_exec(ctx, command="echo hello")
|
| 50 |
+
assert "hello" in result
|
| 51 |
+
assert "Exit code: 0" in result
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
@pytest.mark.asyncio
|
| 55 |
+
async def test_bash_exec_emits_lifecycle_events():
|
| 56 |
+
ctx, event_queue = make_ctx()
|
| 57 |
+
await bash_exec(ctx, command="echo lifecycle")
|
| 58 |
+
events = await drain(event_queue)
|
| 59 |
+
|
| 60 |
+
types = [type(e) for e in events]
|
| 61 |
+
assert AgentToolStart in types
|
| 62 |
+
assert AgentToolOutput in types
|
| 63 |
+
assert AgentToolEnd in types
|
| 64 |
+
|
| 65 |
+
# Order must be Start → Output → End
|
| 66 |
+
start_idx = next(i for i, e in enumerate(events) if isinstance(e, AgentToolStart))
|
| 67 |
+
output_idx = next(i for i, e in enumerate(events) if isinstance(e, AgentToolOutput))
|
| 68 |
+
end_idx = next(i for i, e in enumerate(events) if isinstance(e, AgentToolEnd))
|
| 69 |
+
assert start_idx < output_idx < end_idx
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
@pytest.mark.asyncio
|
| 73 |
+
async def test_bash_exec_output_event_contains_text():
|
| 74 |
+
ctx, event_queue = make_ctx()
|
| 75 |
+
await bash_exec(ctx, command="echo unique_marker_xyz")
|
| 76 |
+
events = await drain(event_queue)
|
| 77 |
+
output_events = [e for e in events if isinstance(e, AgentToolOutput)]
|
| 78 |
+
combined = "".join(e.content for e in output_events)
|
| 79 |
+
assert "unique_marker_xyz" in combined
|
| 80 |
+
|
| 81 |
+
|
| 82 |
+
@pytest.mark.asyncio
|
| 83 |
+
async def test_bash_exec_nonzero_exit_code():
|
| 84 |
+
ctx, _ = make_ctx()
|
| 85 |
+
result = await bash_exec(ctx, command="sh -c 'exit 42'")
|
| 86 |
+
assert "42" in result
|
| 87 |
+
|
| 88 |
+
|
| 89 |
+
@pytest.mark.asyncio
|
| 90 |
+
async def test_bash_exec_invalid_command_does_not_raise():
|
| 91 |
+
ctx, _ = make_ctx()
|
| 92 |
+
# A command that doesn't exist — should return a non-empty string, not raise
|
| 93 |
+
result = await bash_exec(ctx, command="__nonexistent_command_xyz__")
|
| 94 |
+
assert result
|
| 95 |
+
|
| 96 |
+
|
| 97 |
+
# ---------------------------------------------------------------------------
|
| 98 |
+
# read_file
|
| 99 |
+
# ---------------------------------------------------------------------------
|
| 100 |
+
|
| 101 |
+
@pytest.mark.asyncio
|
| 102 |
+
async def test_read_file_returns_contents():
|
| 103 |
+
ctx, _ = make_ctx()
|
| 104 |
+
with tempfile.NamedTemporaryFile(mode="w", suffix=".txt", delete=False) as f:
|
| 105 |
+
f.write("line one\nline two\nline three\n")
|
| 106 |
+
tmp_path = f.name
|
| 107 |
+
try:
|
| 108 |
+
result = await read_file(ctx, path=tmp_path)
|
| 109 |
+
assert "line one" in result
|
| 110 |
+
assert "line two" in result
|
| 111 |
+
assert "line three" in result
|
| 112 |
+
finally:
|
| 113 |
+
os.unlink(tmp_path)
|
| 114 |
+
|
| 115 |
+
|
| 116 |
+
@pytest.mark.asyncio
|
| 117 |
+
async def test_read_file_line_range():
|
| 118 |
+
ctx, _ = make_ctx()
|
| 119 |
+
with tempfile.NamedTemporaryFile(mode="w", suffix=".txt", delete=False) as f:
|
| 120 |
+
f.write("alpha\nbeta\ngamma\ndelta\n")
|
| 121 |
+
tmp_path = f.name
|
| 122 |
+
try:
|
| 123 |
+
result = await read_file(ctx, path=tmp_path, start_line=2, end_line=3)
|
| 124 |
+
assert "beta" in result
|
| 125 |
+
assert "gamma" in result
|
| 126 |
+
assert "alpha" not in result
|
| 127 |
+
assert "delta" not in result
|
| 128 |
+
finally:
|
| 129 |
+
os.unlink(tmp_path)
|
| 130 |
+
|
| 131 |
+
|
| 132 |
+
@pytest.mark.asyncio
|
| 133 |
+
async def test_read_file_emits_lifecycle_events():
|
| 134 |
+
ctx, event_queue = make_ctx()
|
| 135 |
+
with tempfile.NamedTemporaryFile(mode="w", suffix=".txt", delete=False) as f:
|
| 136 |
+
f.write("content")
|
| 137 |
+
tmp_path = f.name
|
| 138 |
+
try:
|
| 139 |
+
await read_file(ctx, path=tmp_path)
|
| 140 |
+
events = await drain(event_queue)
|
| 141 |
+
types = [type(e) for e in events]
|
| 142 |
+
assert AgentToolStart in types
|
| 143 |
+
assert AgentToolOutput in types
|
| 144 |
+
assert AgentToolEnd in types
|
| 145 |
+
finally:
|
| 146 |
+
os.unlink(tmp_path)
|
| 147 |
+
|
| 148 |
+
|
| 149 |
+
@pytest.mark.asyncio
|
| 150 |
+
async def test_read_file_missing_returns_error_string():
|
| 151 |
+
ctx, event_queue = make_ctx()
|
| 152 |
+
result = await read_file(ctx, path="/nonexistent/path/file_xyz.txt")
|
| 153 |
+
assert "error" in result.lower() or "Error" in result
|
| 154 |
+
# Must also emit an error output event
|
| 155 |
+
events = await drain(event_queue)
|
| 156 |
+
error_events = [e for e in events if isinstance(e, AgentToolOutput) and e.is_error]
|
| 157 |
+
assert error_events
|
| 158 |
+
|
| 159 |
+
|
| 160 |
+
# ---------------------------------------------------------------------------
|
| 161 |
+
# web_fetch
|
| 162 |
+
# ---------------------------------------------------------------------------
|
| 163 |
+
|
| 164 |
+
@pytest.mark.asyncio
|
| 165 |
+
async def test_web_fetch_returns_body():
|
| 166 |
+
ctx, _ = make_ctx()
|
| 167 |
+
|
| 168 |
+
mock_response = MagicMock()
|
| 169 |
+
mock_response.status_code = 200
|
| 170 |
+
mock_response.text = '{"key": "value"}'
|
| 171 |
+
|
| 172 |
+
mock_client = AsyncMock()
|
| 173 |
+
mock_client.get = AsyncMock(return_value=mock_response)
|
| 174 |
+
mock_client.__aenter__ = AsyncMock(return_value=mock_client)
|
| 175 |
+
mock_client.__aexit__ = AsyncMock(return_value=None)
|
| 176 |
+
|
| 177 |
+
with patch("cli_textual.agents.orchestrators.httpx.AsyncClient", return_value=mock_client):
|
| 178 |
+
result = await web_fetch(ctx, url="https://example.com/api")
|
| 179 |
+
|
| 180 |
+
assert "200" in result
|
| 181 |
+
assert "value" in result
|
| 182 |
+
|
| 183 |
+
|
| 184 |
+
@pytest.mark.asyncio
|
| 185 |
+
async def test_web_fetch_emits_lifecycle_events():
|
| 186 |
+
ctx, event_queue = make_ctx()
|
| 187 |
+
|
| 188 |
+
mock_response = MagicMock()
|
| 189 |
+
mock_response.status_code = 200
|
| 190 |
+
mock_response.text = "body content"
|
| 191 |
+
|
| 192 |
+
mock_client = AsyncMock()
|
| 193 |
+
mock_client.get = AsyncMock(return_value=mock_response)
|
| 194 |
+
mock_client.__aenter__ = AsyncMock(return_value=mock_client)
|
| 195 |
+
mock_client.__aexit__ = AsyncMock(return_value=None)
|
| 196 |
+
|
| 197 |
+
with patch("cli_textual.agents.orchestrators.httpx.AsyncClient", return_value=mock_client):
|
| 198 |
+
await web_fetch(ctx, url="https://example.com")
|
| 199 |
+
|
| 200 |
+
events = await drain(event_queue)
|
| 201 |
+
types = [type(e) for e in events]
|
| 202 |
+
assert AgentToolStart in types
|
| 203 |
+
assert AgentToolOutput in types
|
| 204 |
+
assert AgentToolEnd in types
|
| 205 |
+
|
| 206 |
+
|
| 207 |
+
@pytest.mark.asyncio
|
| 208 |
+
async def test_web_fetch_network_error_returns_error_string():
|
| 209 |
+
ctx, event_queue = make_ctx()
|
| 210 |
+
|
| 211 |
+
mock_client = AsyncMock()
|
| 212 |
+
mock_client.get = AsyncMock(side_effect=Exception("connection refused"))
|
| 213 |
+
mock_client.__aenter__ = AsyncMock(return_value=mock_client)
|
| 214 |
+
mock_client.__aexit__ = AsyncMock(return_value=None)
|
| 215 |
+
|
| 216 |
+
with patch("cli_textual.agents.orchestrators.httpx.AsyncClient", return_value=mock_client):
|
| 217 |
+
result = await web_fetch(ctx, url="https://unreachable.example")
|
| 218 |
+
|
| 219 |
+
assert "error" in result.lower() or "Error" in result
|
| 220 |
+
events = await drain(event_queue)
|
| 221 |
+
error_events = [e for e in events if isinstance(e, AgentToolOutput) and e.is_error]
|
| 222 |
+
assert error_events
|
|
@@ -1,13 +1,17 @@
|
|
| 1 |
import pytest
|
|
|
|
|
|
|
| 2 |
from textual.widgets import Markdown, Static, Label
|
| 3 |
from cli_textual.app import ChatApp
|
| 4 |
from cli_textual.core.dummy_agent import DummyAgent
|
| 5 |
from cli_textual.core.chat_events import AgentThinking, AgentComplete
|
|
|
|
| 6 |
|
| 7 |
@pytest.mark.asyncio
|
| 8 |
async def test_chat_agent_loop():
|
| 9 |
"""Verify the full agent interaction loop: Thinking -> Tool -> Stream."""
|
| 10 |
app = ChatApp()
|
|
|
|
| 11 |
# Inject dummy agent for predictable testing
|
| 12 |
app.agent = DummyAgent()
|
| 13 |
|
|
@@ -40,3 +44,49 @@ async def test_chat_agent_loop():
|
|
| 40 |
await pilot.pause(2.0)
|
| 41 |
assert len(app.query(".agent-spinner")) == 0
|
| 42 |
assert "How can I help" in getattr(ai_msg, "_markdown", "")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import pytest
|
| 2 |
+
from pydantic_ai.models.function import FunctionModel, AgentInfo
|
| 3 |
+
from pydantic_ai.messages import ModelMessage
|
| 4 |
from textual.widgets import Markdown, Static, Label
|
| 5 |
from cli_textual.app import ChatApp
|
| 6 |
from cli_textual.core.dummy_agent import DummyAgent
|
| 7 |
from cli_textual.core.chat_events import AgentThinking, AgentComplete
|
| 8 |
+
from cli_textual.agents.orchestrators import manager_agent
|
| 9 |
|
| 10 |
@pytest.mark.asyncio
|
| 11 |
async def test_chat_agent_loop():
|
| 12 |
"""Verify the full agent interaction loop: Thinking -> Tool -> Stream."""
|
| 13 |
app = ChatApp()
|
| 14 |
+
app.chat_mode = "dummy"
|
| 15 |
# Inject dummy agent for predictable testing
|
| 16 |
app.agent = DummyAgent()
|
| 17 |
|
|
|
|
| 44 |
await pilot.pause(2.0)
|
| 45 |
assert len(app.query(".agent-spinner")) == 0
|
| 46 |
assert "How can I help" in getattr(ai_msg, "_markdown", "")
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
@pytest.mark.asyncio
async def test_manager_response_renders_in_tui():
    """Verify that the manager pipeline response actually appears rendered in the UI.

    This test specifically guards against the Markdown.update() await bug:
    - Markdown._markdown (set synchronously) would be non-empty even without await
    - But Markdown's child MarkdownBlock widgets are only created in the async part
    - Without ``await markdown_widget.update(...)``, children are never mounted
      and the widget renders blank despite _markdown being set.
    """
    sentinel = "Sentinel response from the deterministic test model."

    async def emit_sentinel(messages: list[ModelMessage], agent_info: AgentInfo):
        yield sentinel

    chat_app = ChatApp()
    chat_app.chat_mode = "manager"

    deterministic_model = FunctionModel(stream_function=emit_sentinel)
    with manager_agent.override(model=deterministic_model):
        async with chat_app.run_test(size=(120, 40)) as pilot:
            await pilot.press(*"hello", "enter")
            await pilot.pause(2.0)

            # The last .ai-msg widget in the history is the agent's reply.
            history = chat_app.query_one("#history-container")
            replies = list(history.query(".ai-msg"))
            assert replies, "No .ai-msg widget found — response was never rendered"

            reply = replies[-1]
            assert isinstance(reply, Markdown), \
                f"Expected Markdown widget, got {type(reply).__name__}"

            # _markdown is set synchronously — this alone does NOT prove rendering worked
            raw = getattr(reply, "_markdown", "")
            assert sentinel in raw, \
                f"Response text missing from _markdown. Got: {repr(raw)}"

            # MarkdownBlock children are only created by the async part of update().
            # If update() was not awaited, this list will be empty and the widget
            # displays as blank despite _markdown being set.
            assert list(reply.query("*")), (
                "Markdown widget has no rendered child blocks. "
                "This means update() was called without await — the widget appears blank to the user."
            )
|
|
@@ -0,0 +1,141 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
import asyncio
|
| 3 |
+
import pytest
|
| 4 |
+
from pydantic_ai.models.test import TestModel
|
| 5 |
+
from pydantic_ai.models.function import FunctionModel, AgentInfo, DeltaToolCall
|
| 6 |
+
from pydantic_ai.messages import ModelMessage, ModelRequest, ToolReturnPart
|
| 7 |
+
|
| 8 |
+
from cli_textual.agents.orchestrators import manager_agent, run_manager_pipeline
|
| 9 |
+
from cli_textual.core.chat_events import AgentRequiresUserInput, AgentStreamChunk, AgentComplete, AgentThinking
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
# ---------------------------------------------------------------------------
|
| 13 |
+
# Helpers
|
| 14 |
+
# ---------------------------------------------------------------------------
|
| 15 |
+
|
| 16 |
+
async def _collect_pipeline(pipeline, input_queue, auto_respond=None):
    """Drain a pipeline, optionally responding to any AgentRequiresUserInput."""
    collected = []
    async for evt in pipeline:
        collected.append(evt)
        if auto_respond is None:
            continue
        # Answer selection prompts so the pipeline can proceed.
        if isinstance(evt, AgentRequiresUserInput):
            await input_queue.put(auto_respond)
    return collected
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
def _has_tool_return(messages: list[ModelMessage]) -> bool:
    """Return True if any ModelRequest in *messages* carries a ToolReturnPart."""
    for msg in messages:
        if not isinstance(msg, ModelRequest):
            continue
        if any(isinstance(part, ToolReturnPart) for part in msg.parts):
            return True
    return False
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
# ---------------------------------------------------------------------------
|
| 34 |
+
# Stream functions for FunctionModel
|
| 35 |
+
# ---------------------------------------------------------------------------
|
| 36 |
+
|
| 37 |
+
async def text_only_stream(messages: list[ModelMessage], agent_info: AgentInfo):
    """Stream function simulating an LLM that skips the tool and only emits text."""
    story = "Once upon a time, there was a red sunset that painted the sky crimson."
    yield story
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
async def select_then_text_stream(messages: list[ModelMessage], agent_info: AgentInfo):
    """Simulates an LLM that correctly calls ask_user_to_select first, then responds."""
    if not _has_tool_return(messages):
        # First call: issue the tool call.
        call_args = {
            "prompt": "Choose a primary color:",
            "options": ["Red", "Blue", "Yellow"],
        }
        yield {0: DeltaToolCall(name="ask_user_to_select", json_args=json.dumps(call_args))}
    else:
        # Second call after the tool returned: write the story.
        yield "Here is your story about the chosen color!"
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
# ---------------------------------------------------------------------------
|
| 61 |
+
# Tests
|
| 62 |
+
# ---------------------------------------------------------------------------
|
| 63 |
+
|
| 64 |
+
@pytest.mark.asyncio
async def test_pipeline_plumbing_with_forced_tool_call():
    """Verify the pipeline infrastructure works when the tool IS called.

    Uses TestModel(call_tools=[...]) to force tool invocation — this tests
    the event-queue / input-queue bridge, not LLM prompt quality.
    """
    queue = asyncio.Queue()
    forced_model = TestModel(call_tools=["ask_user_to_select"])

    with manager_agent.override(model=forced_model):
        gen = run_manager_pipeline(
            "Tell me a story about a primary color but first let me select a color",
            queue,
        )
        seen = await _collect_pipeline(gen, queue, auto_respond="Blue")

    selection_events = [e for e in seen if isinstance(e, AgentRequiresUserInput)]
    assert selection_events
    assert selection_events[0].tool_name == "/select"
    assert any(isinstance(e, AgentStreamChunk) for e in seen)
    assert isinstance(seen[-1], AgentComplete)
|
| 86 |
+
|
| 87 |
+
|
| 88 |
+
@pytest.mark.asyncio
async def test_pipeline_text_only_emits_no_user_input_event():
    """Document what happens when the LLM returns text without calling the tool.

    This test captures the BROKEN behavior: if the LLM ignores ask_user_to_select,
    no AgentRequiresUserInput event is emitted and the user never gets a choice.
    A passing test here means the pipeline handles this gracefully (no crash),
    but the user experience is wrong — the LLM should always call the tool.
    """
    queue = asyncio.Queue()

    with manager_agent.override(model=FunctionModel(stream_function=text_only_stream)):
        gen = run_manager_pipeline(
            "Tell me a story about a primary color but first let me select a color",
            queue,
        )
        seen = await _collect_pipeline(gen, queue)

    # No selection event — the LLM skipped the tool.
    assert all(not isinstance(e, AgentRequiresUserInput) for e in seen)
    # But we still get text and a clean completion.
    assert any(isinstance(e, AgentStreamChunk) for e in seen)
    assert isinstance(seen[-1], AgentComplete)
|
| 112 |
+
|
| 113 |
+
|
| 114 |
+
@pytest.mark.asyncio
async def test_pipeline_with_function_model_select_then_respond():
    """Verify the full selection flow using a FunctionModel that mimics correct LLM behavior.

    This tests the same pipeline path a real LLM takes when it respects the
    system prompt and calls ask_user_to_select before writing a response.
    """
    queue = asyncio.Queue()
    scripted_model = FunctionModel(stream_function=select_then_text_stream)

    with manager_agent.override(model=scripted_model):
        gen = run_manager_pipeline(
            "Tell me a story about a primary color but first let me select a color",
            queue,
        )
        seen = await _collect_pipeline(gen, queue, auto_respond="Red")

    # Must get a selection event
    selection_events = [e for e in seen if isinstance(e, AgentRequiresUserInput)]
    assert selection_events, \
        "Expected AgentRequiresUserInput but LLM skipped the tool"
    assert selection_events[0].tool_name == "/select"
    assert len(selection_events[0].options) > 0

    # Must get a text response after the selection
    assert any(isinstance(e, AgentStreamChunk) for e in seen)
    assert isinstance(seen[-1], AgentComplete)
|
|
@@ -1,6 +1,8 @@
|
|
| 1 |
import asyncio
|
| 2 |
import pytest
|
| 3 |
-
from
|
|
|
|
|
|
|
| 4 |
from cli_textual.core.chat_events import (
|
| 5 |
AgentThinking, AgentToolStart, AgentToolEnd, AgentStreamChunk, AgentComplete,
|
| 6 |
AgentRequiresUserInput
|
|
@@ -23,21 +25,17 @@ async def test_procedural_pipeline_flow():
|
|
| 23 |
@pytest.mark.asyncio
|
| 24 |
async def test_manager_pipeline_flow():
|
| 25 |
"""Verify that the manager pipeline initializes and completes."""
|
|
|
|
|
|
|
|
|
|
| 26 |
events = []
|
| 27 |
input_queue = asyncio.Queue()
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
try:
|
| 31 |
async with asyncio.timeout(5):
|
| 32 |
pipeline = run_manager_pipeline("test prompt", input_queue)
|
| 33 |
async for event in pipeline:
|
| 34 |
events.append(event)
|
| 35 |
-
|
| 36 |
-
if isinstance(event, AgentRequiresUserInput):
|
| 37 |
-
await input_queue.put("mock selection")
|
| 38 |
-
except asyncio.TimeoutError:
|
| 39 |
-
pytest.fail("test_manager_pipeline_flow timed out - likely deadlocked on queue.get()")
|
| 40 |
-
|
| 41 |
-
# Manager pipeline using TestModel should at least think and complete.
|
| 42 |
assert any(isinstance(e, AgentThinking) for e in events)
|
| 43 |
assert isinstance(events[-1], AgentComplete)
|
|
|
|
| 1 |
import asyncio
|
| 2 |
import pytest
|
| 3 |
+
from pydantic_ai.models.function import FunctionModel, AgentInfo
|
| 4 |
+
from pydantic_ai.messages import ModelMessage
|
| 5 |
+
from cli_textual.agents.orchestrators import run_procedural_pipeline, run_manager_pipeline, manager_agent
|
| 6 |
from cli_textual.core.chat_events import (
|
| 7 |
AgentThinking, AgentToolStart, AgentToolEnd, AgentStreamChunk, AgentComplete,
|
| 8 |
AgentRequiresUserInput
|
|
|
|
| 25 |
@pytest.mark.asyncio
async def test_manager_pipeline_flow():
    """Verify that the manager pipeline initializes and completes.

    Uses a deterministic FunctionModel so no network call is made; the 5 s
    asyncio.timeout guards against the pipeline deadlocking on its input queue.
    """
    async def fixed_response(messages: list[ModelMessage], agent_info: AgentInfo):
        yield "done"

    events = []
    input_queue = asyncio.Queue()

    with manager_agent.override(model=FunctionModel(stream_function=fixed_response)):
        # Keep the explicit pytest.fail so a deadlock reports a clear message
        # instead of a bare TimeoutError traceback.
        try:
            async with asyncio.timeout(5):
                pipeline = run_manager_pipeline("test prompt", input_queue)
                async for event in pipeline:
                    events.append(event)
        except TimeoutError:
            pytest.fail("test_manager_pipeline_flow timed out - likely deadlocked on queue.get()")

    # Manager pipeline should at least think and complete.
    assert any(isinstance(e, AgentThinking) for e in events)
    assert isinstance(events[-1], AgentComplete)
|
|
@@ -0,0 +1,104 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pytest
|
| 2 |
+
from unittest.mock import patch, MagicMock
|
| 3 |
+
from pydantic_ai.models.test import TestModel
|
| 4 |
+
from textual.widgets import Label, OptionList, Static
|
| 5 |
+
from cli_textual.app import ChatApp
|
| 6 |
+
from cli_textual.plugins.commands.tools import ToolsWidget, ToolsCommand, _first_line
|
| 7 |
+
from cli_textual.agents.orchestrators import manager_agent
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
# ---------------------------------------------------------------------------
|
| 11 |
+
# _first_line helper
|
| 12 |
+
# ---------------------------------------------------------------------------
|
| 13 |
+
|
| 14 |
+
def test_first_line_returns_first_non_empty():
    """Leading blank lines are skipped and the first real line comes back stripped."""
    result = _first_line("\n\n Hello world\n more text")
    assert result == "Hello world"
|
| 16 |
+
|
| 17 |
+
def test_first_line_empty_string():
    """An empty input yields an empty first line."""
    result = _first_line("")
    assert result == ""
|
| 19 |
+
|
| 20 |
+
def test_first_line_only_whitespace():
    """Whitespace-only input yields an empty first line."""
    result = _first_line(" \n \n")
    assert result == ""
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
# ---------------------------------------------------------------------------
|
| 25 |
+
# ToolsWidget
|
| 26 |
+
# ---------------------------------------------------------------------------
|
| 27 |
+
|
| 28 |
+
@pytest.mark.asyncio
async def test_tools_widget_composes_option_list():
    """ToolsWidget should render an OptionList with one item per tool."""
    app = ChatApp()
    async with app.run_test(size=(120, 40)) as pilot:
        tools_widget = ToolsWidget()
        await app.query_one("#history-container").mount(tools_widget)
        await pilot.pause(0.1)

        listing = tools_widget.query_one("#tools-option-list", OptionList)
        assert listing is not None

        # One option per tool registered on the manager agent.
        # NOTE(review): reaches into pydantic-ai's private _function_toolset —
        # fragile across library upgrades.
        expected = len(manager_agent._function_toolset.tools)
        assert listing.option_count == expected
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
@pytest.mark.asyncio
async def test_tools_widget_shows_detail_on_selection():
    """Selecting a tool in the OptionList should swap to the detail view."""
    app = ChatApp()
    async with app.run_test(size=(120, 40)) as pilot:
        tools_widget = ToolsWidget()
        await app.query_one("#history-container").mount(tools_widget)
        await pilot.pause(0.1)

        listing = tools_widget.query_one("#tools-option-list", OptionList)
        listing.focus()
        await pilot.pause(0.05)

        # Select the first item
        await pilot.press("enter")
        await pilot.pause(0.2)

        # OptionList should be gone, Static detail should be present
        assert not tools_widget.query("#tools-option-list")
        assert tools_widget.query(".tool-detail")
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
@pytest.mark.asyncio
async def test_tools_widget_detail_contains_tool_name():
    """Detail view Label should contain the selected tool's name."""
    first_tool_name = next(iter(manager_agent._function_toolset.tools))

    app = ChatApp()
    async with app.run_test(size=(120, 40)) as pilot:
        tools_widget = ToolsWidget()
        await app.query_one("#history-container").mount(tools_widget)
        await pilot.pause(0.1)

        listing = tools_widget.query_one("#tools-option-list", OptionList)
        listing.focus()
        await pilot.pause(0.05)
        await pilot.press("enter")
        await pilot.pause(0.2)

        # The detail view must mention the selected tool somewhere in a Label.
        rendered = [str(lbl.render()) for lbl in tools_widget.query(Label)]
        assert any(first_tool_name in text for text in rendered)
|
| 88 |
+
|
| 89 |
+
|
| 90 |
+
# ---------------------------------------------------------------------------
|
| 91 |
+
# ToolsCommand integration with ChatApp
|
| 92 |
+
# ---------------------------------------------------------------------------
|
| 93 |
+
|
| 94 |
+
@pytest.mark.asyncio
async def test_tools_command_mounts_widget():
    """/tools command should mount ToolsWidget into #interaction-container."""
    app = ChatApp()
    async with app.run_test(size=(120, 40)) as pilot:
        # Type the slash command and submit it.
        for key in "/tools":
            await pilot.press(key)
        await pilot.press("enter")
        await pilot.pause(0.3)

        panel = app.query_one("#interaction-container")
        assert "visible" in panel.classes
        assert panel.query(ToolsWidget)
|