akseljoonas HF Staff commited on
Commit
af6a7ab
Β·
1 Parent(s): 110c57a

feat: add headless CLI mode with local filesystem tools and rich terminal rendering

Browse files

- Add argparse-based headless mode: `python -m agent.main "prompt here"`
- Local tool specs override sandbox /app references for CLI mode
- System prompt injects CLI-specific context (working directory, no sandbox)
- Extract streaming/non-streaming LLM call helpers with LLMResult dataclass
- Add shimmer thinking animation (truecolor gradient sweep)
- Progressive markdown rendering via rich Live display
- Clean MCP shutdown sequence, suppress asyncio teardown noise
- Add rich dependency to agent extras

agent/context_manager/manager.py CHANGED
@@ -79,11 +79,13 @@ class ContextManager:
79
  tool_specs: list[dict[str, Any]] | None = None,
80
  prompt_file_suffix: str = "system_prompt_v3.yaml",
81
  hf_token: str | None = None,
 
82
  ):
83
  self.system_prompt = self._load_system_prompt(
84
  tool_specs or [],
85
  prompt_file_suffix="system_prompt_v3.yaml",
86
  hf_token=hf_token,
 
87
  )
88
  self.max_context = max_context - 10000
89
  self.compact_size = int(max_context * compact_size)
@@ -96,6 +98,7 @@ class ContextManager:
96
  tool_specs: list[dict[str, Any]],
97
  prompt_file_suffix: str = "system_prompt.yaml",
98
  hf_token: str | None = None,
 
99
  ):
100
  """Load and render the system prompt from YAML file with Jinja2"""
101
  prompt_file = Path(__file__).parent.parent / "prompts" / f"{prompt_file_suffix}"
@@ -119,6 +122,23 @@ class ContextManager:
119
  tools=tool_specs,
120
  num_tools=len(tool_specs),
121
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
122
  return (
123
  f"{static_prompt}\n\n"
124
  f"[Session context: Date={current_date}, Time={current_time}, "
 
79
  tool_specs: list[dict[str, Any]] | None = None,
80
  prompt_file_suffix: str = "system_prompt_v3.yaml",
81
  hf_token: str | None = None,
82
+ local_mode: bool = False,
83
  ):
84
  self.system_prompt = self._load_system_prompt(
85
  tool_specs or [],
86
  prompt_file_suffix="system_prompt_v3.yaml",
87
  hf_token=hf_token,
88
+ local_mode=local_mode,
89
  )
90
  self.max_context = max_context - 10000
91
  self.compact_size = int(max_context * compact_size)
 
98
  tool_specs: list[dict[str, Any]],
99
  prompt_file_suffix: str = "system_prompt.yaml",
100
  hf_token: str | None = None,
101
+ local_mode: bool = False,
102
  ):
103
  """Load and render the system prompt from YAML file with Jinja2"""
104
  prompt_file = Path(__file__).parent.parent / "prompts" / f"{prompt_file_suffix}"
 
122
  tools=tool_specs,
123
  num_tools=len(tool_specs),
124
  )
125
+
126
+ # CLI-specific context for local mode
127
+ if local_mode:
128
+ import os
129
+ cwd = os.getcwd()
130
+ local_context = (
131
+ f"\n\n# CLI / Local mode\n\n"
132
+ f"You are running as a local CLI tool on the user's machine. "
133
+ f"There is NO sandbox β€” bash, read, write, and edit operate directly "
134
+ f"on the local filesystem.\n\n"
135
+ f"Working directory: {cwd}\n"
136
+ f"Use absolute paths or paths relative to the working directory. "
137
+ f"Do NOT use /app/ paths β€” that is a sandbox convention that does not apply here.\n"
138
+ f"The sandbox_create tool is NOT available. Run code directly with bash."
139
+ )
140
+ static_prompt += local_context
141
+
142
  return (
143
  f"{static_prompt}\n\n"
144
  f"[Session context: Date={current_date}, Time={current_time}, "
agent/core/agent_loop.py CHANGED
@@ -6,6 +6,7 @@ import asyncio
6
  import json
7
  import logging
8
  import os
 
9
 
10
  from litellm import ChatCompletionMessageToolCall, Message, acompletion
11
  from litellm.exceptions import ContextWindowExceededError
@@ -244,6 +245,164 @@ async def _cleanup_on_cancel(session: Session) -> None:
244
  session._running_job_ids.clear()
245
 
246
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
247
  class Handlers:
248
  """Handler functions for each operation type"""
249
 
@@ -345,98 +504,17 @@ class Handlers:
345
  messages = session.context_manager.get_messages()
346
  tools = session.tool_router.get_tool_specs_for_llm()
347
  try:
348
- # ── Stream the LLM response (with retry for transient errors) ──
349
  llm_params = _resolve_hf_router_params(session.config.model_name)
350
- response = None
351
- for _llm_attempt in range(_MAX_LLM_RETRIES):
352
- try:
353
- response = await acompletion(
354
- messages=messages,
355
- tools=tools,
356
- tool_choice="auto",
357
- stream=True,
358
- stream_options={"include_usage": True},
359
- timeout=600,
360
- **llm_params,
361
- )
362
- break
363
- except ContextWindowExceededError:
364
- raise
365
- except Exception as e:
366
- if _llm_attempt < _MAX_LLM_RETRIES - 1 and _is_transient_error(e):
367
- _delay = _LLM_RETRY_DELAYS[_llm_attempt]
368
- logger.warning(
369
- "Transient LLM error (attempt %d/%d): %s β€” retrying in %ds",
370
- _llm_attempt + 1, _MAX_LLM_RETRIES, e, _delay,
371
- )
372
- await session.send_event(Event(
373
- event_type="tool_log",
374
- data={"tool": "system", "log": f"LLM connection error, retrying in {_delay}s..."},
375
- ))
376
- await asyncio.sleep(_delay)
377
- continue
378
- raise
379
-
380
- full_content = ""
381
- tool_calls_acc: dict[int, dict] = {}
382
- token_count = 0
383
- finish_reason = None
384
-
385
- async for chunk in response:
386
- # ── Check cancellation during streaming ──
387
- if session.is_cancelled:
388
- tool_calls_acc.clear()
389
- break
390
-
391
- choice = chunk.choices[0] if chunk.choices else None
392
- if not choice:
393
- # Last chunk may carry only usage info
394
- if hasattr(chunk, "usage") and chunk.usage:
395
- token_count = chunk.usage.total_tokens
396
- continue
397
-
398
- delta = choice.delta
399
- if choice.finish_reason:
400
- finish_reason = choice.finish_reason
401
-
402
- # Stream text deltas to the frontend
403
- if delta.content:
404
- full_content += delta.content
405
- await session.send_event(
406
- Event(
407
- event_type="assistant_chunk",
408
- data={"content": delta.content},
409
- )
410
- )
411
 
412
- # Accumulate tool-call deltas (name + args arrive in pieces)
413
- if delta.tool_calls:
414
- for tc_delta in delta.tool_calls:
415
- idx = tc_delta.index
416
- if idx not in tool_calls_acc:
417
- tool_calls_acc[idx] = {
418
- "id": "",
419
- "type": "function",
420
- "function": {"name": "", "arguments": ""},
421
- }
422
- if tc_delta.id:
423
- tool_calls_acc[idx]["id"] = tc_delta.id
424
- if tc_delta.function:
425
- if tc_delta.function.name:
426
- tool_calls_acc[idx]["function"]["name"] += (
427
- tc_delta.function.name
428
- )
429
- if tc_delta.function.arguments:
430
- tool_calls_acc[idx]["function"]["arguments"] += (
431
- tc_delta.function.arguments
432
- )
433
-
434
- # Capture usage from the final chunk
435
- if hasattr(chunk, "usage") and chunk.usage:
436
- token_count = chunk.usage.total_tokens
437
-
438
- # ── Stream finished β€” reconstruct full message ───────
439
- content = full_content or None
440
 
441
  # If output was truncated, all tool call args are garbage.
442
  # Inject a system hint so the LLM retries with smaller content.
@@ -468,9 +546,10 @@ class Handlers:
468
  session.context_manager.add_message(
469
  Message(role="user", content=f"[SYSTEM: {truncation_hint}]")
470
  )
471
- await session.send_event(
472
- Event(event_type="assistant_stream_end", data={})
473
- )
 
474
  await session.send_event(
475
  Event(
476
  event_type="tool_log",
@@ -496,9 +575,10 @@ class Handlers:
496
  )
497
 
498
  # Signal end of streaming to the frontend
499
- await session.send_event(
500
- Event(event_type="assistant_stream_end", data={})
501
- )
 
502
 
503
  # If no tool calls, add assistant message and we're done
504
  if not tool_calls:
@@ -1043,6 +1123,8 @@ async def submission_loop(
1043
  tool_router: ToolRouter | None = None,
1044
  session_holder: list | None = None,
1045
  hf_token: str | None = None,
 
 
1046
  ) -> None:
1047
  """
1048
  Main agent loop - processes submissions and dispatches to handlers.
@@ -1051,7 +1133,8 @@ async def submission_loop(
1051
 
1052
  # Create session with tool router
1053
  session = Session(
1054
- event_queue, config=config, tool_router=tool_router, hf_token=hf_token
 
1055
  )
1056
  if session_holder is not None:
1057
  session_holder[0] = session
 
6
  import json
7
  import logging
8
  import os
9
+ from dataclasses import dataclass
10
 
11
  from litellm import ChatCompletionMessageToolCall, Message, acompletion
12
  from litellm.exceptions import ContextWindowExceededError
 
245
  session._running_job_ids.clear()
246
 
247
 
248
+ @dataclass
249
+ class LLMResult:
250
+ """Result from an LLM call (streaming or non-streaming)."""
251
+ content: str | None
252
+ tool_calls_acc: dict[int, dict]
253
+ token_count: int
254
+ finish_reason: str | None
255
+
256
+
257
+ async def _call_llm_streaming(session: Session, messages, tools, llm_params) -> LLMResult:
258
+ """Call the LLM with streaming, emitting assistant_chunk events."""
259
+ response = None
260
+ for _llm_attempt in range(_MAX_LLM_RETRIES):
261
+ try:
262
+ response = await acompletion(
263
+ messages=messages,
264
+ tools=tools,
265
+ tool_choice="auto",
266
+ stream=True,
267
+ stream_options={"include_usage": True},
268
+ timeout=600,
269
+ **llm_params,
270
+ )
271
+ break
272
+ except ContextWindowExceededError:
273
+ raise
274
+ except Exception as e:
275
+ if _llm_attempt < _MAX_LLM_RETRIES - 1 and _is_transient_error(e):
276
+ _delay = _LLM_RETRY_DELAYS[_llm_attempt]
277
+ logger.warning(
278
+ "Transient LLM error (attempt %d/%d): %s β€” retrying in %ds",
279
+ _llm_attempt + 1, _MAX_LLM_RETRIES, e, _delay,
280
+ )
281
+ await session.send_event(Event(
282
+ event_type="tool_log",
283
+ data={"tool": "system", "log": f"LLM connection error, retrying in {_delay}s..."},
284
+ ))
285
+ await asyncio.sleep(_delay)
286
+ continue
287
+ raise
288
+
289
+ full_content = ""
290
+ tool_calls_acc: dict[int, dict] = {}
291
+ token_count = 0
292
+ finish_reason = None
293
+
294
+ async for chunk in response:
295
+ if session.is_cancelled:
296
+ tool_calls_acc.clear()
297
+ break
298
+
299
+ choice = chunk.choices[0] if chunk.choices else None
300
+ if not choice:
301
+ if hasattr(chunk, "usage") and chunk.usage:
302
+ token_count = chunk.usage.total_tokens
303
+ continue
304
+
305
+ delta = choice.delta
306
+ if choice.finish_reason:
307
+ finish_reason = choice.finish_reason
308
+
309
+ if delta.content:
310
+ full_content += delta.content
311
+ await session.send_event(
312
+ Event(event_type="assistant_chunk", data={"content": delta.content})
313
+ )
314
+
315
+ if delta.tool_calls:
316
+ for tc_delta in delta.tool_calls:
317
+ idx = tc_delta.index
318
+ if idx not in tool_calls_acc:
319
+ tool_calls_acc[idx] = {
320
+ "id": "", "type": "function",
321
+ "function": {"name": "", "arguments": ""},
322
+ }
323
+ if tc_delta.id:
324
+ tool_calls_acc[idx]["id"] = tc_delta.id
325
+ if tc_delta.function:
326
+ if tc_delta.function.name:
327
+ tool_calls_acc[idx]["function"]["name"] += tc_delta.function.name
328
+ if tc_delta.function.arguments:
329
+ tool_calls_acc[idx]["function"]["arguments"] += tc_delta.function.arguments
330
+
331
+ if hasattr(chunk, "usage") and chunk.usage:
332
+ token_count = chunk.usage.total_tokens
333
+
334
+ return LLMResult(
335
+ content=full_content or None,
336
+ tool_calls_acc=tool_calls_acc,
337
+ token_count=token_count,
338
+ finish_reason=finish_reason,
339
+ )
340
+
341
+
342
+ async def _call_llm_non_streaming(session: Session, messages, tools, llm_params) -> LLMResult:
343
+ """Call the LLM without streaming, emit assistant_message at the end."""
344
+ response = None
345
+ for _llm_attempt in range(_MAX_LLM_RETRIES):
346
+ try:
347
+ response = await acompletion(
348
+ messages=messages,
349
+ tools=tools,
350
+ tool_choice="auto",
351
+ stream=False,
352
+ timeout=600,
353
+ **llm_params,
354
+ )
355
+ break
356
+ except ContextWindowExceededError:
357
+ raise
358
+ except Exception as e:
359
+ if _llm_attempt < _MAX_LLM_RETRIES - 1 and _is_transient_error(e):
360
+ _delay = _LLM_RETRY_DELAYS[_llm_attempt]
361
+ logger.warning(
362
+ "Transient LLM error (attempt %d/%d): %s β€” retrying in %ds",
363
+ _llm_attempt + 1, _MAX_LLM_RETRIES, e, _delay,
364
+ )
365
+ await session.send_event(Event(
366
+ event_type="tool_log",
367
+ data={"tool": "system", "log": f"LLM connection error, retrying in {_delay}s..."},
368
+ ))
369
+ await asyncio.sleep(_delay)
370
+ continue
371
+ raise
372
+
373
+ choice = response.choices[0]
374
+ message = choice.message
375
+ content = message.content or None
376
+ finish_reason = choice.finish_reason
377
+ token_count = response.usage.total_tokens if response.usage else 0
378
+
379
+ # Build tool_calls_acc in the same format as streaming
380
+ tool_calls_acc: dict[int, dict] = {}
381
+ if message.tool_calls:
382
+ for idx, tc in enumerate(message.tool_calls):
383
+ tool_calls_acc[idx] = {
384
+ "id": tc.id,
385
+ "type": "function",
386
+ "function": {
387
+ "name": tc.function.name,
388
+ "arguments": tc.function.arguments,
389
+ },
390
+ }
391
+
392
+ # Emit the full message as a single event
393
+ if content:
394
+ await session.send_event(
395
+ Event(event_type="assistant_message", data={"content": content})
396
+ )
397
+
398
+ return LLMResult(
399
+ content=content,
400
+ tool_calls_acc=tool_calls_acc,
401
+ token_count=token_count,
402
+ finish_reason=finish_reason,
403
+ )
404
+
405
+
406
  class Handlers:
407
  """Handler functions for each operation type"""
408
 
 
504
  messages = session.context_manager.get_messages()
505
  tools = session.tool_router.get_tool_specs_for_llm()
506
  try:
507
+ # ── Call the LLM (streaming or non-streaming) ──
508
  llm_params = _resolve_hf_router_params(session.config.model_name)
509
+ if session.stream:
510
+ llm_result = await _call_llm_streaming(session, messages, tools, llm_params)
511
+ else:
512
+ llm_result = await _call_llm_non_streaming(session, messages, tools, llm_params)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
513
 
514
+ content = llm_result.content
515
+ tool_calls_acc = llm_result.tool_calls_acc
516
+ token_count = llm_result.token_count
517
+ finish_reason = llm_result.finish_reason
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
518
 
519
  # If output was truncated, all tool call args are garbage.
520
  # Inject a system hint so the LLM retries with smaller content.
 
546
  session.context_manager.add_message(
547
  Message(role="user", content=f"[SYSTEM: {truncation_hint}]")
548
  )
549
+ if session.stream:
550
+ await session.send_event(
551
+ Event(event_type="assistant_stream_end", data={})
552
+ )
553
  await session.send_event(
554
  Event(
555
  event_type="tool_log",
 
575
  )
576
 
577
  # Signal end of streaming to the frontend
578
+ if session.stream:
579
+ await session.send_event(
580
+ Event(event_type="assistant_stream_end", data={})
581
+ )
582
 
583
  # If no tool calls, add assistant message and we're done
584
  if not tool_calls:
 
1123
  tool_router: ToolRouter | None = None,
1124
  session_holder: list | None = None,
1125
  hf_token: str | None = None,
1126
+ local_mode: bool = False,
1127
+ stream: bool = True,
1128
  ) -> None:
1129
  """
1130
  Main agent loop - processes submissions and dispatches to handlers.
 
1133
 
1134
  # Create session with tool router
1135
  session = Session(
1136
+ event_queue, config=config, tool_router=tool_router, hf_token=hf_token,
1137
+ local_mode=local_mode, stream=stream,
1138
  )
1139
  if session_holder is not None:
1140
  session_holder[0] = session
agent/core/session.py CHANGED
@@ -84,9 +84,12 @@ class Session:
84
  tool_router=None,
85
  context_manager: ContextManager | None = None,
86
  hf_token: str | None = None,
 
 
87
  ):
88
  self.hf_token: Optional[str] = hf_token
89
  self.tool_router = tool_router
 
90
  tool_specs = tool_router.get_tool_specs_for_llm() if tool_router else []
91
  self.context_manager = context_manager or ContextManager(
92
  max_context=_get_max_tokens_safe(config.model_name),
@@ -94,6 +97,7 @@ class Session:
94
  untouched_messages=5,
95
  tool_specs=tool_specs,
96
  hf_token=hf_token,
 
97
  )
98
  self.event_queue = event_queue
99
  self.session_id = str(uuid.uuid4())
 
84
  tool_router=None,
85
  context_manager: ContextManager | None = None,
86
  hf_token: str | None = None,
87
+ local_mode: bool = False,
88
+ stream: bool = True,
89
  ):
90
  self.hf_token: Optional[str] = hf_token
91
  self.tool_router = tool_router
92
+ self.stream = stream
93
  tool_specs = tool_router.get_tool_specs_for_llm() if tool_router else []
94
  self.context_manager = context_manager or ContextManager(
95
  max_context=_get_max_tokens_safe(config.model_name),
 
97
  untouched_messages=5,
98
  tool_specs=tool_specs,
99
  hf_token=hf_token,
100
+ local_mode=local_mode,
101
  )
102
  self.event_queue = event_queue
103
  self.session_id = str(uuid.uuid4())
agent/main.py CHANGED
@@ -1,10 +1,16 @@
1
  """
2
  Interactive CLI chat with the agent
 
 
 
 
3
  """
4
 
 
5
  import asyncio
6
  import json
7
  import os
 
8
  import time
9
  from dataclasses import dataclass
10
  from pathlib import Path
@@ -51,7 +57,7 @@ def _safe_get_args(arguments: dict) -> dict:
51
 
52
 
53
  def _get_hf_token() -> str | None:
54
- """Get HF token from environment or huggingface_hub cached login."""
55
  token = os.environ.get("HF_TOKEN")
56
  if token:
57
  return token
@@ -63,6 +69,12 @@ def _get_hf_token() -> str | None:
63
  return token
64
  except Exception:
65
  pass
 
 
 
 
 
 
66
  return None
67
 
68
 
@@ -123,6 +135,128 @@ class Submission:
123
  operation: Operation
124
 
125
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
126
  async def event_listener(
127
  event_queue: asyncio.Queue,
128
  submission_queue: asyncio.Queue,
@@ -134,6 +268,9 @@ async def event_listener(
134
  """Background task that listens for events and displays them"""
135
  submission_id = [1000] # Use list to make it mutable in closure
136
  last_tool_name = [None] # Track last tool called
 
 
 
137
 
138
  while True:
139
  try:
@@ -144,16 +281,22 @@ async def event_listener(
144
  print(format_success("\U0001f917 Agent ready"))
145
  ready_event.set()
146
  elif event.event_type == "assistant_message":
 
 
147
  content = event.data.get("content", "") if event.data else ""
148
  if content:
149
- print(f"\nAssistant: {content}")
 
150
  elif event.event_type == "assistant_chunk":
 
151
  content = event.data.get("content", "") if event.data else ""
152
  if content:
153
- print(content, end="", flush=True)
154
  elif event.event_type == "assistant_stream_end":
155
- print() # newline after streaming
156
  elif event.event_type == "tool_call":
 
 
157
  tool_name = event.data.get("tool", "") if event.data else ""
158
  arguments = event.data.get("arguments", {}) if event.data else {}
159
  if tool_name:
@@ -167,7 +310,11 @@ async def event_listener(
167
  # Don't truncate plan_tool output, truncate everything else
168
  should_truncate = last_tool_name[0] != "plan_tool"
169
  print(format_tool_output(output, success, truncate=should_truncate))
 
 
170
  elif event.event_type == "turn_complete":
 
 
171
  print(format_turn_complete())
172
  # Display plan after turn complete
173
  plan_display = format_plan_display()
@@ -175,6 +322,8 @@ async def event_listener(
175
  print(plan_display)
176
  turn_complete_event.set()
177
  elif event.event_type == "interrupted":
 
 
178
  print("\n(interrupted)")
179
  turn_complete_event.set()
180
  elif event.event_type == "undo_complete":
@@ -191,6 +340,8 @@ async def event_listener(
191
  if state in ("approved", "rejected", "running"):
192
  print(f" {tool}: {state}")
193
  elif event.event_type == "error":
 
 
194
  error = (
195
  event.data.get("error", "Unknown error")
196
  if event.data
@@ -199,9 +350,11 @@ async def event_listener(
199
  print(format_error(error))
200
  turn_complete_event.set()
201
  elif event.event_type == "shutdown":
 
 
202
  break
203
  elif event.event_type == "processing":
204
- pass # print("Processing...", flush=True)
205
  elif event.event_type == "compacted":
206
  old_tokens = event.data.get("old_tokens", 0) if event.data else 0
207
  new_tokens = event.data.get("new_tokens", 0) if event.data else 0
@@ -670,6 +823,8 @@ async def main():
670
  tool_router=tool_router,
671
  session_holder=session_holder,
672
  hf_token=hf_token,
 
 
673
  )
674
  )
675
 
@@ -762,17 +917,167 @@ async def main():
762
  )
763
  await submission_queue.put(shutdown_submission)
764
 
 
 
765
  try:
766
- await asyncio.wait_for(agent_task, timeout=5.0)
767
  except asyncio.TimeoutError:
768
  agent_task.cancel()
 
 
 
 
769
  listener_task.cancel()
770
 
771
  print("Goodbye!\n")
772
 
773
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
774
  if __name__ == "__main__":
 
 
 
 
 
 
 
 
 
 
 
 
775
  try:
776
- asyncio.run(main())
 
 
 
777
  except KeyboardInterrupt:
778
  print("\n\nGoodbye!")
 
1
  """
2
  Interactive CLI chat with the agent
3
+
4
+ Supports two modes:
5
+ Interactive: python -m agent.main
6
+ Headless: python -m agent.main "find me bird datasets"
7
  """
8
 
9
+ import argparse
10
  import asyncio
11
  import json
12
  import os
13
+ import sys
14
  import time
15
  from dataclasses import dataclass
16
  from pathlib import Path
 
57
 
58
 
59
  def _get_hf_token() -> str | None:
60
+ """Get HF token from environment, huggingface_hub API, or cached token file."""
61
  token = os.environ.get("HF_TOKEN")
62
  if token:
63
  return token
 
69
  return token
70
  except Exception:
71
  pass
72
+ # Fallback: read the cached token file directly
73
+ token_path = Path.home() / ".cache" / "huggingface" / "token"
74
+ if token_path.exists():
75
+ token = token_path.read_text().strip()
76
+ if token:
77
+ return token
78
  return None
79
 
80
 
 
135
  operation: Operation
136
 
137
 
138
+ def _create_rich_console():
139
+ """Create a rich Console for markdown rendering."""
140
+ from rich.console import Console
141
+ return Console(highlight=False)
142
+
143
+
144
+ def _render_markdown(console, text: str) -> None:
145
+ """Render markdown text to the terminal via rich."""
146
+ from rich.markdown import Markdown
147
+ console.print(Markdown(text))
148
+
149
+
150
+ class _ThinkingShimmer:
151
+ """Animated shiny/shimmer thinking indicator β€” a bright gradient sweeps across the text."""
152
+
153
+ _BASE = (90, 90, 110) # dim base color
154
+ _HIGHLIGHT = (255, 200, 80) # bright shimmer highlight (warm gold)
155
+ _WIDTH = 5 # shimmer width in characters
156
+ _FPS = 24
157
+
158
+ def __init__(self, console):
159
+ self._console = console
160
+ self._task = None
161
+ self._running = False
162
+
163
+ def start(self):
164
+ if self._running:
165
+ return
166
+ self._running = True
167
+ self._task = asyncio.ensure_future(self._animate())
168
+
169
+ def stop(self):
170
+ self._running = False
171
+ if self._task:
172
+ self._task.cancel()
173
+ self._task = None
174
+ # Clear the shimmer line
175
+ self._console.file.write("\r\033[K")
176
+ self._console.file.flush()
177
+
178
+ def _render_frame(self, text: str, offset: float) -> str:
179
+ """Render one frame: a bright spot sweeps left-to-right across `text`."""
180
+ out = []
181
+ n = len(text)
182
+ for i, ch in enumerate(text):
183
+ # Distance from the shimmer center (wraps around)
184
+ dist = abs(i - offset)
185
+ wrap_dist = abs(i - offset + n + self._WIDTH)
186
+ dist = min(dist, wrap_dist, abs(i - offset - n - self._WIDTH))
187
+ # Blend factor: 1.0 at center, 0.0 beyond _WIDTH
188
+ t = max(0.0, 1.0 - dist / self._WIDTH)
189
+ t = t * t * (3 - 2 * t) # smoothstep
190
+ r = int(self._BASE[0] + (self._HIGHLIGHT[0] - self._BASE[0]) * t)
191
+ g = int(self._BASE[1] + (self._HIGHLIGHT[1] - self._BASE[1]) * t)
192
+ b = int(self._BASE[2] + (self._HIGHLIGHT[2] - self._BASE[2]) * t)
193
+ out.append(f"\033[38;2;{r};{g};{b}m{ch}")
194
+ out.append("\033[0m")
195
+ return "".join(out)
196
+
197
+ async def _animate(self):
198
+ text = "Thinking..."
199
+ n = len(text)
200
+ speed = 0.45 # characters per frame
201
+ pos = 0.0
202
+ try:
203
+ while self._running:
204
+ frame = self._render_frame(text, pos)
205
+ self._console.file.write(f"\r{frame}")
206
+ self._console.file.flush()
207
+ pos = (pos + speed) % (n + self._WIDTH)
208
+ await asyncio.sleep(1.0 / self._FPS)
209
+ except asyncio.CancelledError:
210
+ pass
211
+
212
+
213
+ class _StreamBuffer:
214
+ """Buffers streaming chunks and renders markdown line-by-line via rich Live."""
215
+
216
+ def __init__(self, console):
217
+ self._console = console
218
+ self._buffer = ""
219
+ self._live = None
220
+ self._lines_printed = 0
221
+
222
+ def _start_live(self):
223
+ if self._live is None:
224
+ from rich.live import Live
225
+ self._live = Live(
226
+ "",
227
+ console=self._console,
228
+ refresh_per_second=8,
229
+ vertical_overflow="visible",
230
+ )
231
+ self._live.start()
232
+
233
+ def add_chunk(self, text: str):
234
+ self._buffer += text
235
+ self._start_live()
236
+ self._update()
237
+
238
+ def _update(self):
239
+ from rich.markdown import Markdown
240
+ if self._live:
241
+ self._live.update(Markdown(self._buffer))
242
+
243
+ def finish(self):
244
+ """Finalize: stop live display (final frame is already rendered)."""
245
+ if self._live:
246
+ self._live.stop()
247
+ self._live = None
248
+ self._buffer = ""
249
+ self._lines_printed = 0
250
+
251
+ def discard(self):
252
+ """Discard without final render (e.g. for tool-only turns)."""
253
+ if self._live:
254
+ self._live.stop()
255
+ self._live = None
256
+ self._buffer = ""
257
+ self._lines_printed = 0
258
+
259
+
260
  async def event_listener(
261
  event_queue: asyncio.Queue,
262
  submission_queue: asyncio.Queue,
 
268
  """Background task that listens for events and displays them"""
269
  submission_id = [1000] # Use list to make it mutable in closure
270
  last_tool_name = [None] # Track last tool called
271
+ console = _create_rich_console()
272
+ spinner = _ThinkingShimmer(console)
273
+ stream_buf = _StreamBuffer(console)
274
 
275
  while True:
276
  try:
 
281
  print(format_success("\U0001f917 Agent ready"))
282
  ready_event.set()
283
  elif event.event_type == "assistant_message":
284
+ # Non-streaming: full message arrives at once
285
+ spinner.stop()
286
  content = event.data.get("content", "") if event.data else ""
287
  if content:
288
+ console.print()
289
+ _render_markdown(console, content)
290
  elif event.event_type == "assistant_chunk":
291
+ spinner.stop()
292
  content = event.data.get("content", "") if event.data else ""
293
  if content:
294
+ stream_buf.add_chunk(content)
295
  elif event.event_type == "assistant_stream_end":
296
+ stream_buf.finish()
297
  elif event.event_type == "tool_call":
298
+ spinner.stop()
299
+ stream_buf.discard()
300
  tool_name = event.data.get("tool", "") if event.data else ""
301
  arguments = event.data.get("arguments", {}) if event.data else {}
302
  if tool_name:
 
310
  # Don't truncate plan_tool output, truncate everything else
311
  should_truncate = last_tool_name[0] != "plan_tool"
312
  print(format_tool_output(output, success, truncate=should_truncate))
313
+ # After tool output, agent will think again
314
+ spinner.start()
315
  elif event.event_type == "turn_complete":
316
+ spinner.stop()
317
+ stream_buf.discard()
318
  print(format_turn_complete())
319
  # Display plan after turn complete
320
  plan_display = format_plan_display()
 
322
  print(plan_display)
323
  turn_complete_event.set()
324
  elif event.event_type == "interrupted":
325
+ spinner.stop()
326
+ stream_buf.discard()
327
  print("\n(interrupted)")
328
  turn_complete_event.set()
329
  elif event.event_type == "undo_complete":
 
340
  if state in ("approved", "rejected", "running"):
341
  print(f" {tool}: {state}")
342
  elif event.event_type == "error":
343
+ spinner.stop()
344
+ stream_buf.discard()
345
  error = (
346
  event.data.get("error", "Unknown error")
347
  if event.data
 
350
  print(format_error(error))
351
  turn_complete_event.set()
352
  elif event.event_type == "shutdown":
353
+ spinner.stop()
354
+ stream_buf.discard()
355
  break
356
  elif event.event_type == "processing":
357
+ spinner.start()
358
  elif event.event_type == "compacted":
359
  old_tokens = event.data.get("old_tokens", 0) if event.data else 0
360
  new_tokens = event.data.get("new_tokens", 0) if event.data else 0
 
823
  tool_router=tool_router,
824
  session_holder=session_holder,
825
  hf_token=hf_token,
826
+ local_mode=True,
827
+ stream=True,
828
  )
829
  )
830
 
 
917
  )
918
  await submission_queue.put(shutdown_submission)
919
 
920
+ # Wait for agent to finish (the listener must keep draining events
921
+ # or the agent will block on event_queue.put)
922
  try:
923
+ await asyncio.wait_for(agent_task, timeout=10.0)
924
  except asyncio.TimeoutError:
925
  agent_task.cancel()
926
+ # Agent didn't shut down cleanly β€” close MCP explicitly
927
+ await tool_router.__aexit__(None, None, None)
928
+
929
+ # Now safe to cancel the listener (agent is done emitting events)
930
  listener_task.cancel()
931
 
932
  print("Goodbye!\n")
933
 
934
 
935
+ async def headless_main(prompt: str, model: str | None = None) -> None:
936
+ """Run a single prompt headlessly and exit."""
937
+ import logging
938
+
939
+ logging.basicConfig(level=logging.WARNING)
940
+
941
+ hf_token = _get_hf_token()
942
+ if not hf_token:
943
+ print("ERROR: No HF token found. Set HF_TOKEN or run `huggingface-cli login`.", file=sys.stderr)
944
+ sys.exit(1)
945
+
946
+ print(f"HF token loaded", file=sys.stderr)
947
+
948
+ config_path = Path(__file__).parent.parent / "configs" / "main_agent_config.json"
949
+ config = load_config(config_path)
950
+ config.yolo_mode = True # Auto-approve everything in headless mode
951
+
952
+ if model:
953
+ if model not in VALID_MODEL_IDS:
954
+ print(f"ERROR: Unknown model '{model}'. Valid: {', '.join(VALID_MODEL_IDS)}", file=sys.stderr)
955
+ sys.exit(1)
956
+ config.model_name = model
957
+
958
+ print(f"Model: {config.model_name}", file=sys.stderr)
959
+ print(f"Prompt: {prompt}", file=sys.stderr)
960
+ print("---", file=sys.stderr)
961
+
962
+ submission_queue: asyncio.Queue = asyncio.Queue()
963
+ event_queue: asyncio.Queue = asyncio.Queue()
964
+
965
+ tool_router = ToolRouter(config.mcpServers, hf_token=hf_token, local_mode=True)
966
+ session_holder: list = [None]
967
+
968
+ agent_task = asyncio.create_task(
969
+ submission_loop(
970
+ submission_queue,
971
+ event_queue,
972
+ config=config,
973
+ tool_router=tool_router,
974
+ session_holder=session_holder,
975
+ hf_token=hf_token,
976
+ local_mode=True,
977
+ stream=True,
978
+ )
979
+ )
980
+
981
+ # Wait for ready
982
+ while True:
983
+ event = await event_queue.get()
984
+ if event.event_type == "ready":
985
+ break
986
+
987
+ # Submit the prompt
988
+ submission = Submission(
989
+ id="sub_1",
990
+ operation=Operation(op_type=OpType.USER_INPUT, data={"text": prompt}),
991
+ )
992
+ await submission_queue.put(submission)
993
+
994
+ # Process events until turn completes
995
+ console = _create_rich_console()
996
+ err_console = _create_rich_console()
997
+ err_console.file = sys.stderr
998
+ spinner = _ThinkingShimmer(console)
999
+ stream_buf = _StreamBuffer(console)
1000
+ spinner.start()
1001
+
1002
+ while True:
1003
+ event = await event_queue.get()
1004
+
1005
+ if event.event_type == "assistant_chunk":
1006
+ spinner.stop()
1007
+ content = event.data.get("content", "") if event.data else ""
1008
+ if content:
1009
+ stream_buf.add_chunk(content)
1010
+ elif event.event_type == "assistant_stream_end":
1011
+ stream_buf.finish()
1012
+ elif event.event_type == "assistant_message":
1013
+ spinner.stop()
1014
+ content = event.data.get("content", "") if event.data else ""
1015
+ if content:
1016
+ _render_markdown(console, content)
1017
+ elif event.event_type == "tool_call":
1018
+ spinner.stop()
1019
+ stream_buf.discard()
1020
+ tool_name = event.data.get("tool", "") if event.data else ""
1021
+ arguments = event.data.get("arguments", {}) if event.data else {}
1022
+ if tool_name:
1023
+ args_str = json.dumps(arguments)[:100] + "..."
1024
+ print(format_tool_call(tool_name, args_str), file=sys.stderr)
1025
+ elif event.event_type == "tool_output":
1026
+ output = event.data.get("output", "") if event.data else ""
1027
+ success = event.data.get("success", False) if event.data else False
1028
+ if output:
1029
+ print(format_tool_output(output, success, truncate=True), file=sys.stderr)
1030
+ spinner.start()
1031
+ elif event.event_type == "tool_log":
1032
+ tool = event.data.get("tool", "") if event.data else ""
1033
+ log = event.data.get("log", "") if event.data else ""
1034
+ if log:
1035
+ print(f" [{tool}] {log}", file=sys.stderr)
1036
+ elif event.event_type == "compacted":
1037
+ old_tokens = event.data.get("old_tokens", 0) if event.data else 0
1038
+ new_tokens = event.data.get("new_tokens", 0) if event.data else 0
1039
+ print(f"Compacted: {old_tokens} -> {new_tokens} tokens", file=sys.stderr)
1040
+ elif event.event_type == "error":
1041
+ spinner.stop()
1042
+ stream_buf.discard()
1043
+ error = event.data.get("error", "Unknown error") if event.data else "Unknown error"
1044
+ print(f"ERROR: {error}", file=sys.stderr)
1045
+ break
1046
+ elif event.event_type in ("turn_complete", "interrupted"):
1047
+ spinner.stop()
1048
+ stream_buf.discard()
1049
+ break
1050
+
1051
+ # Shutdown
1052
+ shutdown_submission = Submission(
1053
+ id="sub_shutdown", operation=Operation(op_type=OpType.SHUTDOWN)
1054
+ )
1055
+ await submission_queue.put(shutdown_submission)
1056
+
1057
+ try:
1058
+ await asyncio.wait_for(agent_task, timeout=10.0)
1059
+ except asyncio.TimeoutError:
1060
+ agent_task.cancel()
1061
+ await tool_router.__aexit__(None, None, None)
1062
+
1063
+
1064
  if __name__ == "__main__":
1065
+ import logging as _logging
1066
+ import warnings
1067
+ # Suppress aiohttp "Unclosed client session" noise during event loop teardown
1068
+ _logging.getLogger("asyncio").setLevel(_logging.CRITICAL)
1069
+ # Suppress litellm pydantic deprecation warnings
1070
+ warnings.filterwarnings("ignore", category=DeprecationWarning, module="litellm")
1071
+
1072
+ parser = argparse.ArgumentParser(description="Hugging Face Agent CLI")
1073
+ parser.add_argument("prompt", nargs="?", default=None, help="Run headlessly with this prompt")
1074
+ parser.add_argument("--model", "-m", default=None, help=f"Model to use (default: from config)")
1075
+ args = parser.parse_args()
1076
+
1077
  try:
1078
+ if args.prompt:
1079
+ asyncio.run(headless_main(args.prompt, model=args.model))
1080
+ else:
1081
+ asyncio.run(main())
1082
  except KeyboardInterrupt:
1083
  print("\n\nGoodbye!")
agent/tools/local_tools.py CHANGED
@@ -227,7 +227,63 @@ async def _edit_handler(args: dict[str, Any], **_kw) -> tuple[str, bool]:
227
  return msg, True
228
 
229
 
230
- # ── Public API ──────────────────────────────────────────────────────────
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
231
 
232
  _HANDLERS = {
233
  "bash": _bash_handler,
@@ -242,7 +298,7 @@ def get_local_tools():
242
  from agent.core.tools import ToolSpec
243
 
244
  tools = []
245
- for name, spec in Sandbox.TOOLS.items():
246
  handler = _HANDLERS.get(name)
247
  if handler is None:
248
  continue
 
227
  return msg, True
228
 
229
 
230
# ── Local tool specs (override sandbox /app references) ────────────────

# Only `bash` needs a locally rewritten spec: the sandbox version talks
# about /app paths that don't apply on the local filesystem.  The
# read/write/edit tools keep the sandbox contract, so their specs are
# reused verbatim from Sandbox.TOOLS via the comprehension below.
_LOCAL_TOOL_SPECS = {
    "bash": {
        "description": (
            "Run a shell command on the local machine and return stdout/stderr.\n"
            "\n"
            "Commands run in a shell at the working directory (default: current directory). "
            "Each invocation is independent.\n"
            "\n"
            "AVOID using bash for operations covered by specialized tools:\n"
            "- File reading: use read (not cat/head/tail)\n"
            "- File editing: use edit (not sed/awk)\n"
            "- File writing: use write (not echo/cat <<EOF)\n"
            "\n"
            "Chain dependent commands with &&. Independent commands should be "
            "separate bash calls (they can run in parallel).\n"
            "\n"
            "Timeout default 120s, max 600s."
        ),
        "parameters": {
            "type": "object",
            "required": ["command"],
            "additionalProperties": False,
            "properties": {
                "command": {
                    "type": "string",
                    "description": "The shell command to execute.",
                },
                "description": {
                    "type": "string",
                    "description": "Short description (5-10 words, active voice).",
                },
                "work_dir": {
                    "type": "string",
                    "description": "Working directory (default: current directory).",
                },
                "timeout": {
                    "type": "integer",
                    "description": "Timeout in seconds (default: 120, max: 600).",
                },
            },
        },
    },
    # Sandbox specs carried over unchanged (same key order as before:
    # bash, read, write, edit).
    **{
        tool_name: {
            "description": Sandbox.TOOLS[tool_name]["description"],
            "parameters": Sandbox.TOOLS[tool_name]["parameters"],
        }
        for tool_name in ("read", "write", "edit")
    },
}
287
 
288
  _HANDLERS = {
289
  "bash": _bash_handler,
 
298
  from agent.core.tools import ToolSpec
299
 
300
  tools = []
301
+ for name, spec in _LOCAL_TOOL_SPECS.items():
302
  handler = _HANDLERS.get(name)
303
  if handler is None:
304
  continue
pyproject.toml CHANGED
@@ -20,6 +20,7 @@ agent = [
20
  "fastmcp>=2.4.0",
21
  "prompt-toolkit>=3.0.0",
22
  "thefuzz>=0.22.1",
 
23
  "nbconvert>=7.16.6",
24
  "nbformat>=5.10.4",
25
  "datasets>=4.3.0", # For session logging to HF datasets
 
20
  "fastmcp>=2.4.0",
21
  "prompt-toolkit>=3.0.0",
22
  "thefuzz>=0.22.1",
23
+ "rich>=13.0.0",
24
  "nbconvert>=7.16.6",
25
  "nbformat>=5.10.4",
26
  "datasets>=4.3.0", # For session logging to HF datasets