ml-intern

Sleeping

App Files Files Community

lewtun HF Staff OpenAI Codex committed on Apr 27

Commit

6155b26

unverified ·

1 Parent(s): 2d4ec20

Add Slack gateway (#116)

Browse files

* Add messaging gateway and notify tool

Co-authored-by: OpenAI Codex <codex@openai.com>

* Handle Bedrock streaming permission denials

Co-authored-by: OpenAI Codex <codex@openai.com>

* Revert "Handle Bedrock streaming permission denials"

Co-authored-by: OpenAI Codex <codex@openai.com>

* Add automatic completion notifications

Co-authored-by: OpenAI Codex <codex@openai.com>

* Auto-attach CLI notification destinations

Co-authored-by: OpenAI Codex <codex@openai.com>

* Defer CLI completion notifications until render

Co-authored-by: OpenAI Codex <codex@openai.com>

* Increase completion notification summary cap

Co-authored-by: OpenAI Codex <codex@openai.com>

* Add Slack user notification defaults

Co-authored-by: Codex <codex@openai.com>

* Increase Slack turn completion summary limit

Co-authored-by: Codex <codex@openai.com>

* Remove legacy auto notification event upgrade

Co-authored-by: Codex <codex@openai.com>

* Require session config instead of hard-coded model fallback

Co-authored-by: Codex <codex@openai.com>

* Address Slack notification review findings

Co-authored-by: Codex <codex@openai.com>

* Fix Anthropic thinking signature replay

Rebuild signed Anthropic thinking blocks from streaming chunks instead of replaying raw deltas, and recover stale histories by retrying once without thinking metadata when Anthropic rejects a signature.

Co-authored-by: OpenAI Codex <codex@openai.com>

* Format Slack notifications with mrkdwn

Convert common Markdown constructs in Slack notification bodies to Slack mrkdwn before posting, while preserving code spans and fenced code blocks.

Co-authored-by: OpenAI Codex <codex@openai.com>

---------

Co-authored-by: OpenAI Codex <codex@openai.com>

Files changed (24) hide show

README.md +50 -0
agent/config.py +108 -3
agent/core/agent_loop.py +132 -14
agent/core/session.py +125 -5
agent/core/tools.py +7 -0
agent/main.py +24 -2
agent/messaging/__init__.py +15 -0
agent/messaging/base.py +27 -0
agent/messaging/gateway.py +166 -0
agent/messaging/models.py +123 -0
agent/messaging/slack.py +186 -0
agent/prompts/system_prompt_v3.yaml +1 -0
agent/tools/notify_tool.py +108 -0
backend/main.py +3 -2
backend/models.py +8 -1
backend/routes/agent.py +21 -1
backend/session_manager.py +46 -1
configs/cli_agent_config.json +5 -0
pyproject.toml +1 -1
tests/unit/test_cli_rendering.py +1 -1
tests/unit/test_config.py +121 -0
tests/unit/test_messaging.py +511 -0
tests/unit/test_thinking_history.py +51 -6
uv.lock +1 -1

README.md CHANGED Viewed

@@ -56,6 +56,56 @@ ml-intern --max-iterations 100 "your prompt"
 ml-intern --no-stream "your prompt"
 ```
 ## Architecture
 ### Component Overview

 ml-intern --no-stream "your prompt"
 ```
+## Supported Gateways
+ML Intern currently supports one-way notification gateways from CLI sessions.
+These gateways send out-of-band status updates; they do not accept inbound chat
+messages.
+### Slack
+Slack notifications use the Slack Web API to post messages when the agent needs
+approval, hits an error, or completes a turn. Create a Slack app with a bot token
+that has `chat:write`, invite the bot to the target channel, then set:
+```bash
+SLACK_BOT_TOKEN=xoxb-...
+SLACK_CHANNEL_ID=C...
+```
+The CLI automatically creates a `slack.default` destination when both variables
+are present. Optional environment variables for the env-only default:
+```bash
+ML_INTERN_SLACK_NOTIFICATIONS=false
+ML_INTERN_SLACK_DESTINATION=slack.ops
+ML_INTERN_SLACK_AUTO_EVENTS=approval_required,error,turn_complete
+ML_INTERN_SLACK_ALLOW_AGENT_TOOL=true
+ML_INTERN_SLACK_ALLOW_AUTO_EVENTS=true
+```
+For a persistent user-level config, put overrides in
+`~/.config/ml-intern/cli_agent_config.json` or point `ML_INTERN_CLI_CONFIG` at a
+JSON file:
+```json
+{
+  "messaging": {
+    "enabled": true,
+    "auto_event_types": ["approval_required", "error", "turn_complete"],
+    "destinations": {
+      "slack.ops": {
+        "provider": "slack",
+        "token": "${SLACK_BOT_TOKEN}",
+        "channel": "${SLACK_CHANNEL_ID}",
+        "allow_agent_tool": true,
+        "allow_auto_events": true
+      }
+    }
+  }
+}
+```
 ## Architecture
 ### Component Overview

agent/config.py CHANGED Viewed

@@ -6,6 +6,8 @@ from typing import Any, Union
 from dotenv import load_dotenv
 # Project root: two levels up from this file (agent/config.py -> project root)
 _PROJECT_ROOT = Path(__file__).resolve().parent.parent
 from fastmcp.mcp_config import (
@@ -47,6 +49,104 @@ class Config(BaseModel):
     # ``xhigh`` or ``max`` for Anthropic 4.6 / 4.7). ``None`` = thinking off.
     # Valid values: None | "minimal" | "low" | "medium" | "high" | "xhigh" | "max"
     reasoning_effort: str | None = "max"
 def substitute_env_vars(obj: Any) -> Any:
@@ -86,7 +186,10 @@ def substitute_env_vars(obj: Any) -> Any:
     return obj
-def load_config(config_path: str = "config.json") -> Config:
     """
     Load configuration with environment variable substitution.
@@ -98,8 +201,10 @@ def load_config(config_path: str = "config.json") -> Config:
     load_dotenv(_PROJECT_ROOT / ".env")
     load_dotenv(override=False)
-    with open(config_path, "r") as f:
-        raw_config = json.load(f)
     config_with_env = substitute_env_vars(raw_config)
     return Config.model_validate(config_with_env)

 from dotenv import load_dotenv
+from agent.messaging.models import MessagingConfig
 # Project root: two levels up from this file (agent/config.py -> project root)
 _PROJECT_ROOT = Path(__file__).resolve().parent.parent
 from fastmcp.mcp_config import (
     # ``xhigh`` or ``max`` for Anthropic 4.6 / 4.7). ``None`` = thinking off.
     # Valid values: None | "minimal" | "low" | "medium" | "high" | "xhigh" | "max"
     reasoning_effort: str | None = "max"
+    messaging: MessagingConfig = MessagingConfig()
+USER_CONFIG_ENV_VAR = "ML_INTERN_CLI_CONFIG"
+DEFAULT_USER_CONFIG_PATH = Path.home() / ".config" / "ml-intern" / "cli_agent_config.json"
+SLACK_DEFAULT_DESTINATION = "slack.default"
+SLACK_DEFAULT_AUTO_EVENT_TYPES = ["approval_required", "error", "turn_complete"]
+def _deep_merge_config(base: dict[str, Any], override: dict[str, Any]) -> dict[str, Any]:
+    merged = dict(base)
+    for key, value in override.items():
+        current = merged.get(key)
+        if isinstance(current, dict) and isinstance(value, dict):
+            merged[key] = _deep_merge_config(current, value)
+        else:
+            merged[key] = value
+    return merged
+def _load_json_config(path: Path) -> dict[str, Any]:
+    with open(path, "r", encoding="utf-8") as f:
+        data = json.load(f)
+    if not isinstance(data, dict):
+        raise ValueError(f"Config file {path} must contain a JSON object")
+    return data
+def _load_user_config() -> dict[str, Any]:
+    raw_path = os.environ.get(USER_CONFIG_ENV_VAR)
+    if raw_path:
+        path = Path(raw_path).expanduser()
+        if not path.exists():
+            raise FileNotFoundError(
+                f"{USER_CONFIG_ENV_VAR} points to missing config file: {path}"
+            )
+        return _load_json_config(path)
+    if DEFAULT_USER_CONFIG_PATH.exists():
+        return _load_json_config(DEFAULT_USER_CONFIG_PATH)
+    return {}
+def _env_bool(name: str, default: bool) -> bool:
+    value = os.environ.get(name)
+    if value is None:
+        return default
+    normalized = value.strip().lower()
+    if normalized in {"1", "true", "yes", "on"}:
+        return True
+    if normalized in {"0", "false", "no", "off"}:
+        return False
+    return default
+def _env_list(name: str) -> list[str] | None:
+    value = os.environ.get(name)
+    if value is None:
+        return None
+    return [item.strip() for item in value.split(",") if item.strip()]
def apply_slack_user_defaults(raw_config: dict[str, Any]) -> dict[str, Any]:
    """Enable a default Slack destination from user env vars, when present.

    Returns ``raw_config`` unchanged when ``ML_INTERN_SLACK_NOTIFICATIONS`` is
    set to a false value, or when either the bot token (``SLACK_BOT_TOKEN``)
    or the channel (``SLACK_CHANNEL_ID``, falling back to ``SLACK_CHANNEL``)
    is missing or empty.

    Otherwise returns a copy of the config in which:

    * ``messaging.enabled`` is ``True``;
    * a Slack destination is registered under the name given by
      ``ML_INTERN_SLACK_DESTINATION`` (default ``SLACK_DEFAULT_DESTINATION``),
      unless a destination with that name is already configured, in which
      case the existing entry is kept;
    * ``messaging.auto_event_types`` comes from ``ML_INTERN_SLACK_AUTO_EVENTS``
      when that variable is set, else the value already in the config, else
      ``SLACK_DEFAULT_AUTO_EVENT_TYPES``.

    Args:
        raw_config: Parsed configuration mapping. Its top level and its
            ``messaging`` / ``destinations`` mappings are copied, not mutated.

    Returns:
        The original mapping (when nothing applies) or an updated copy.
    """
    if not _env_bool("ML_INTERN_SLACK_NOTIFICATIONS", True):
        return raw_config

    token = os.environ.get("SLACK_BOT_TOKEN")
    channel = os.environ.get("SLACK_CHANNEL_ID") or os.environ.get("SLACK_CHANNEL")
    if not token or not channel:
        return raw_config

    config = dict(raw_config)
    messaging = dict(config.get("messaging") or {})
    destinations = dict(messaging.get("destinations") or {})

    # Strip before falling back so a whitespace-only override resolves to the
    # default name instead of registering the destination under "".
    destination_name = (
        (os.environ.get("ML_INTERN_SLACK_DESTINATION") or "").strip()
        or SLACK_DEFAULT_DESTINATION
    )
    if destination_name not in destinations:
        destinations[destination_name] = {
            "provider": "slack",
            "token": token,
            "channel": channel,
            "allow_agent_tool": _env_bool("ML_INTERN_SLACK_ALLOW_AGENT_TOOL", True),
            "allow_auto_events": _env_bool("ML_INTERN_SLACK_ALLOW_AUTO_EVENTS", True),
        }

    auto_events = _env_list("ML_INTERN_SLACK_AUTO_EVENTS")
    if auto_events is not None:
        messaging["auto_event_types"] = auto_events
    elif "auto_event_types" not in messaging:
        # Copy so later edits to the config cannot mutate the module constant.
        messaging["auto_event_types"] = list(SLACK_DEFAULT_AUTO_EVENT_TYPES)

    messaging["enabled"] = True
    messaging["destinations"] = destinations
    config["messaging"] = messaging
    return config
 def substitute_env_vars(obj: Any) -> Any:
     return obj
+def load_config(
+    config_path: str = "config.json",
+    include_user_defaults: bool = False,
+) -> Config:
     """
     Load configuration with environment variable substitution.
     load_dotenv(_PROJECT_ROOT / ".env")
     load_dotenv(override=False)
+    raw_config = _load_json_config(Path(config_path))
+    if include_user_defaults:
+        raw_config = _deep_merge_config(raw_config, _load_user_config())
+        raw_config = apply_slack_user_defaults(raw_config)
     config_with_env = substitute_env_vars(raw_config)
     return Config.model_validate(config_with_env)

agent/core/agent_loop.py CHANGED Viewed

@@ -19,6 +19,7 @@ from litellm import (
 from litellm.exceptions import ContextWindowExceededError
 from agent.config import Config
 from agent.core import telemetry
 from agent.core.doom_loop import check_for_doom_loop
 from agent.core.llm_params import _resolve_llm_params
@@ -432,6 +433,103 @@ def _should_replay_thinking_state(model_name: str | None) -> bool:
     return bool(model_name and model_name.startswith("anthropic/"))
 def _assistant_message_from_result(
     llm_result: LLMResult,
     *,
@@ -457,6 +555,7 @@ async def _call_llm_streaming(session: Session, messages, tools, llm_params) ->
     """Call the LLM with streaming, emitting assistant_chunk events."""
     response = None
     _healed_effort = False  # one-shot safety net per call
     messages, tools = with_prompt_caching(messages, tools, llm_params.get("model"))
     t_start = time.monotonic()
     for _llm_attempt in range(_MAX_LLM_RETRIES):
@@ -484,6 +583,14 @@ async def _call_llm_streaming(session: Session, messages, tools, llm_params) ->
                     data={"tool": "system", "log": "Reasoning effort not supported for this model — adjusting and retrying."},
                 ))
                 continue
             _delay = _retry_delay_for(e, _llm_attempt)
             if _llm_attempt < _MAX_LLM_RETRIES - 1 and _delay is not None:
                 logger.warning(
@@ -505,8 +612,6 @@ async def _call_llm_streaming(session: Session, messages, tools, llm_params) ->
     final_usage_chunk = None
     chunks = []
     should_replay_thinking = _should_replay_thinking_state(llm_params.get("model"))
-    collected_thinking_blocks: list[dict[str, Any]] = []
-    collected_reasoning_content: list[str] = []
     async for chunk in response:
         chunks.append(chunk)
@@ -525,13 +630,6 @@ async def _call_llm_streaming(session: Session, messages, tools, llm_params) ->
         if choice.finish_reason:
             finish_reason = choice.finish_reason
-        if should_replay_thinking:
-            delta_thinking_blocks, delta_reasoning_content = _extract_thinking_state(delta)
-            if delta_thinking_blocks:
-                collected_thinking_blocks.extend(delta_thinking_blocks)
-            if delta_reasoning_content:
-                collected_reasoning_content.append(delta_reasoning_content)
         if delta.content:
             full_content += delta.content
             await session.send_event(
@@ -565,9 +663,9 @@ async def _call_llm_streaming(session: Session, messages, tools, llm_params) ->
         latency_ms=int((time.monotonic() - t_start) * 1000),
         finish_reason=finish_reason,
     )
-    thinking_blocks = collected_thinking_blocks or None
-    reasoning_content = "".join(collected_reasoning_content) or None
-    if chunks and should_replay_thinking and not (thinking_blocks or reasoning_content):
         try:
             rebuilt = stream_chunk_builder(chunks, messages=messages)
             if rebuilt and getattr(rebuilt, "choices", None):
@@ -591,6 +689,7 @@ async def _call_llm_non_streaming(session: Session, messages, tools, llm_params)
     """Call the LLM without streaming, emit assistant_message at the end."""
     response = None
     _healed_effort = False
     messages, tools = with_prompt_caching(messages, tools, llm_params.get("model"))
     t_start = time.monotonic()
     for _llm_attempt in range(_MAX_LLM_RETRIES):
@@ -617,6 +716,14 @@ async def _call_llm_non_streaming(session: Session, messages, tools, llm_params)
                     data={"tool": "system", "log": "Reasoning effort not supported for this model — adjusting and retrying."},
                 ))
                 continue
             _delay = _retry_delay_for(e, _llm_attempt)
             if _llm_attempt < _MAX_LLM_RETRIES - 1 and _delay is not None:
                 logger.warning(
@@ -1128,7 +1235,12 @@ class Handlers:
             await session.send_event(
                 Event(
                     event_type="turn_complete",
-                    data={"history_size": len(session.context_manager.items)},
                 )
             )
@@ -1437,13 +1549,16 @@ async def process_submission(session: Session, submission) -> bool:
 async def submission_loop(
     submission_queue: asyncio.Queue,
     event_queue: asyncio.Queue,
-    config: Config | None = None,
     tool_router: ToolRouter | None = None,
     session_holder: list | None = None,
     hf_token: str | None = None,
     user_id: str | None = None,
     local_mode: bool = False,
     stream: bool = True,
 ) -> None:
     """
     Main agent loop - processes submissions and dispatches to handlers.
@@ -1454,6 +1569,9 @@ async def submission_loop(
     session = Session(
         event_queue, config=config, tool_router=tool_router, hf_token=hf_token,
         user_id=user_id, local_mode=local_mode, stream=stream,
     )
     if session_holder is not None:
         session_holder[0] = session

 from litellm.exceptions import ContextWindowExceededError
 from agent.config import Config
+from agent.messaging.gateway import NotificationGateway
 from agent.core import telemetry
 from agent.core.doom_loop import check_for_doom_loop
 from agent.core.llm_params import _resolve_llm_params
     return bool(model_name and model_name.startswith("anthropic/"))
+def _is_invalid_thinking_signature_error(exc: Exception) -> bool:
+    """Return True when Anthropic rejected replayed extended-thinking state."""
+    text = str(exc)
+    return (
+        "Invalid `signature` in `thinking` block" in text
+        or "Invalid signature in thinking block" in text
+    )
+def _strip_thinking_state_from_messages(messages: list[Any]) -> int:
+    """Remove replayed thinking metadata from assistant history messages."""
+    stripped = 0
+    for message in messages:
+        role = (
+            message.get("role")
+            if isinstance(message, dict)
+            else getattr(message, "role", None)
+        )
+        if role != "assistant":
+            continue
+        if isinstance(message, dict):
+            if message.pop("thinking_blocks", None) is not None:
+                stripped += 1
+            if message.pop("reasoning_content", None) is not None:
+                stripped += 1
+            provider_fields = message.get("provider_specific_fields")
+            content = message.get("content")
+        else:
+            if getattr(message, "thinking_blocks", None) is not None:
+                message.thinking_blocks = None
+                stripped += 1
+            if getattr(message, "reasoning_content", None) is not None:
+                message.reasoning_content = None
+                stripped += 1
+            provider_fields = getattr(message, "provider_specific_fields", None)
+            content = getattr(message, "content", None)
+        if isinstance(provider_fields, dict):
+            cleaned_fields = dict(provider_fields)
+            if cleaned_fields.pop("thinking_blocks", None) is not None:
+                stripped += 1
+            if cleaned_fields.pop("reasoning_content", None) is not None:
+                stripped += 1
+            if cleaned_fields != provider_fields:
+                if isinstance(message, dict):
+                    message["provider_specific_fields"] = cleaned_fields
+                else:
+                    message.provider_specific_fields = cleaned_fields
+        if isinstance(content, list):
+            cleaned_content = [
+                block
+                for block in content
+                if not (
+                    isinstance(block, dict)
+                    and block.get("type") in {"thinking", "redacted_thinking"}
+                )
+            ]
+            if len(cleaned_content) != len(content):
+                stripped += len(content) - len(cleaned_content)
+                if isinstance(message, dict):
+                    message["content"] = cleaned_content
+                else:
+                    message.content = cleaned_content
+    return stripped
+async def _maybe_heal_invalid_thinking_signature(
+    session: Session,
+    messages: list[Any],
+    exc: Exception,
+    *,
+    already_healed: bool,
+) -> bool:
+    if already_healed or not _is_invalid_thinking_signature_error(exc):
+        return False
+    stripped = _strip_thinking_state_from_messages(messages)
+    if not stripped:
+        return False
+    await session.send_event(Event(
+        event_type="tool_log",
+        data={
+            "tool": "system",
+            "log": (
+                "Anthropic rejected stale thinking signatures; retrying "
+                "without replayed thinking metadata."
+            ),
+        },
+    ))
+    return True
 def _assistant_message_from_result(
     llm_result: LLMResult,
     *,
     """Call the LLM with streaming, emitting assistant_chunk events."""
     response = None
     _healed_effort = False  # one-shot safety net per call
+    _healed_thinking_signature = False
     messages, tools = with_prompt_caching(messages, tools, llm_params.get("model"))
     t_start = time.monotonic()
     for _llm_attempt in range(_MAX_LLM_RETRIES):
                     data={"tool": "system", "log": "Reasoning effort not supported for this model — adjusting and retrying."},
                 ))
                 continue
+            if await _maybe_heal_invalid_thinking_signature(
+                session,
+                messages,
+                e,
+                already_healed=_healed_thinking_signature,
+            ):
+                _healed_thinking_signature = True
+                continue
             _delay = _retry_delay_for(e, _llm_attempt)
             if _llm_attempt < _MAX_LLM_RETRIES - 1 and _delay is not None:
                 logger.warning(
     final_usage_chunk = None
     chunks = []
     should_replay_thinking = _should_replay_thinking_state(llm_params.get("model"))
     async for chunk in response:
         chunks.append(chunk)
         if choice.finish_reason:
             finish_reason = choice.finish_reason
         if delta.content:
             full_content += delta.content
             await session.send_event(
         latency_ms=int((time.monotonic() - t_start) * 1000),
         finish_reason=finish_reason,
     )
+    thinking_blocks = None
+    reasoning_content = None
+    if chunks and should_replay_thinking:
         try:
             rebuilt = stream_chunk_builder(chunks, messages=messages)
             if rebuilt and getattr(rebuilt, "choices", None):
     """Call the LLM without streaming, emit assistant_message at the end."""
     response = None
     _healed_effort = False
+    _healed_thinking_signature = False
     messages, tools = with_prompt_caching(messages, tools, llm_params.get("model"))
     t_start = time.monotonic()
     for _llm_attempt in range(_MAX_LLM_RETRIES):
                     data={"tool": "system", "log": "Reasoning effort not supported for this model — adjusting and retrying."},
                 ))
                 continue
+            if await _maybe_heal_invalid_thinking_signature(
+                session,
+                messages,
+                e,
+                already_healed=_healed_thinking_signature,
+            ):
+                _healed_thinking_signature = True
+                continue
             _delay = _retry_delay_for(e, _llm_attempt)
             if _llm_attempt < _MAX_LLM_RETRIES - 1 and _delay is not None:
                 logger.warning(
             await session.send_event(
                 Event(
                     event_type="turn_complete",
+                    data={
+                        "history_size": len(session.context_manager.items),
+                        "final_response": final_response
+                        if isinstance(final_response, str)
+                        else None,
+                    },
                 )
             )
 async def submission_loop(
     submission_queue: asyncio.Queue,
     event_queue: asyncio.Queue,
+    config: Config,
     tool_router: ToolRouter | None = None,
     session_holder: list | None = None,
     hf_token: str | None = None,
     user_id: str | None = None,
     local_mode: bool = False,
     stream: bool = True,
+    notification_gateway: NotificationGateway | None = None,
+    notification_destinations: list[str] | None = None,
+    defer_turn_complete_notification: bool = False,
 ) -> None:
     """
     Main agent loop - processes submissions and dispatches to handlers.
     session = Session(
         event_queue, config=config, tool_router=tool_router, hf_token=hf_token,
         user_id=user_id, local_mode=local_mode, stream=stream,
+        notification_gateway=notification_gateway,
+        notification_destinations=notification_destinations,
+        defer_turn_complete_notification=defer_turn_complete_notification,
     )
     if session_holder is not None:
         session_holder[0] = session

agent/core/session.py CHANGED Viewed

@@ -12,10 +12,13 @@ from typing import Any, Optional
 from agent.config import Config
 from agent.context_manager.manager import ContextManager
 logger = logging.getLogger(__name__)
 _DEFAULT_MAX_TOKENS = 200_000
 def _get_max_tokens_safe(model_name: str) -> int:
@@ -73,18 +76,24 @@ class Session:
     def __init__(
         self,
         event_queue: asyncio.Queue,
-        config: Config | None = None,
         tool_router=None,
         context_manager: ContextManager | None = None,
         hf_token: str | None = None,
         local_mode: bool = False,
         stream: bool = True,
         user_id: str | None = None,
     ):
         self.hf_token: Optional[str] = hf_token
         self.user_id: Optional[str] = user_id
         self.tool_router = tool_router
         self.stream = stream
         tool_specs = tool_router.get_tool_specs_for_llm() if tool_router else []
         self.context_manager = context_manager or ContextManager(
             model_max_tokens=_get_max_tokens_safe(config.model_name),
@@ -95,15 +104,16 @@ class Session:
             local_mode=local_mode,
         )
         self.event_queue = event_queue
-        self.session_id = str(uuid.uuid4())
-        self.config = config or Config(
-            model_name="bedrock/us.anthropic.claude-sonnet-4-5-20250929-v1:0",
-        )
         self.is_running = True
         self._cancelled = asyncio.Event()
         self.pending_approval: Optional[dict[str, Any]] = None
         self.sandbox = None
         self._running_job_ids: set[str] = set()  # HF job IDs currently executing
         # Session trajectory logging
         self.logged_events: list[dict] = []
@@ -138,11 +148,121 @@ class Session:
                 "data": event.data,
             }
         )
         # Mid-turn heartbeat flush (owned by telemetry module).
         from agent.core.telemetry import HeartbeatSaver
         HeartbeatSaver.maybe_fire(self)
     def cancel(self) -> None:
         """Signal cancellation to the running agent loop."""
         self._cancelled.set()

 from agent.config import Config
 from agent.context_manager.manager import ContextManager
+from agent.messaging.gateway import NotificationGateway
+from agent.messaging.models import NotificationRequest
 logger = logging.getLogger(__name__)
 _DEFAULT_MAX_TOKENS = 200_000
+_TURN_COMPLETE_NOTIFICATION_CHARS = 39000
 def _get_max_tokens_safe(model_name: str) -> int:
     def __init__(
         self,
         event_queue: asyncio.Queue,
+        config: Config,
         tool_router=None,
         context_manager: ContextManager | None = None,
         hf_token: str | None = None,
         local_mode: bool = False,
         stream: bool = True,
+        notification_gateway: NotificationGateway | None = None,
+        notification_destinations: list[str] | None = None,
+        defer_turn_complete_notification: bool = False,
+        session_id: str | None = None,
         user_id: str | None = None,
     ):
         self.hf_token: Optional[str] = hf_token
         self.user_id: Optional[str] = user_id
         self.tool_router = tool_router
         self.stream = stream
+        if config is None:
+            raise ValueError("Session requires a Config")
         tool_specs = tool_router.get_tool_specs_for_llm() if tool_router else []
         self.context_manager = context_manager or ContextManager(
             model_max_tokens=_get_max_tokens_safe(config.model_name),
             local_mode=local_mode,
         )
         self.event_queue = event_queue
+        self.session_id = session_id or str(uuid.uuid4())
+        self.config = config
         self.is_running = True
         self._cancelled = asyncio.Event()
         self.pending_approval: Optional[dict[str, Any]] = None
         self.sandbox = None
         self._running_job_ids: set[str] = set()  # HF job IDs currently executing
+        self.notification_gateway = notification_gateway
+        self.notification_destinations = list(notification_destinations or [])
+        self.defer_turn_complete_notification = defer_turn_complete_notification
         # Session trajectory logging
         self.logged_events: list[dict] = []
                 "data": event.data,
             }
         )
+        await self._enqueue_auto_notification_requests(event)
         # Mid-turn heartbeat flush (owned by telemetry module).
         from agent.core.telemetry import HeartbeatSaver
         HeartbeatSaver.maybe_fire(self)
+    def set_notification_destinations(self, destinations: list[str]) -> None:
+        """Replace the session's opted-in auto-notification destinations."""
+        deduped: list[str] = []
+        seen: set[str] = set()
+        for destination in destinations:
+            if destination not in seen:
+                deduped.append(destination)
+                seen.add(destination)
+        self.notification_destinations = deduped
+    async def send_deferred_turn_complete_notification(self, event: Event) -> None:
+        if event.event_type != "turn_complete":
+            return
+        await self._enqueue_auto_notification_requests(
+            event,
+            include_deferred_turn_complete=True,
+        )
+    async def _enqueue_auto_notification_requests(
+        self,
+        event: Event,
+        include_deferred_turn_complete: bool = False,
+    ) -> None:
+        if self.notification_gateway is None:
+            return
+        if not self.notification_destinations:
+            return
+        auto_events = set(self.config.messaging.auto_event_types)
+        if event.event_type not in auto_events:
+            return
+        if (
+            self.defer_turn_complete_notification
+            and event.event_type == "turn_complete"
+            and not include_deferred_turn_complete
+        ):
+            return
+        requests = self._build_auto_notification_requests(event)
+        for request in requests:
+            await self.notification_gateway.enqueue(request)
+    def _build_auto_notification_requests(
+        self, event: Event
+    ) -> list[NotificationRequest]:
+        metadata = {
+            "session_id": self.session_id,
+            "model": self.config.model_name,
+            "event_type": event.event_type,
+        }
+        title: str | None = None
+        message: str | None = None
+        severity = "info"
+        data = event.data or {}
+        if event.event_type == "approval_required":
+            tools = data.get("tools", [])
+            tool_names = []
+            for tool in tools if isinstance(tools, list) else []:
+                if isinstance(tool, dict):
+                    tool_name = str(tool.get("tool") or "").strip()
+                    if tool_name and tool_name not in tool_names:
+                        tool_names.append(tool_name)
+            count = len(tools) if isinstance(tools, list) else 0
+            title = "Agent approval required"
+            message = (
+                f"Session {self.session_id} is waiting for approval "
+                f"for {count} tool call(s)."
+            )
+            if tool_names:
+                message += " Tools: " + ", ".join(tool_names)
+            severity = "warning"
+        elif event.event_type == "error":
+            title = "Agent error"
+            error = str(data.get("error") or "Unknown error")
+            message = f"Session {self.session_id} hit an error.\n{error[:500]}"
+            severity = "error"
+        elif event.event_type == "turn_complete":
+            title = "Agent task complete"
+            summary = str(data.get("final_response") or "").strip()
+            if summary:
+                summary = summary[:_TURN_COMPLETE_NOTIFICATION_CHARS]
+                message = (
+                    f"Session {self.session_id} completed successfully.\n"
+                    f"{summary}"
+                )
+            else:
+                message = f"Session {self.session_id} completed successfully."
+            severity = "success"
+        if message is None:
+            return []
+        requests: list[NotificationRequest] = []
+        for destination in self.notification_destinations:
+            if not self.config.messaging.can_auto_send(destination):
+                continue
+            requests.append(
+                NotificationRequest(
+                    destination=destination,
+                    title=title,
+                    message=message,
+                    severity=severity,
+                    metadata=metadata,
+                    event_type=event.event_type,
+                )
+            )
+        return requests
     def cancel(self) -> None:
         """Signal cancellation to the running agent loop."""
         self._cancelled.set()

agent/core/tools.py CHANGED Viewed

@@ -46,6 +46,7 @@ from agent.tools.hf_repo_git_tool import (
     hf_repo_git_handler,
 )
 from agent.tools.jobs_tool import HF_JOBS_TOOL_SPEC, hf_jobs_handler
 from agent.tools.papers_tool import HF_PAPERS_TOOL_SPEC, hf_papers_handler
 from agent.tools.plan_tool import PLAN_TOOL_SPEC, plan_tool_handler
 from agent.tools.research_tool import RESEARCH_TOOL_SPEC, research_handler
@@ -324,6 +325,12 @@ def create_builtin_tools(local_mode: bool = False) -> list[ToolSpec]:
             parameters=PLAN_TOOL_SPEC["parameters"],
             handler=plan_tool_handler,
         ),
         ToolSpec(
             name=HF_JOBS_TOOL_SPEC["name"],
             description=HF_JOBS_TOOL_SPEC["description"],

     hf_repo_git_handler,
 )
 from agent.tools.jobs_tool import HF_JOBS_TOOL_SPEC, hf_jobs_handler
+from agent.tools.notify_tool import NOTIFY_TOOL_SPEC, notify_handler
 from agent.tools.papers_tool import HF_PAPERS_TOOL_SPEC, hf_papers_handler
 from agent.tools.plan_tool import PLAN_TOOL_SPEC, plan_tool_handler
 from agent.tools.research_tool import RESEARCH_TOOL_SPEC, research_handler
             parameters=PLAN_TOOL_SPEC["parameters"],
             handler=plan_tool_handler,
         ),
+        ToolSpec(
+            name=NOTIFY_TOOL_SPEC["name"],
+            description=NOTIFY_TOOL_SPEC["description"],
+            parameters=NOTIFY_TOOL_SPEC["parameters"],
+            handler=notify_handler,
+        ),
         ToolSpec(
             name=HF_JOBS_TOOL_SPEC["name"],
             description=HF_JOBS_TOOL_SPEC["description"],

agent/main.py CHANGED Viewed

@@ -26,6 +26,7 @@ from agent.core import model_switcher
 from agent.core.hf_tokens import resolve_hf_token
 from agent.core.session import OpType
 from agent.core.tools import ToolRouter
 from agent.utils.reliability_checks import check_training_script_save_pattern
 from agent.utils.terminal_display import (
     get_console,
@@ -332,6 +333,9 @@ async def event_listener(
                 stream_buf.discard()
                 print_turn_complete()
                 print_plan()
                 turn_complete_event.set()
             elif event.event_type == "interrupted":
                 shimmer.stop()
@@ -821,7 +825,7 @@ async def main(model: str | None = None):
     if not hf_token:
         hf_token = await _prompt_and_save_hf_token(prompt_session)
-    config = load_config(CLI_CONFIG_PATH)
     if model:
         config.model_name = model
@@ -844,6 +848,8 @@ async def main(model: str | None = None):
     turn_complete_event.set()
     ready_event = asyncio.Event()
     # Create tool router with local mode
     tool_router = ToolRouter(config.mcpServers, hf_token=hf_token, local_mode=True)
@@ -861,6 +867,9 @@ async def main(model: str | None = None):
             user_id=hf_user,
             local_mode=True,
             stream=True,
         )
     )
@@ -1016,6 +1025,8 @@ async def main(model: str | None = None):
         agent_task.cancel()
         # Agent didn't shut down cleanly — close MCP explicitly
         await tool_router.__aexit__(None, None, None)
     # Now safe to cancel the listener (agent is done emitting events)
     listener_task.cancel()
@@ -1042,8 +1053,10 @@ async def headless_main(
     print(f"HF token loaded", file=sys.stderr)
-    config = load_config(CLI_CONFIG_PATH)
     config.yolo_mode = True  # Auto-approve everything in headless mode
     hf_user = _get_hf_user(hf_token)
     if model:
@@ -1074,6 +1087,9 @@ async def headless_main(
             user_id=hf_user,
             local_mode=True,
             stream=stream,
         )
     )
@@ -1199,6 +1215,10 @@ async def headless_main(
             stream_buf.discard()
             history_size = event.data.get("history_size", "?") if event.data else "?"
             print(f"\n--- Agent {event.event_type} (history_size={history_size}) ---", file=sys.stderr)
             break
     # Shutdown
@@ -1212,6 +1232,8 @@ async def headless_main(
     except asyncio.TimeoutError:
         agent_task.cancel()
         await tool_router.__aexit__(None, None, None)
 def cli():

 from agent.core.hf_tokens import resolve_hf_token
 from agent.core.session import OpType
 from agent.core.tools import ToolRouter
+from agent.messaging.gateway import NotificationGateway
 from agent.utils.reliability_checks import check_training_script_save_pattern
 from agent.utils.terminal_display import (
     get_console,
                 stream_buf.discard()
                 print_turn_complete()
                 print_plan()
+                session = session_holder[0] if session_holder else None
+                if session is not None:
+                    await session.send_deferred_turn_complete_notification(event)
                 turn_complete_event.set()
             elif event.event_type == "interrupted":
                 shimmer.stop()
     if not hf_token:
         hf_token = await _prompt_and_save_hf_token(prompt_session)
+    config = load_config(CLI_CONFIG_PATH, include_user_defaults=True)
     if model:
         config.model_name = model
     turn_complete_event.set()
     ready_event = asyncio.Event()
+    notification_gateway = NotificationGateway(config.messaging)
+    await notification_gateway.start()
     # Create tool router with local mode
     tool_router = ToolRouter(config.mcpServers, hf_token=hf_token, local_mode=True)
             user_id=hf_user,
             local_mode=True,
             stream=True,
+            notification_gateway=notification_gateway,
+            notification_destinations=config.messaging.default_auto_destinations(),
+            defer_turn_complete_notification=True,
         )
     )
         agent_task.cancel()
         # Agent didn't shut down cleanly — close MCP explicitly
         await tool_router.__aexit__(None, None, None)
+    finally:
+        await notification_gateway.close()
     # Now safe to cancel the listener (agent is done emitting events)
     listener_task.cancel()
     print(f"HF token loaded", file=sys.stderr)
+    config = load_config(CLI_CONFIG_PATH, include_user_defaults=True)
     config.yolo_mode = True  # Auto-approve everything in headless mode
+    notification_gateway = NotificationGateway(config.messaging)
+    await notification_gateway.start()
     hf_user = _get_hf_user(hf_token)
     if model:
             user_id=hf_user,
             local_mode=True,
             stream=stream,
+            notification_gateway=notification_gateway,
+            notification_destinations=config.messaging.default_auto_destinations(),
+            defer_turn_complete_notification=True,
         )
     )
             stream_buf.discard()
             history_size = event.data.get("history_size", "?") if event.data else "?"
             print(f"\n--- Agent {event.event_type} (history_size={history_size}) ---", file=sys.stderr)
+            if event.event_type == "turn_complete":
+                session = session_holder[0] if session_holder else None
+                if session is not None:
+                    await session.send_deferred_turn_complete_notification(event)
             break
     # Shutdown
     except asyncio.TimeoutError:
         agent_task.cancel()
         await tool_router.__aexit__(None, None, None)
+    finally:
+        await notification_gateway.close()
 def cli():

agent/messaging/__init__.py ADDED Viewed

	@@ -0,0 +1,15 @@

+from agent.messaging.gateway import NotificationGateway
+from agent.messaging.models import (
+    MessagingConfig,
+    NotificationRequest,
+    NotificationResult,
+    SUPPORTED_AUTO_EVENT_TYPES,
+)
+__all__ = [
+    "MessagingConfig",
+    "NotificationGateway",
+    "NotificationRequest",
+    "NotificationResult",
+    "SUPPORTED_AUTO_EVENT_TYPES",
+]

agent/messaging/base.py ADDED Viewed

	@@ -0,0 +1,27 @@

+from abc import ABC, abstractmethod
+import httpx
+from agent.messaging.models import DestinationConfig, NotificationRequest, NotificationResult
+class NotificationError(Exception):
+    """Delivery failed and should not be retried.
+    ``RetryableNotificationError`` subclasses this, so code that needs to
+    tell the two apart must catch the retryable subclass first.
+    """
+class RetryableNotificationError(NotificationError):
+    """Delivery failed transiently and can be retried.
+    Providers raise this for conditions such as timeouts, transport errors
+    and rate limiting; the caller decides how often and how long to retry.
+    """
+class NotificationProvider(ABC):
+    """Interface implemented by each messaging backend."""
+    # Short backend identifier (the Slack provider uses "slack"); it is
+    # reported back in ``NotificationResult.provider``.
+    provider_name: str
+    @abstractmethod
+    async def send(
+        self,
+        client: httpx.AsyncClient,
+        destination_name: str,
+        destination: DestinationConfig,
+        request: NotificationRequest,
+    ) -> NotificationResult:
+        """Deliver a notification to one destination.
+        Args:
+            client: HTTP client supplied by the caller.
+            destination_name: Configured name of the destination, echoed in
+                the result.
+            destination: Provider-specific settings for that destination.
+            request: The notification to deliver.
+        Returns:
+            The delivery result for this destination.
+        Raises:
+            RetryableNotificationError: The failure is transient.
+            NotificationError: The failure is permanent.
+        """

agent/messaging/gateway.py ADDED Viewed

	@@ -0,0 +1,166 @@

+import asyncio
+import logging
+from collections.abc import Iterable
+import httpx
+from agent.messaging.base import (
+    NotificationError,
+    NotificationProvider,
+    RetryableNotificationError,
+)
+from agent.messaging.models import (
+    MessagingConfig,
+    NotificationRequest,
+    NotificationResult,
+)
+from agent.messaging.slack import SlackProvider
+logger = logging.getLogger(__name__)
+# Seconds to sleep before each retry of a transiently failed delivery. With
+# three entries a request is attempted at most four times.
+_RETRY_DELAYS = (1, 2, 4)
+class NotificationGateway:
+    """Route notification requests to provider implementations.
+    Requests are delivered inline via ``send``/``send_many`` or queued via
+    ``enqueue`` for the background worker created by ``start``. While
+    ``config.enabled`` is false, ``start``/``flush``/``close`` do nothing,
+    ``enqueue`` returns ``False`` and ``send`` returns a failed result.
+    """
+    def __init__(self, config: MessagingConfig):
+        """Store ``config`` and register the built-in providers."""
+        self.config = config
+        # Provider implementations keyed by a destination's ``provider`` value.
+        self._providers: dict[str, NotificationProvider] = {
+            "slack": SlackProvider(),
+        }
+        self._queue: asyncio.Queue[NotificationRequest] = asyncio.Queue()
+        # Both stay None until start() runs; close() resets them to None.
+        self._worker_task: asyncio.Task | None = None
+        self._client: httpx.AsyncClient | None = None
+    @property
+    def enabled(self) -> bool:
+        """Whether messaging is switched on in the config."""
+        return self.config.enabled
+    async def start(self) -> None:
+        """Create the shared HTTP client and the queue worker.
+        Does nothing when messaging is disabled or a worker already exists.
+        """
+        if not self.enabled or self._worker_task is not None:
+            return
+        self._client = httpx.AsyncClient(timeout=10.0)
+        self._worker_task = asyncio.create_task(self._worker(), name="notification-gateway")
+    async def flush(self) -> None:
+        """Wait until every queued request has been marked done by the worker."""
+        if not self.enabled:
+            return
+        await self._queue.join()
+    async def close(self) -> None:
+        """Drain the queue, then stop the worker and close the HTTP client.
+        The teardown runs in ``finally`` blocks so the worker task and the
+        client are released even when waiting for the queue is cancelled.
+        """
+        if not self.enabled:
+            return
+        try:
+            await self.flush()
+        finally:
+            # Detach both handles first so concurrent enqueue() calls see the
+            # gateway as stopped while the worker is being cancelled.
+            worker, self._worker_task = self._worker_task, None
+            client, self._client = self._client, None
+            try:
+                if worker is not None:
+                    worker.cancel()
+                    try:
+                        await worker
+                    except asyncio.CancelledError:
+                        pass
+            finally:
+                if client is not None:
+                    await client.aclose()
+    @staticmethod
+    def _failure(destination: str, provider: str, error: str) -> NotificationResult:
+        """Build a failed result for ``destination``."""
+        return NotificationResult(
+            destination=destination,
+            ok=False,
+            provider=provider,
+            error=error,
+        )
+    async def send(self, request: NotificationRequest) -> NotificationResult:
+        """Deliver one request inline and return its result.
+        Configuration problems (messaging disabled, unknown destination,
+        missing provider implementation) are reported as failed results
+        rather than raised.
+        """
+        if not self.enabled:
+            return self._failure(request.destination, "disabled", "Messaging is disabled")
+        destination = self.config.get_destination(request.destination)
+        if destination is None:
+            return self._failure(
+                request.destination,
+                "unknown",
+                f"Unknown destination '{request.destination}'",
+            )
+        provider = self._providers.get(destination.provider)
+        if provider is None:
+            return self._failure(
+                request.destination,
+                destination.provider,
+                f"No provider implementation for '{destination.provider}'",
+            )
+        return await self._send_with_retries(provider, request.destination, destination, request)
+    async def send_many(
+        self, requests: Iterable[NotificationRequest]
+    ) -> list[NotificationResult]:
+        """Deliver requests one after another, returning results in input order."""
+        results: list[NotificationResult] = []
+        for request in requests:
+            results.append(await self.send(request))
+        return results
+    async def enqueue(self, request: NotificationRequest) -> bool:
+        """Queue a request for the background worker.
+        Returns:
+            ``False`` when messaging is disabled or no worker is running,
+            ``True`` once the request is on the queue.
+        """
+        if not self.enabled or self._worker_task is None:
+            return False
+        await self._queue.put(request)
+        return True
+    async def _worker(self) -> None:
+        """Deliver queued requests until the task is cancelled."""
+        while True:
+            request = await self._queue.get()
+            try:
+                result = await self.send(request)
+                if not result.ok:
+                    logger.warning(
+                        "Notification delivery failed for %s: %s",
+                        request.destination,
+                        result.error,
+                    )
+            except Exception:
+                # Keep the worker alive; one bad request must not stop the queue.
+                logger.exception("Unexpected notification worker failure")
+            finally:
+                # Always balance get() so flush()/join() cannot hang.
+                self._queue.task_done()
+    async def _send_with_retries(
+        self,
+        provider: NotificationProvider,
+        destination_name: str,
+        destination,
+        request: NotificationRequest,
+    ) -> NotificationResult:
+        """Call ``provider.send``, retrying transient failures.
+        A ``RetryableNotificationError`` is retried after each delay in
+        ``_RETRY_DELAYS``; a plain ``NotificationError`` fails immediately.
+        Both end as a failed result carrying the error text. Any other
+        exception raised by the provider propagates to the caller.
+        """
+        # Without a started gateway there is no shared client, so use a
+        # temporary one and close it before returning.
+        owns_client = self._client is None
+        client = self._client or httpx.AsyncClient(timeout=10.0)
+        try:
+            last_error = "Notification delivery exhausted retries"
+            for attempt in range(len(_RETRY_DELAYS) + 1):
+                try:
+                    return await provider.send(client, destination_name, destination, request)
+                except RetryableNotificationError as exc:
+                    last_error = str(exc)
+                    if attempt >= len(_RETRY_DELAYS):
+                        break
+                    delay = _RETRY_DELAYS[attempt]
+                    logger.warning(
+                        "Retrying notification to %s in %ss after transient error: %s",
+                        destination_name,
+                        delay,
+                        exc,
+                    )
+                    await asyncio.sleep(delay)
+                except NotificationError as exc:
+                    last_error = str(exc)
+                    break
+            return self._failure(destination_name, provider.provider_name, last_error)
+        finally:
+            if owns_client:
+                await client.aclose()

agent/messaging/models.py ADDED Viewed

	@@ -0,0 +1,123 @@

+from typing import Annotated, Literal
+from pydantic import BaseModel, Field, field_validator, model_validator
+# Characters allowed in destination names; MessagingConfig rejects any name
+# containing a character outside this set.
+_DESTINATION_NAME_CHARS = set("abcdefghijklmnopqrstuvwxyz0123456789._-")
+# Event types accepted in MessagingConfig.auto_event_types.
+SUPPORTED_AUTO_EVENT_TYPES = {"approval_required", "error", "turn_complete"}
+class SlackDestinationConfig(BaseModel):
+    """Settings for a single Slack destination.
+    ``token`` and ``channel`` are required and stored stripped; the two
+    ``allow_*`` switches default to off.
+    """
+    provider: Literal["slack"] = "slack"
+    token: str
+    channel: str
+    allow_agent_tool: bool = False
+    allow_auto_events: bool = False
+    username: str | None = None
+    icon_emoji: str | None = None
+    @field_validator("token", "channel")
+    @classmethod
+    def _require_non_empty(cls, value: str) -> str:
+        """Return ``value`` without surrounding whitespace; blank is an error."""
+        stripped = value.strip()
+        if stripped:
+            return stripped
+        raise ValueError("must not be empty")
+# Destination config type, discriminated on the ``provider`` field. Slack is
+# the only variant defined in this module.
+DestinationConfig = Annotated[SlackDestinationConfig, Field(discriminator="provider")]
+class MessagingConfig(BaseModel):
+    """Messaging settings: on/off switch, auto-event filter and destinations."""
+    enabled: bool = False
+    auto_event_types: list[str] = Field(
+        default_factory=lambda: ["approval_required", "error", "turn_complete"]
+    )
+    destinations: dict[str, DestinationConfig] = Field(default_factory=dict)
+    @field_validator("destinations")
+    @classmethod
+    def _validate_destination_names(
+        cls, destinations: dict[str, DestinationConfig]
+    ) -> dict[str, DestinationConfig]:
+        """Reject empty names and names using characters outside the allowed set."""
+        for name in destinations:
+            if name and _DESTINATION_NAME_CHARS.issuperset(name):
+                continue
+            raise ValueError(
+                "destination names must use lowercase letters, digits, '.', '_' or '-'"
+            )
+        return destinations
+    @field_validator("auto_event_types")
+    @classmethod
+    def _validate_auto_event_types(cls, event_types: list[str]) -> list[str]:
+        """Reject unsupported event types and drop duplicates, keeping order."""
+        unsupported = next(
+            (item for item in event_types if item not in SUPPORTED_AUTO_EVENT_TYPES),
+            None,
+        )
+        if unsupported is not None:
+            raise ValueError(
+                f"unsupported auto event type '{unsupported}'"
+            )
+        # dict.fromkeys keeps the first occurrence of each entry in order.
+        return list(dict.fromkeys(event_types))
+    @model_validator(mode="after")
+    def _require_destinations_when_enabled(self) -> "MessagingConfig":
+        """Enabling messaging without any destination is a configuration error."""
+        if not self.enabled or self.destinations:
+            return self
+        raise ValueError("messaging.enabled requires at least one destination")
+    def get_destination(self, name: str) -> DestinationConfig | None:
+        """Look up a destination by name; ``None`` when it is not configured."""
+        return self.destinations.get(name)
+    def can_agent_tool_send(self, name: str) -> bool:
+        """True when ``name`` exists and has ``allow_agent_tool`` set."""
+        entry = self.get_destination(name)
+        return bool(entry and entry.allow_agent_tool)
+    def can_auto_send(self, name: str) -> bool:
+        """True when ``name`` exists and has ``allow_auto_events`` set."""
+        entry = self.get_destination(name)
+        return bool(entry and entry.allow_auto_events)
+    def default_auto_destinations(self) -> list[str]:
+        """Names of all destinations allowed to receive auto events.
+        Empty when messaging is disabled.
+        """
+        if self.enabled:
+            return [name for name in self.destinations if self.can_auto_send(name)]
+        return []
+class NotificationRequest(BaseModel):
+    """One notification addressed to a single named destination.
+    ``destination`` and ``message`` are stored stripped and must not be
+    blank; a blank ``title`` is normalised to ``None``.
+    """
+    destination: str
+    title: str | None = None
+    message: str
+    severity: Literal["info", "success", "warning", "error"] = "info"
+    metadata: dict[str, str] = Field(default_factory=dict)
+    event_type: str | None = None
+    @field_validator("destination", "message")
+    @classmethod
+    def _require_text(cls, value: str) -> str:
+        """Return ``value`` without surrounding whitespace; blank is an error."""
+        stripped = value.strip()
+        if stripped:
+            return stripped
+        raise ValueError("must not be empty")
+    @field_validator("title")
+    @classmethod
+    def _normalize_title(cls, value: str | None) -> str | None:
+        """Strip the title and collapse an empty result to ``None``."""
+        if value is None:
+            return None
+        return value.strip() or None
+class NotificationResult(BaseModel):
+    """Outcome of delivering one notification to one destination."""
+    # Name of the destination the request was addressed to.
+    destination: str
+    # True when the provider reported a successful delivery.
+    ok: bool
+    # Provider name, or a marker such as "disabled"/"unknown" when the
+    # gateway rejected the request before reaching a provider.
+    provider: str
+    # Failure description; None on success.
+    error: str | None = None
+    # Provider-side message identifier (the Slack provider stores the
+    # response's "ts" value here).
+    external_id: str | None = None

agent/messaging/slack.py ADDED Viewed

	@@ -0,0 +1,186 @@

+import json
+import re
+import httpx
+from agent.messaging.base import (
+    NotificationError,
+    NotificationProvider,
+    RetryableNotificationError,
+)
+from agent.messaging.models import (
+    NotificationRequest,
+    NotificationResult,
+    SlackDestinationConfig,
+)
+# Text tag placed at the start of the first line of a Slack message, keyed by
+# the request's severity.
+_SEVERITY_PREFIX = {
+    "info": "[INFO]",
+    "success": "[SUCCESS]",
+    "warning": "[WARNING]",
+    "error": "[ERROR]",
+}
+def _format_slack_mrkdwn(content: str) -> str:
+    """Convert common Markdown constructs to Slack's mrkdwn syntax.
+    Code spans and fences, converted links, existing Slack entities and
+    blockquote markers are swapped for placeholders so the later escaping and
+    emphasis passes cannot touch them; every placeholder is restored at the end.
+    Args:
+        content: Markdown-flavoured text.
+    Returns:
+        The text with ``&``, ``<`` and ``>`` escaped and headers, bold, italic,
+        strikethrough and links rewritten for Slack. Empty input is returned
+        unchanged.
+    """
+    if not content:
+        return content
+    placeholders: dict[str, str] = {}
+    placeholder_index = 0
+    def placeholder(value: str) -> str:
+        """Stash ``value`` and return a NUL-delimited key standing in for it."""
+        nonlocal placeholder_index
+        key = f"\x00SLACK{placeholder_index}\x00"
+        placeholder_index += 1
+        placeholders[key] = value
+        return key
+    text = content
+    # Protect code before any formatting conversion. Slack's mrkdwn ignores
+    # formatting inside backticks, so these regions should stay byte-for-byte.
+    text = re.sub(
+        r"(```(?:[^\n]*\n)?[\s\S]*?```)",
+        lambda match: placeholder(match.group(0)),
+        text,
+    )
+    text = re.sub(r"(`[^`\n]+`)", lambda match: placeholder(match.group(0)), text)
+    def convert_markdown_link(match: re.Match[str]) -> str:
+        """Rewrite ``[label](url)`` as Slack's ``<url|label>`` form."""
+        label = match.group(1)
+        url = match.group(2).strip()
+        # Markdown allows the URL to be wrapped in angle brackets; drop them.
+        if url.startswith("<") and url.endswith(">"):
+            url = url[1:-1].strip()
+        return placeholder(f"<{url}|{label}>")
+    text = re.sub(
+        r"\[([^\]]+)\]\(([^()]*(?:\([^()]*\)[^()]*)*)\)",
+        convert_markdown_link,
+        text,
+    )
+    # Preserve existing Slack entities and manual mrkdwn links before escaping.
+    text = re.sub(
+        r"(<(?:[@#!]|(?:https?|mailto|tel):)[^>\n]+>)",
+        lambda match: placeholder(match.group(1)),
+        text,
+    )
+    # Leading ">" blockquote markers must survive the escaping below.
+    text = re.sub(
+        r"^(>+\s)",
+        lambda match: placeholder(match.group(0)),
+        text,
+        flags=re.MULTILINE,
+    )
+    # Unescape first so already-escaped input is not escaped a second time.
+    text = text.replace("&amp;", "&").replace("&lt;", "<").replace("&gt;", ">")
+    text = text.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")
+    def convert_header(match: re.Match[str]) -> str:
+        """Render a Markdown header line as bold text."""
+        header = match.group(1).strip()
+        # The whole header becomes bold, so nested bold markers are redundant.
+        header = re.sub(r"\*\*(.+?)\*\*", r"\1", header)
+        return placeholder(f"*{header}*")
+    text = re.sub(r"^#{1,6}\s+(.+)$", convert_header, text, flags=re.MULTILINE)
+    # Emphasis passes run longest delimiter first; each result is parked in a
+    # placeholder so a later pass cannot re-interpret the asterisks it emitted.
+    text = re.sub(
+        r"\*\*\*(.+?)\*\*\*",
+        lambda match: placeholder(f"*_{match.group(1)}_*"),
+        text,
+    )
+    text = re.sub(
+        r"\*\*(.+?)\*\*",
+        lambda match: placeholder(f"*{match.group(1)}*"),
+        text,
+    )
+    # Single-asterisk emphasis. The delimiters must sit directly against the
+    # text (no whitespace just inside them) so list bullets ("* item") and
+    # arithmetic ("2 * 3 * 4") are left alone instead of becoming underscores.
+    text = re.sub(
+        r"(?<!\*)\*(?!\s)([^*\n]+)(?<!\s)\*(?!\*)",
+        lambda match: placeholder(f"_{match.group(1)}_"),
+        text,
+    )
+    text = re.sub(
+        r"~~(.+?)~~",
+        lambda match: placeholder(f"~{match.group(1)}~"),
+        text,
+    )
+    # Restore newest-first: a later placeholder's value may embed an earlier
+    # key (e.g. bold text wrapping a code span), so the outer one has to be
+    # expanded before the inner key can be found.
+    for key in reversed(placeholders):
+        text = text.replace(key, placeholders[key])
+    return text
+def _format_text(request: NotificationRequest) -> str:
+    """Render a request as Slack message text.
+    The first line is the severity tag followed by the title (when there is
+    one), then the message body, then one ``key: value`` line per metadata
+    entry. The joined text is passed through the mrkdwn converter.
+    """
+    prefix = _SEVERITY_PREFIX[request.severity]
+    heading = f"{prefix} {request.title}" if request.title else prefix
+    metadata_lines = [f"{key}: {value}" for key, value in request.metadata.items()]
+    return _format_slack_mrkdwn("\n".join([heading, request.message, *metadata_lines]))
+class SlackProvider(NotificationProvider):
+    """Deliver notifications through Slack's ``chat.postMessage`` Web API."""
+    provider_name = "slack"
+    async def send(
+        self,
+        client: httpx.AsyncClient,
+        destination_name: str,
+        destination: SlackDestinationConfig,
+        request: NotificationRequest,
+    ) -> NotificationResult:
+        """Post ``request`` to the destination's channel.
+        Args:
+            client: HTTP client used for the call; it is not closed here.
+            destination_name: Configured destination name, echoed in the result.
+            destination: Slack token, channel and optional display overrides.
+            request: Notification to render and send.
+        Returns:
+            A successful result whose ``external_id`` is the response's ``ts``
+            value (empty string when Slack omits it).
+        Raises:
+            RetryableNotificationError: Timeout, transport error, HTTP 429 or
+                5xx, a response body that is not a JSON object, or a
+                ``ratelimited`` API error.
+            NotificationError: Any other HTTP 4xx or Slack API error.
+        """
+        payload: dict[str, object] = {
+            "channel": destination.channel,
+            "text": _format_text(request),
+            "mrkdwn": True,
+            "unfurl_links": False,
+            "unfurl_media": False,
+        }
+        if destination.username:
+            payload["username"] = destination.username
+        if destination.icon_emoji:
+            payload["icon_emoji"] = destination.icon_emoji
+        try:
+            response = await client.post(
+                "https://slack.com/api/chat.postMessage",
+                headers={
+                    "Authorization": f"Bearer {destination.token}",
+                    "Content-Type": "application/json; charset=utf-8",
+                },
+                content=json.dumps(payload),
+            )
+        except httpx.TimeoutException as exc:
+            raise RetryableNotificationError("Slack request timed out") from exc
+        except httpx.TransportError as exc:
+            raise RetryableNotificationError("Slack transport error") from exc
+        if response.status_code == 429 or response.status_code >= 500:
+            raise RetryableNotificationError(
+                f"Slack HTTP {response.status_code}"
+            )
+        if response.status_code >= 400:
+            raise NotificationError(f"Slack HTTP {response.status_code}")
+        try:
+            data = response.json()
+        except ValueError as exc:
+            raise RetryableNotificationError("Slack returned invalid JSON") from exc
+        # A body that parses but is not an object (e.g. a list or string from
+        # an intermediary) has no "ok" field to inspect; treat it like
+        # unparseable JSON instead of failing with AttributeError below.
+        if not isinstance(data, dict):
+            raise RetryableNotificationError("Slack returned invalid JSON")
+        # Slack reports API-level failures in the body with HTTP 200.
+        if not data.get("ok"):
+            error = str(data.get("error") or "unknown_error")
+            if error == "ratelimited":
+                raise RetryableNotificationError(error)
+            raise NotificationError(error)
+        return NotificationResult(
+            destination=destination_name,
+            ok=True,
+            provider=self.provider_name,
+            external_id=str(data.get("ts") or ""),
+            error=None,
+        )

agent/prompts/system_prompt_v3.yaml CHANGED Viewed

@@ -157,6 +157,7 @@ system_prompt: |
   - Always include direct Hub URLs when referencing models, datasets, Spaces, or jobs.
   - For errors: state what went wrong, why, and what you're doing to fix it.
   - Do not over-explain or present elaborate option menus for simple tasks. When the user's intent is clear, act on it. Present options only when there's genuine ambiguity.
   # Tool usage

   - Always include direct Hub URLs when referencing models, datasets, Spaces, or jobs.
   - For errors: state what went wrong, why, and what you're doing to fix it.
   - Do not over-explain or present elaborate option menus for simple tasks. When the user's intent is clear, act on it. Present options only when there's genuine ambiguity.
+  - Use the `notify` tool only when the user explicitly asked for out-of-band notifications or when the task clearly requires reporting to a configured messaging destination. Do not use it for routine chat updates.
   # Tool usage

agent/tools/notify_tool.py ADDED Viewed

	@@ -0,0 +1,108 @@

+from typing import Any
+from agent.messaging.models import NotificationRequest
+# Tool definition for the notify tool: its name, description and the JSON
+# Schema of its arguments. Only "destinations" and "message" are required.
+NOTIFY_TOOL_SPEC = {
+    "name": "notify",
+    "description": (
+        "Send an out-of-band notification to configured messaging destinations. "
+        "Use this only when the user explicitly asked for proactive notifications "
+        "or when the task requires reporting progress outside the chat. "
+        "Destinations must be named server-side configs such as 'slack.ops'."
+    ),
+    "parameters": {
+        "type": "object",
+        "properties": {
+            "destinations": {
+                "type": "array",
+                "description": "Named messaging destinations to notify.",
+                "items": {"type": "string"},
+                "minItems": 1,
+            },
+            "message": {
+                "type": "string",
+                "description": "Main notification body.",
+            },
+            "title": {
+                "type": "string",
+                "description": "Optional short title line.",
+            },
+            "severity": {
+                "type": "string",
+                "enum": ["info", "success", "warning", "error"],
+                "description": "Notification severity label.",
+            },
+        },
+        "required": ["destinations", "message"],
+    },
+}
+async def notify_handler(
+    arguments: dict[str, Any], session=None, **_kwargs
+) -> tuple[str, bool]:
+    """Validate ``notify`` tool arguments and send the notifications.
+    Args:
+        arguments: Tool-call arguments; see ``NOTIFY_TOOL_SPEC`` for the schema.
+        session: Session providing ``notification_gateway``, ``config`` and
+            ``session_id``.
+    Returns:
+        ``(text, ok)``. For invalid input ``text`` is the validation message
+        and ``ok`` is ``False``. Otherwise ``text`` has one line per
+        destination and ``ok`` is ``True`` only if every delivery succeeded.
+    """
+    if session is None or session.notification_gateway is None:
+        return "Messaging is not configured for this session.", False
+    raw_destinations = arguments.get("destinations", [])
+    if not isinstance(raw_destinations, list) or not raw_destinations:
+        return "destinations must be a non-empty array of destination names.", False
+    # Strip and de-duplicate while keeping the order the caller gave.
+    destinations: list[str] = []
+    seen: set[str] = set()
+    for raw_name in raw_destinations:
+        if not isinstance(raw_name, str):
+            return "Each destination must be a string.", False
+        name = raw_name.strip()
+        if not name:
+            return "Destination names must not be empty.", False
+        if name not in seen:
+            destinations.append(name)
+            seen.add(name)
+    # Unknown destinations and ones without allow_agent_tool are both refused.
+    disallowed = [
+        name
+        for name in destinations
+        if not session.config.messaging.can_agent_tool_send(name)
+    ]
+    if disallowed:
+        return (
+            "These destinations are unavailable for the notify tool: "
+            + ", ".join(disallowed)
+        ), False
+    message = arguments.get("message", "")
+    if not isinstance(message, str) or not message.strip():
+        return "message must be a non-empty string.", False
+    title = arguments.get("title")
+    severity = arguments.get("severity", "info")
+    if title is not None and not isinstance(title, str):
+        return "title must be a string when provided.", False
+    # Check the type first: a set membership test on an unhashable value
+    # (list or dict) raises TypeError instead of evaluating to False.
+    if not isinstance(severity, str) or severity not in {
+        "info",
+        "success",
+        "warning",
+        "error",
+    }:
+        return "severity must be one of: info, success, warning, error.", False
+    requests = [
+        NotificationRequest(
+            destination=name,
+            title=title,
+            message=message,
+            severity=severity,
+            metadata={
+                "session_id": session.session_id,
+                "model": session.config.model_name,
+            },
+        )
+        for name in destinations
+    ]
+    results = await session.notification_gateway.send_many(requests)
+    lines = []
+    all_ok = True
+    for result in results:
+        if result.ok:
+            lines.append(f"{result.destination}: sent")
+        else:
+            all_ok = False
+            lines.append(f"{result.destination}: failed ({result.error})")
+    return "\n".join(lines), all_ok

backend/main.py CHANGED Viewed

@@ -11,6 +11,7 @@ from fastapi.middleware.cors import CORSMiddleware
 from fastapi.staticfiles import StaticFiles
 from routes.agent import router as agent_router
 from routes.auth import router as auth_router
 # Load .env from project root (parent directory)
 load_dotenv(Path(__file__).parent.parent / ".env")
@@ -27,6 +28,7 @@ logger = logging.getLogger(__name__)
 async def lifespan(app: FastAPI):
     """Application lifespan handler."""
     logger.info("Starting HF Agent backend...")
     # Start in-process hourly KPI rollup. Replaces an external cron so the
     # rollup lives next to the data and reuses the Space's HF token.
     try:
@@ -34,7 +36,6 @@ async def lifespan(app: FastAPI):
         kpis_scheduler.start()
     except Exception as e:
         logger.warning("KPI scheduler failed to start: %s", e)
     yield
     logger.info("Shutting down HF Agent backend...")
@@ -47,7 +48,6 @@ async def lifespan(app: FastAPI):
     # Final-flush: save every still-active session so we don't lose traces on
     # server restart. Uploads are detached subprocesses — this is fast.
     try:
-        from session_manager import session_manager
         for sid, agent_session in list(session_manager.sessions.items()):
             sess = agent_session.session
             if sess.config.save_sessions:
@@ -58,6 +58,7 @@ async def lifespan(app: FastAPI):
                     logger.warning("Failed to flush session %s: %s", sid, e)
     except Exception as e:
         logger.warning("Lifespan final-flush skipped: %s", e)
 app = FastAPI(

 from fastapi.staticfiles import StaticFiles
 from routes.agent import router as agent_router
 from routes.auth import router as auth_router
+from session_manager import session_manager
 # Load .env from project root (parent directory)
 load_dotenv(Path(__file__).parent.parent / ".env")
 async def lifespan(app: FastAPI):
     """Application lifespan handler."""
     logger.info("Starting HF Agent backend...")
+    await session_manager.start()
     # Start in-process hourly KPI rollup. Replaces an external cron so the
     # rollup lives next to the data and reuses the Space's HF token.
     try:
         kpis_scheduler.start()
     except Exception as e:
         logger.warning("KPI scheduler failed to start: %s", e)
     yield
     logger.info("Shutting down HF Agent backend...")
     # Final-flush: save every still-active session so we don't lose traces on
     # server restart. Uploads are detached subprocesses — this is fast.
     try:
         for sid, agent_session in list(session_manager.sessions.items()):
             sess = agent_session.session
             if sess.config.save_sessions:
                     logger.warning("Failed to flush session %s: %s", sid, e)
     except Exception as e:
         logger.warning("Lifespan final-flush skipped: %s", e)
+    await session_manager.close()
 app = FastAPI(

backend/models.py CHANGED Viewed

@@ -3,7 +3,7 @@
 from enum import Enum
 from typing import Any
-from pydantic import BaseModel
 class OpType(str, Enum):
@@ -87,6 +87,13 @@ class SessionInfo(BaseModel):
     user_id: str = "dev"
     pending_approval: list[PendingApprovalTool] | None = None
     model: str | None = None
 class HealthResponse(BaseModel):

 from enum import Enum
 from typing import Any
+from pydantic import BaseModel, Field
 class OpType(str, Enum):
     user_id: str = "dev"
     pending_approval: list[PendingApprovalTool] | None = None
     model: str | None = None
+    notification_destinations: list[str] = Field(default_factory=list)
+class SessionNotificationsRequest(BaseModel):
+    """Replace the session's auto-notification destinations.
+
+    Request body for the session notifications route; the list replaces the
+    session's current destinations rather than being appended to them.
+    """
+    # Destination names; the route passes this list unchanged to the session manager.
+    destinations: list[str]
 class HealthResponse(BaseModel):

backend/routes/agent.py CHANGED Viewed

@@ -24,6 +24,7 @@ from models import (
     HealthResponse,
     LLMHealthResponse,
     SessionInfo,
     SessionResponse,
     SubmitRequest,
     TruncateRequest,
@@ -513,6 +514,26 @@ async def set_session_model(
     return {"session_id": session_id, "model": model_id}
 @router.get("/user/quota")
 async def get_user_quota(user: dict = Depends(get_current_user)) -> dict:
     """Return the user's plan tier and today's Claude-session quota state."""
@@ -824,7 +845,6 @@ async def shutdown_session(
         raise HTTPException(status_code=404, detail="Session not found or inactive")
     return {"status": "shutdown_requested", "session_id": session_id}
 @router.post("/feedback/{session_id}")
 async def submit_feedback(
     session_id: str,

     HealthResponse,
     LLMHealthResponse,
     SessionInfo,
+    SessionNotificationsRequest,
     SessionResponse,
     SubmitRequest,
     TruncateRequest,
     return {"session_id": session_id, "model": model_id}
+@router.post("/session/{session_id}/notifications")
+async def set_session_notifications(
+    session_id: str,
+    body: SessionNotificationsRequest,
+    user: dict = Depends(get_current_user),
+) -> dict:
+    """Replace the session's auto-notification destinations.
+
+    Checks that the caller may access the session, then hands the requested
+    destination names to the session manager. A ``ValueError`` raised by the
+    manager is reported as HTTP 400 with the error text as the detail.
+
+    Returns a dict with the ``session_id`` and the ``notification_destinations``
+    list returned by the session manager.
+    """
+    _check_session_access(session_id, user)
+    try:
+        destinations = session_manager.set_notification_destinations(
+            session_id, body.destinations
+        )
+    except ValueError as e:
+        # Chain the cause so logs and tracebacks keep the original validation error.
+        raise HTTPException(status_code=400, detail=str(e)) from e
+    return {
+        "session_id": session_id,
+        "notification_destinations": destinations,
+    }
 @router.get("/user/quota")
 async def get_user_quota(user: dict = Depends(get_current_user)) -> dict:
     """Return the user's plan tier and today's Claude-session quota state."""
         raise HTTPException(status_code=404, detail="Session not found or inactive")
     return {"status": "shutdown_requested", "session_id": session_id}
 @router.post("/feedback/{session_id}")
 async def submit_feedback(
     session_id: str,

backend/session_manager.py CHANGED Viewed

@@ -10,6 +10,7 @@ from typing import Any, Optional
 from agent.config import load_config
 from agent.core.agent_loop import process_submission
 from agent.core.session import Event, OpType, Session
 from agent.core.tools import ToolRouter
@@ -119,9 +120,18 @@ class SessionManager:
     def __init__(self, config_path: str | None = None) -> None:
         self.config = load_config(config_path or DEFAULT_CONFIG_PATH)
         self.sessions: dict[str, AgentSession] = {}
         self._lock = asyncio.Lock()
     def _count_user_sessions(self, user_id: str) -> int:
         """Count active sessions owned by a specific user."""
         return sum(
@@ -192,7 +202,11 @@ class SessionManager:
                 session_config.model_name = model
             session = Session(
                 event_queue, config=session_config, tool_router=tool_router,
-                hf_token=hf_token, user_id=user_id,
             )
             t1 = _time.monotonic()
             logger.info(f"Session initialized in {t1 - t0:.2f}s")
@@ -518,8 +532,39 @@ class SessionManager:
             "user_id": agent_session.user_id,
             "pending_approval": pending_approval,
             "model": agent_session.session.config.model_name,
         }
     def list_sessions(self, user_id: str | None = None) -> list[dict[str, Any]]:
         """List sessions, optionally filtered by user.

 from agent.config import load_config
 from agent.core.agent_loop import process_submission
+from agent.messaging.gateway import NotificationGateway
 from agent.core.session import Event, OpType, Session
 from agent.core.tools import ToolRouter
     def __init__(self, config_path: str | None = None) -> None:
         self.config = load_config(config_path or DEFAULT_CONFIG_PATH)
+        self.messaging_gateway = NotificationGateway(self.config.messaging)
         self.sessions: dict[str, AgentSession] = {}
         self._lock = asyncio.Lock()
+    async def start(self) -> None:
+        """Start shared background resources."""
+        # The messaging gateway is the only shared resource started here.
+        await self.messaging_gateway.start()
+    async def close(self) -> None:
+        """Flush and close shared background resources."""
+        # Mirrors start(): delegates shutdown to the messaging gateway.
+        await self.messaging_gateway.close()
     def _count_user_sessions(self, user_id: str) -> int:
         """Count active sessions owned by a specific user."""
         return sum(
                 session_config.model_name = model
             session = Session(
                 event_queue, config=session_config, tool_router=tool_router,
+                hf_token=hf_token,
+                user_id=user_id,
+                notification_gateway=self.messaging_gateway,
+                notification_destinations=[],
+                session_id=session_id,
             )
             t1 = _time.monotonic()
             logger.info(f"Session initialized in {t1 - t0:.2f}s")
             "user_id": agent_session.user_id,
             "pending_approval": pending_approval,
             "model": agent_session.session.config.model_name,
+            "notification_destinations": list(
+                agent_session.session.notification_destinations
+            ),
         }
+    def set_notification_destinations(
+        self, session_id: str, destinations: list[str]
+    ) -> list[str]:
+        """Validate and store the auto-notification destinations for a session.
+
+        Each name is stripped of surrounding whitespace and validated; repeated
+        names are collapsed while keeping first-seen order. The normalized list
+        is stored on the session and returned.
+
+        Raises:
+            ValueError: if the session is missing or inactive, or if any name
+                is empty, unknown, or not enabled for auto events.
+        """
+        agent_session = self.sessions.get(session_id)
+        if not (agent_session and agent_session.is_active):
+            raise ValueError("Session not found or inactive")
+        # Dict keys provide insertion-ordered de-duplication in one structure.
+        ordered: dict[str, None] = {}
+        for entry in destinations:
+            name = entry.strip()
+            if not name:
+                raise ValueError("Destination names must not be empty")
+            target = self.config.messaging.get_destination(name)
+            if target is None:
+                raise ValueError(f"Unknown destination '{name}'")
+            if not target.allow_auto_events:
+                raise ValueError(
+                    f"Destination '{name}' is not enabled for auto events"
+                )
+            ordered.setdefault(name, None)
+        normalized: list[str] = list(ordered)
+        agent_session.session.set_notification_destinations(normalized)
+        return normalized
     def list_sessions(self, user_id: str | None = None) -> list[dict[str, Any]]:
         """List sessions, optionally filtered by user.

configs/cli_agent_config.json CHANGED Viewed

@@ -5,6 +5,11 @@
   "yolo_mode": false,
   "confirm_cpu_jobs": true,
   "auto_file_upload": true,
   "mcpServers": {
     "hf-mcp-server": {
       "transport": "http",

   "yolo_mode": false,
   "confirm_cpu_jobs": true,
   "auto_file_upload": true,
+  "messaging": {
+    "enabled": false,
+    "auto_event_types": ["approval_required", "error", "turn_complete"],
+    "destinations": {}
+  },
   "mcpServers": {
     "hf-mcp-server": {
       "transport": "http",

pyproject.toml CHANGED Viewed

@@ -42,7 +42,7 @@ eval = [
 # Development and testing dependencies
 dev = [
     "pytest>=9.0.2",
-    "pytest-asyncio>=0.26.0",
 ]
 # All dependencies (eval + dev)

 # Development and testing dependencies
 dev = [
     "pytest>=9.0.2",
+    "pytest-asyncio>=1.2.0",
 ]
 # All dependencies (eval + dev)

tests/unit/test_cli_rendering.py CHANGED Viewed

@@ -79,7 +79,7 @@ async def test_interactive_main_applies_model_override_before_banner(monkeypatch
     monkeypatch.setattr(
         main_mod,
         "load_config",
-        lambda _path: SimpleNamespace(
             model_name="moonshotai/Kimi-K2.6",
             mcpServers={},
         ),

     monkeypatch.setattr(
         main_mod,
         "load_config",
+        lambda _path, **_kwargs: SimpleNamespace(
             model_name="moonshotai/Kimi-K2.6",
             mcpServers={},
         ),

tests/unit/test_config.py ADDED Viewed

	@@ -0,0 +1,121 @@

+import json
+from agent import config as config_module
+def _write_json(path, data):
+    """Serialize *data* as JSON and write it to *path* as UTF-8 text."""
+    serialized = json.dumps(data)
+    path.write_text(serialized, encoding="utf-8")
+def test_load_config_does_not_apply_slack_user_defaults_by_default(tmp_path, monkeypatch):
+    """Without include_user_defaults, Slack env vars must not change messaging config."""
+    config_path = tmp_path / "config.json"
+    _write_json(
+        config_path,
+        {
+            "model_name": "moonshotai/Kimi-K2.6",
+            "messaging": {
+                "enabled": False,
+                "destinations": {},
+            },
+        },
+    )
+    # Slack credentials are present in the environment but should be ignored.
+    monkeypatch.setenv("SLACK_BOT_TOKEN", "xoxb-test")
+    monkeypatch.setenv("SLACK_CHANNEL_ID", "C123")
+    config = config_module.load_config(str(config_path))
+    assert not config.messaging.enabled
+    assert config.messaging.destinations == {}
+def test_load_config_applies_slack_user_defaults_from_env(tmp_path, monkeypatch):
+    """With include_user_defaults=True, SLACK_* env vars yield a ``slack.default`` destination."""
+    config_path = tmp_path / "config.json"
+    _write_json(config_path, {"model_name": "moonshotai/Kimi-K2.6"})
+    # Isolate from any real user config: clear the override variable and point
+    # the default user-config path at a file that does not exist.
+    monkeypatch.delenv("ML_INTERN_CLI_CONFIG", raising=False)
+    monkeypatch.setattr(
+        config_module,
+        "DEFAULT_USER_CONFIG_PATH",
+        tmp_path / "missing-user-config.json",
+    )
+    monkeypatch.setenv("SLACK_BOT_TOKEN", "xoxb-test")
+    monkeypatch.setenv("SLACK_CHANNEL_ID", "C123")
+    config = config_module.load_config(str(config_path), include_user_defaults=True)
+    assert config.messaging.enabled
+    assert config.messaging.auto_event_types == [
+        "approval_required",
+        "error",
+        "turn_complete",
+    ]
+    # The env-derived destination is usable by both the agent tool and auto events.
+    destination = config.messaging.destinations["slack.default"]
+    assert destination.token == "xoxb-test"
+    assert destination.channel == "C123"
+    assert destination.allow_agent_tool
+    assert destination.allow_auto_events
+def test_load_config_merges_user_config_before_env_substitution(tmp_path, monkeypatch):
+    """A user config's ``${VAR}`` placeholders are resolved after it is merged in."""
+    config_path = tmp_path / "config.json"
+    user_config_path = tmp_path / "user-config.json"
+    _write_json(config_path, {"model_name": "moonshotai/Kimi-K2.6"})
+    _write_json(
+        user_config_path,
+        {
+            "messaging": {
+                "enabled": True,
+                "auto_event_types": ["approval_required"],
+                "destinations": {
+                    "slack.team": {
+                        "provider": "slack",
+                        "token": "${USER_SLACK_TOKEN}",
+                        "channel": "C999",
+                        "allow_agent_tool": False,
+                        "allow_auto_events": True,
+                    },
+                },
+            },
+        },
+    )
+    # Point the loader at the user config and supply the placeholder's value.
+    monkeypatch.setenv("ML_INTERN_CLI_CONFIG", str(user_config_path))
+    monkeypatch.setenv("ML_INTERN_SLACK_NOTIFICATIONS", "0")
+    monkeypatch.setenv("USER_SLACK_TOKEN", "xoxb-user")
+    config = config_module.load_config(str(config_path), include_user_defaults=True)
+    assert config.messaging.enabled
+    assert config.messaging.auto_event_types == ["approval_required"]
+    # Only the user-config destination exists; no env-derived default was added.
+    assert set(config.messaging.destinations) == {"slack.team"}
+    destination = config.messaging.destinations["slack.team"]
+    assert destination.token == "xoxb-user"
+    assert destination.channel == "C999"
+    assert not destination.allow_agent_tool
+    assert destination.allow_auto_events
+def test_slack_user_defaults_can_be_disabled(tmp_path, monkeypatch):
+    """ML_INTERN_SLACK_NOTIFICATIONS=false suppresses the env-derived Slack defaults."""
+    config_path = tmp_path / "config.json"
+    _write_json(
+        config_path,
+        {
+            "model_name": "moonshotai/Kimi-K2.6",
+            "messaging": {
+                "enabled": False,
+                "destinations": {},
+            },
+        },
+    )
+    # Isolate from any real user config on the machine running the test.
+    monkeypatch.delenv("ML_INTERN_CLI_CONFIG", raising=False)
+    monkeypatch.setattr(
+        config_module,
+        "DEFAULT_USER_CONFIG_PATH",
+        tmp_path / "missing-user-config.json",
+    )
+    # Credentials are set, but the opt-out variable should take precedence.
+    monkeypatch.setenv("ML_INTERN_SLACK_NOTIFICATIONS", "false")
+    monkeypatch.setenv("SLACK_BOT_TOKEN", "xoxb-test")
+    monkeypatch.setenv("SLACK_CHANNEL_ID", "C123")
+    config = config_module.load_config(str(config_path), include_user_defaults=True)
+    assert not config.messaging.enabled
+    assert config.messaging.destinations == {}

tests/unit/test_messaging.py ADDED Viewed

	@@ -0,0 +1,511 @@

+import asyncio
+import json
+from pathlib import Path
+from types import SimpleNamespace
+import httpx
+import pytest
+from pydantic import ValidationError
+from agent.config import Config
+from agent.core.session import Event, Session
+from agent.messaging.gateway import NotificationGateway
+from agent.messaging.models import NotificationRequest, NotificationResult
+from agent.messaging.slack import SlackProvider, _format_slack_mrkdwn
+from agent.tools.notify_tool import notify_handler
+from backend.session_manager import AgentSession, SessionManager
+class DummyToolRouter:
+    # Minimal tool-router stand-in for tests: it exposes no tool specs.
+    def get_tool_specs_for_llm(self) -> list[dict]:
+        """Return an empty list of tool specs."""
+        return []
+class RecordingGateway:
+    """Gateway test double that records requests instead of delivering them."""
+
+    def __init__(self):
+        # Requests passed to enqueue() and send_many() respectively.
+        self.enqueued: list[NotificationRequest] = []
+        self.sent: list[NotificationRequest] = []
+
+    async def enqueue(self, request: NotificationRequest) -> bool:
+        """Record *request* and report it as accepted."""
+        self.enqueued.append(request)
+        return True
+
+    async def send_many(
+        self, requests: list[NotificationRequest]
+    ) -> list[NotificationResult]:
+        """Record every request and return one successful result per request."""
+        self.sent.extend(requests)
+        outcomes: list[NotificationResult] = []
+        for item in requests:
+            outcome = NotificationResult(
+                destination=item.destination,
+                ok=True,
+                provider="test",
+            )
+            outcomes.append(outcome)
+        return outcomes
+def _config_with_messaging(**destination_overrides) -> Config:
+    """Build a Config with messaging enabled and one ``slack.ops`` destination.
+
+    Keyword arguments override or extend the destination's default fields.
+    """
+    slack_ops = {
+        "provider": "slack",
+        "token": "xoxb-test",
+        "channel": "C123",
+    }
+    slack_ops.update(destination_overrides)
+    messaging = {
+        "enabled": True,
+        "destinations": {"slack.ops": slack_ops},
+    }
+    raw_config = {
+        "model_name": "moonshotai/Kimi-K2.6",
+        "messaging": messaging,
+    }
+    return Config.model_validate(raw_config)
+def _test_session(
+    config: Config, gateway, session_id: str = "session-test"
+) -> Session:
+    """Create a Session wired to *gateway*, with a dummy router and empty context."""
+    event_queue = asyncio.Queue()
+    empty_context = SimpleNamespace(items=[])
+    return Session(
+        event_queue,
+        config=config,
+        tool_router=DummyToolRouter(),
+        context_manager=empty_context,
+        notification_gateway=gateway,
+        session_id=session_id,
+    )
+def test_messaging_config_validates_destination_names():
+    """An invalid destination name fails validation; a valid one exposes its permissions."""
+    # "Slack Ops" is expected to be rejected as a destination name.
+    with pytest.raises(ValidationError):
+        Config.model_validate(
+            {
+                "model_name": "moonshotai/Kimi-K2.6",
+                "messaging": {
+                    "enabled": True,
+                    "destinations": {
+                        "Slack Ops": {
+                            "provider": "slack",
+                            "token": "x",
+                            "channel": "C123",
+                        }
+                    },
+                },
+            }
+        )
+    # A well-formed name with both flags set is allowed on both send paths.
+    config = _config_with_messaging(allow_agent_tool=True, allow_auto_events=True)
+    assert config.messaging.can_agent_tool_send("slack.ops")
+    assert config.messaging.can_auto_send("slack.ops")
+def test_messaging_config_default_auto_destinations_only_returns_auto_enabled():
+    """default_auto_destinations() lists only destinations with allow_auto_events set."""
+    config = Config.model_validate(
+        {
+            "model_name": "moonshotai/Kimi-K2.6",
+            "messaging": {
+                "enabled": True,
+                "destinations": {
+                    "slack.ops": {
+                        "provider": "slack",
+                        "token": "xoxb-test",
+                        "channel": "C123",
+                        "allow_auto_events": True,
+                    },
+                    "slack.tool": {
+                        "provider": "slack",
+                        "token": "xoxb-test",
+                        "channel": "C999",
+                        "allow_agent_tool": True,
+                    },
+                },
+            },
+        }
+    )
+    # "slack.tool" only enables the agent tool, so it is excluded.
+    assert config.messaging.default_auto_destinations() == ["slack.ops"]
+def test_messaging_config_default_auto_destinations_empty_when_disabled():
+    """default_auto_destinations() is empty when messaging is disabled."""
+    config = Config.model_validate(
+        {
+            "model_name": "moonshotai/Kimi-K2.6",
+            "messaging": {
+                "enabled": False,
+                "destinations": {
+                    "slack.ops": {
+                        "provider": "slack",
+                        "token": "xoxb-test",
+                        "channel": "C123",
+                        "allow_auto_events": True,
+                    },
+                },
+            },
+        }
+    )
+    # The destination allows auto events, but the global switch is off.
+    assert config.messaging.default_auto_destinations() == []
+def test_slack_mrkdwn_formatter_converts_common_markdown():
+    """Markdown is converted to Slack mrkdwn while code spans and fences stay untouched."""
+    formatted = _format_slack_mrkdwn(
+        "# Result\n"
+        "**Done** with *details* and ~~old text~~.\n"
+        "See [PR](https://github.com/huggingface/ml-intern/pull/116).\n"
+        "Keep `**literal**` and ```python\nx < 3\n``` untouched.\n"
+        "Escape <raw> & text."
+    )
+    # Heading, bold, italic, strikethrough and link conversions.
+    assert "*Result*" in formatted
+    assert "*Done*" in formatted
+    assert "_details_" in formatted
+    assert "~old text~" in formatted
+    assert "<https://github.com/huggingface/ml-intern/pull/116|PR>" in formatted
+    # Inline code and fenced code are preserved verbatim, including "<".
+    assert "`**literal**`" in formatted
+    assert "```python\nx < 3\n```" in formatted
+    # Outside code, "<", ">" and "&" are HTML-escaped.
+    assert "Escape &lt;raw&gt; &amp; text." in formatted
+@pytest.mark.asyncio
+async def test_slack_provider_formats_and_sends_payload():
+    """SlackProvider.send posts bearer-authenticated JSON with mrkdwn-formatted text."""
+    seen: dict[str, object] = {}
+    def handler(request: httpx.Request) -> httpx.Response:
+        # Capture the outgoing headers and body for the assertions below.
+        seen["auth"] = request.headers["Authorization"]
+        seen["content_type"] = request.headers["Content-Type"]
+        seen["json"] = request.read().decode("utf-8")
+        return httpx.Response(200, json={"ok": True, "ts": "123.456"})
+    # MockTransport routes every request to handler, so no network is used.
+    async with httpx.AsyncClient(transport=httpx.MockTransport(handler)) as client:
+        provider = SlackProvider()
+        result = await provider.send(
+            client,
+            "slack.ops",
+            _config_with_messaging().messaging.destinations["slack.ops"],
+            NotificationRequest(
+                destination="slack.ops",
+                title="Approval required",
+                message="A **run** is waiting. See [details](https://example.com).",
+                severity="warning",
+                metadata={"session_id": "sess-1"},
+            ),
+        )
+    assert result.ok
+    # The "ts" field of the mocked response is surfaced as external_id.
+    assert result.external_id == "123.456"
+    assert seen["auth"] == "Bearer xoxb-test"
+    assert seen["content_type"].startswith("application/json")
+    payload = json.loads(str(seen["json"]))
+    assert payload["channel"] == "C123"
+    assert payload["mrkdwn"] is True
+    # Text is: severity-tagged title, converted message body, then metadata.
+    assert payload["text"] == (
+        "[WARNING] Approval required\n"
+        "A *run* is waiting. See <https://example.com|details>.\n"
+        "session_id: sess-1"
+    )
+@pytest.mark.asyncio
+async def test_notification_gateway_retries_transient_failures(monkeypatch):
+    """gateway.send retries after a 503 response and succeeds on the second attempt."""
+    attempts = {"count": 0}
+    def handler(_request: httpx.Request) -> httpx.Response:
+        # First call fails with 503; every later call succeeds.
+        attempts["count"] += 1
+        if attempts["count"] == 1:
+            return httpx.Response(503, json={"ok": False})
+        return httpx.Response(200, json={"ok": True, "ts": "999.1"})
+    async def fake_sleep(_delay: float) -> None:
+        return None
+    # Replace the sleep used by the gateway module so retries do not wait.
+    monkeypatch.setattr("agent.messaging.gateway.asyncio.sleep", fake_sleep)
+    config = _config_with_messaging(allow_agent_tool=True)
+    gateway = NotificationGateway(config.messaging)
+    async with httpx.AsyncClient(transport=httpx.MockTransport(handler)) as client:
+        # Inject the mocked client directly instead of calling gateway.start().
+        gateway._client = client
+        result = await gateway.send(
+            NotificationRequest(
+                destination="slack.ops",
+                message="hello",
+            )
+        )
+        gateway._client = None
+    assert attempts["count"] == 2
+    assert result.ok
+@pytest.mark.asyncio
+async def test_notify_tool_rejects_non_allowlisted_destinations():
+    """notify_handler refuses a destination whose allow_agent_tool flag is False."""
+    config = _config_with_messaging(allow_agent_tool=False)
+    gateway = RecordingGateway()
+    session = _test_session(config, gateway)
+    output, ok = await notify_handler(
+        {"destinations": ["slack.ops"], "message": "done"},
+        session=session,
+    )
+    assert not ok
+    assert "unavailable for the notify tool" in output
+    # Nothing may reach the gateway when the destination is rejected.
+    assert gateway.sent == []
+@pytest.mark.asyncio
+async def test_notify_tool_sends_to_allowlisted_destinations():
+    """notify_handler sends to an allowed destination and attaches session metadata."""
+    config = _config_with_messaging(allow_agent_tool=True)
+    gateway = RecordingGateway()
+    session = _test_session(config, gateway, session_id="sess-42")
+    output, ok = await notify_handler(
+        {
+            "destinations": ["slack.ops"],
+            "title": "Training complete",
+            "message": "The run finished successfully.",
+            "severity": "success",
+        },
+        session=session,
+    )
+    assert ok
+    assert output == "slack.ops: sent"
+    assert len(gateway.sent) == 1
+    # The handler adds the session id and model name to the request metadata.
+    sent = gateway.sent[0]
+    assert sent.metadata["session_id"] == "sess-42"
+    assert sent.metadata["model"] == "moonshotai/Kimi-K2.6"
+@pytest.mark.asyncio
+async def test_session_auto_notifications_only_send_opted_in_auto_destinations():
+    """Auto events reach only destinations that allow them, and only for auto event types."""
+    config = Config.model_validate(
+        {
+            "model_name": "moonshotai/Kimi-K2.6",
+            "messaging": {
+                "enabled": True,
+                "destinations": {
+                    "slack.ops": {
+                        "provider": "slack",
+                        "token": "xoxb-test",
+                        "channel": "C123",
+                        "allow_auto_events": True,
+                    },
+                    "slack.tool": {
+                        "provider": "slack",
+                        "token": "xoxb-test",
+                        "channel": "C999",
+                        "allow_agent_tool": True,
+                    },
+                },
+            },
+        }
+    )
+    gateway = RecordingGateway()
+    session = _test_session(config, gateway, session_id="sess-auto")
+    # Opt in to both destinations; only "slack.ops" allows auto events.
+    session.set_notification_destinations(["slack.ops", "slack.tool"])
+    await session.send_event(
+        Event(
+            event_type="approval_required",
+            data={"tools": [{"tool": "hf_jobs", "tool_call_id": "tc-1"}]},
+        )
+    )
+    # An assistant_message event is expected to produce no notification.
+    await session.send_event(
+        Event(event_type="assistant_message", data={"content": "normal message"})
+    )
+    assert len(gateway.enqueued) == 1
+    request = gateway.enqueued[0]
+    assert request.destination == "slack.ops"
+    assert request.severity == "warning"
+    assert request.event_type == "approval_required"
+    assert "hf_jobs" in request.message
+@pytest.mark.asyncio
+async def test_turn_complete_auto_notification_includes_final_response_summary():
+    """A turn_complete notification carries success severity and the final response text."""
+    config = Config.model_validate(
+        {
+            "model_name": "moonshotai/Kimi-K2.6",
+            "messaging": {
+                "enabled": True,
+                "destinations": {
+                    "slack.ops": {
+                        "provider": "slack",
+                        "token": "xoxb-test",
+                        "channel": "C123",
+                        "allow_auto_events": True,
+                    }
+                },
+            },
+        }
+    )
+    gateway = RecordingGateway()
+    session = _test_session(config, gateway, session_id="sess-done")
+    session.set_notification_destinations(["slack.ops"])
+    await session.send_event(
+        Event(
+            event_type="turn_complete",
+            data={
+                "history_size": 12,
+                "final_response": "Evaluation finished. Accuracy: 84.2% on the validation split.",
+            },
+        )
+    )
+    assert len(gateway.enqueued) == 1
+    request = gateway.enqueued[0]
+    assert request.destination == "slack.ops"
+    assert request.severity == "success"
+    assert request.event_type == "turn_complete"
+    # The message has a completion line plus content from final_response.
+    assert "completed successfully" in request.message
+    assert "Accuracy: 84.2%" in request.message
+@pytest.mark.asyncio
+async def test_turn_complete_auto_notification_supports_longer_summary():
+    """A final response of about 1200 characters is included without truncation."""
+    config = Config.model_validate(
+        {
+            "model_name": "moonshotai/Kimi-K2.6",
+            "messaging": {
+                "enabled": True,
+                "destinations": {
+                    "slack.ops": {
+                        "provider": "slack",
+                        "token": "xoxb-test",
+                        "channel": "C123",
+                        "allow_auto_events": True,
+                    }
+                },
+            },
+        }
+    )
+    gateway = RecordingGateway()
+    session = _test_session(config, gateway, session_id="sess-long")
+    session.set_notification_destinations(["slack.ops"])
+    # The trailing " END" marker makes truncation of the tail detectable.
+    long_summary = "A" * 1200 + " END"
+    await session.send_event(
+        Event(
+            event_type="turn_complete",
+            data={
+                "history_size": 12,
+                "final_response": long_summary,
+            },
+        )
+    )
+    assert len(gateway.enqueued) == 1
+    request = gateway.enqueued[0]
+    assert request.event_type == "turn_complete"
+    assert "A" * 1200 in request.message
+    assert request.message.endswith("END")
+@pytest.mark.asyncio
+async def test_turn_complete_auto_notification_can_be_deferred():
+    """With deferral enabled, turn_complete notifies only when explicitly flushed."""
+    config = Config.model_validate(
+        {
+            "model_name": "moonshotai/Kimi-K2.6",
+            "messaging": {
+                "enabled": True,
+                "destinations": {
+                    "slack.ops": {
+                        "provider": "slack",
+                        "token": "xoxb-test",
+                        "channel": "C123",
+                        "allow_auto_events": True,
+                    }
+                },
+            },
+        }
+    )
+    gateway = RecordingGateway()
+    # Built directly (not via _test_session) to pass the deferral flag and
+    # the initial destinations to the constructor.
+    session = Session(
+        asyncio.Queue(),
+        config=config,
+        tool_router=DummyToolRouter(),
+        context_manager=SimpleNamespace(items=[]),
+        notification_gateway=gateway,
+        notification_destinations=["slack.ops"],
+        defer_turn_complete_notification=True,
+        session_id="sess-deferred",
+    )
+    event = Event(
+        event_type="turn_complete",
+        data={"final_response": "Finished after the CLI drained the stream."},
+    )
+    # Sending the event alone must not enqueue anything while deferred.
+    await session.send_event(event)
+    assert gateway.enqueued == []
+    # The explicit deferred call is what triggers the notification.
+    await session.send_deferred_turn_complete_notification(event)
+    assert len(gateway.enqueued) == 1
+    request = gateway.enqueued[0]
+    assert request.destination == "slack.ops"
+    assert request.event_type == "turn_complete"
+    assert "Finished after the CLI drained the stream." in request.message
+@pytest.mark.asyncio
+async def test_turn_complete_can_be_disabled_by_custom_auto_event_config():
+    """turn_complete does not notify when auto_event_types omits it."""
+    config = Config.model_validate(
+        {
+            "model_name": "moonshotai/Kimi-K2.6",
+            "messaging": {
+                "enabled": True,
+                # Only "error" events are configured for auto notification.
+                "auto_event_types": ["error"],
+                "destinations": {
+                    "slack.ops": {
+                        "provider": "slack",
+                        "token": "xoxb-test",
+                        "channel": "C123",
+                        "allow_auto_events": True,
+                    }
+                },
+            },
+        }
+    )
+    gateway = RecordingGateway()
+    session = _test_session(config, gateway, session_id="sess-optout")
+    session.set_notification_destinations(["slack.ops"])
+    await session.send_event(
+        Event(
+            event_type="turn_complete",
+            data={"final_response": "This should not notify."},
+        )
+    )
+    assert gateway.enqueued == []
+def test_session_manager_updates_notification_destinations_in_session_info():
+    """SessionManager de-duplicates destinations, reports them in session info, and rejects unknown names."""
+    config = _config_with_messaging(allow_auto_events=True)
+    # Construct from the repo's CLI config file, then swap in the test config
+    # and an empty session table.
+    manager = SessionManager(str(Path(__file__).resolve().parents[2] / "configs" / "cli_agent_config.json"))
+    manager.config = config
+    manager.sessions = {}
+    session = _test_session(config, RecordingGateway(), session_id="sess-manager")
+    manager.sessions["sess-manager"] = AgentSession(
+        session_id="sess-manager",
+        session=session,
+        tool_router=DummyToolRouter(),
+        submission_queue=asyncio.Queue(),
+    )
+    # The repeated name is collapsed to a single entry.
+    updated = manager.set_notification_destinations(
+        "sess-manager",
+        ["slack.ops", "slack.ops"],
+    )
+    assert updated == ["slack.ops"]
+    info = manager.get_session_info("sess-manager")
+    assert info is not None
+    assert info["notification_destinations"] == ["slack.ops"]
+    # A name missing from the messaging config is rejected.
+    with pytest.raises(ValueError):
+        manager.set_notification_destinations("sess-manager", ["slack.unknown"])

tests/unit/test_thinking_history.py CHANGED Viewed

@@ -159,7 +159,7 @@ async def test_streaming_call_rebuilds_anthropic_thinking_state(monkeypatch):
 @pytest.mark.asyncio
-async def test_streaming_call_collects_anthropic_delta_thinking_state(monkeypatch):
     async def fake_stream():
         yield SimpleNamespace(
             choices=[
@@ -167,7 +167,31 @@ async def test_streaming_call_collects_anthropic_delta_thinking_state(monkeypatc
                     delta=SimpleNamespace(
                         content=None,
                         tool_calls=None,
-                        thinking_blocks=[{"type": "thinking", "thinking": "reasoned"}],
                     ),
                     finish_reason=None,
                 )
@@ -186,8 +210,26 @@ async def test_streaming_call_collects_anthropic_delta_thinking_state(monkeypatc
     async def fake_acompletion(**_kwargs):
         return fake_stream()
-    def fail_chunk_builder(*_args, **_kwargs):
-        raise AssertionError("stream_chunk_builder should not run when deltas include thinking")
     events = []
     async def send_event(event):
@@ -199,7 +241,7 @@ async def test_streaming_call_collects_anthropic_delta_thinking_state(monkeypatc
         send_event=send_event,
     )
     monkeypatch.setattr(agent_loop, "acompletion", fake_acompletion)
-    monkeypatch.setattr(agent_loop, "stream_chunk_builder", fail_chunk_builder)
     result = await _call_llm_streaming(
         session,
@@ -209,7 +251,10 @@ async def test_streaming_call_collects_anthropic_delta_thinking_state(monkeypatc
     )
     assert result.content == "done"
-    assert result.thinking_blocks == [{"type": "thinking", "thinking": "reasoned"}]
 @pytest.mark.asyncio

 @pytest.mark.asyncio
+async def test_streaming_call_rebuilds_anthropic_delta_thinking_state(monkeypatch):
     async def fake_stream():
         yield SimpleNamespace(
             choices=[
                     delta=SimpleNamespace(
                         content=None,
                         tool_calls=None,
+                        thinking_blocks=[
+                            {
+                                "type": "thinking",
+                                "thinking": "reasoned",
+                                "signature": "",
+                            }
+                        ],
+                    ),
+                    finish_reason=None,
+                )
+            ],
+        )
+        yield SimpleNamespace(
+            choices=[
+                SimpleNamespace(
+                    delta=SimpleNamespace(
+                        content=None,
+                        tool_calls=None,
+                        thinking_blocks=[
+                            {
+                                "type": "thinking",
+                                "thinking": "",
+                                "signature": "signed",
+                            }
+                        ],
                     ),
                     finish_reason=None,
                 )
     async def fake_acompletion(**_kwargs):
         return fake_stream()
+    def fake_chunk_builder(chunks, **_kwargs):
+        assert len(chunks) == 4
+        return SimpleNamespace(
+            choices=[
+                SimpleNamespace(
+                    message=Message(
+                        role="assistant",
+                        content="done",
+                        thinking_blocks=[
+                            {
+                                "type": "thinking",
+                                "thinking": "reasoned",
+                                "signature": "signed",
+                            }
+                        ],
+                        reasoning_content="reasoned",
+                    )
+                )
+            ]
+        )
     events = []
     async def send_event(event):
         send_event=send_event,
     )
     monkeypatch.setattr(agent_loop, "acompletion", fake_acompletion)
+    monkeypatch.setattr(agent_loop, "stream_chunk_builder", fake_chunk_builder)
     result = await _call_llm_streaming(
         session,
     )
     assert result.content == "done"
+    assert result.thinking_blocks == [
+        {"type": "thinking", "thinking": "reasoned", "signature": "signed"}
+    ]
+    assert result.reasoning_content == "reasoned"
 @pytest.mark.asyncio

uv.lock CHANGED Viewed

@@ -1832,7 +1832,7 @@ requires-dist = [
     { name = "prompt-toolkit", specifier = ">=3.0.0" },
     { name = "pydantic", specifier = ">=2.12.3" },
     { name = "pytest", marker = "extra == 'dev'", specifier = ">=9.0.2" },
-    { name = "pytest-asyncio", marker = "extra == 'dev'", specifier = ">=0.26.0" },
     { name = "python-dotenv", specifier = ">=1.2.1" },
     { name = "requests", specifier = ">=2.33.0" },
     { name = "rich", specifier = ">=13.0.0" },

     { name = "prompt-toolkit", specifier = ">=3.0.0" },
     { name = "pydantic", specifier = ">=2.12.3" },
     { name = "pytest", marker = "extra == 'dev'", specifier = ">=9.0.2" },
+    { name = "pytest-asyncio", marker = "extra == 'dev'", specifier = ">=1.2.0" },
     { name = "python-dotenv", specifier = ">=1.2.1" },
     { name = "requests", specifier = ">=2.33.0" },
     { name = "rich", specifier = ">=13.0.0" },