Spaces:

smolagents
/

ml-agent

Running

App Files Files Community

akseljoonas HF Staff committed on Feb 26

Commit

bdbcdab

1 Parent(s): 7b48ae0

feat: merge HF Space improvements

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

README.md +4 -0
agent/context_manager/manager.py +76 -9
agent/core/agent_loop.py +382 -123
agent/core/session.py +48 -9
agent/core/session_uploader.py +2 -4
agent/core/tools.py +12 -9
agent/prompts/system_prompt.yaml +2 -2
agent/prompts/system_prompt_v2.yaml +46 -59
agent/prompts/system_prompt_v3.yaml +0 -118
agent/tools/dataset_tools.py +16 -9
agent/tools/docs_tools.py +21 -10
agent/tools/github_find_examples.py +49 -10
agent/tools/github_read_file.py +52 -6
agent/tools/jobs_tool.py +138 -122
agent/tools/plan_tool.py +12 -5
agent/tools/sandbox_client.py +0 -714
agent/tools/sandbox_tool.py +0 -201
backend/dependencies.py +144 -0
backend/main.py +8 -0
backend/models.py +12 -0
backend/routes/agent.py +282 -27
backend/routes/auth.py +74 -51
backend/session_manager.py +114 -14
backend/websocket.py +0 -10
configs/main_agent_config.json +2 -2
frontend/package-lock.json +168 -0
frontend/package.json +2 -0
frontend/src/App.tsx +5 -0
frontend/src/components/ApprovalModal/ApprovalModal.tsx +0 -208
frontend/src/components/Chat/ActivityStatusBar.tsx +57 -0
frontend/src/components/Chat/ApprovalFlow.tsx +0 -515
frontend/src/components/Chat/AssistantMessage.tsx +119 -0
frontend/src/components/Chat/ChatInput.tsx +218 -15
frontend/src/components/Chat/MarkdownContent.tsx +160 -0
frontend/src/components/Chat/MessageBubble.tsx +32 -203
frontend/src/components/Chat/MessageList.tsx +125 -74
frontend/src/components/Chat/ThinkingIndicator.tsx +48 -0
frontend/src/components/Chat/ToolCallGroup.tsx +655 -0
frontend/src/components/Chat/UserMessage.tsx +105 -0
frontend/src/components/CodePanel/CodePanel.tsx +479 -256
frontend/src/components/Layout/AppLayout.tsx +351 -167
frontend/src/components/SessionSidebar/SessionSidebar.tsx +279 -181
frontend/src/components/WelcomeScreen/WelcomeScreen.tsx +247 -0
frontend/src/hooks/useAgentChat.ts +278 -0
frontend/src/hooks/useAgentWebSocket.ts +0 -503
frontend/src/hooks/useAuth.ts +77 -0
frontend/src/lib/chat-message-store.ts +62 -0
frontend/src/lib/ws-chat-transport.ts +593 -0
frontend/src/main.tsx +13 -3
frontend/src/store/agentStore.ts +121 -206

README.md CHANGED Viewed

@@ -9,7 +9,11 @@ hf_oauth: true
 hf_oauth_scopes:
   - read-repos
   - write-repos
   - inference-api
 ---
 # HF Agent

 hf_oauth_scopes:
   - read-repos
   - write-repos
+  - contribute-repos
+  - manage-repos
   - inference-api
+  - jobs
+  - write-discussions
 ---
 # HF Agent

agent/context_manager/manager.py CHANGED Viewed

@@ -2,6 +2,7 @@
 Context management for conversation history
 """
 import os
 import zoneinfo
 from datetime import datetime
@@ -13,6 +14,72 @@ from huggingface_hub import HfApi
 from jinja2 import Template
 from litellm import Message, acompletion
 class ContextManager:
     """Manages conversation context and message history for the agent"""
@@ -23,11 +90,11 @@ class ContextManager:
         compact_size: float = 0.1,
         untouched_messages: int = 5,
         tool_specs: list[dict[str, Any]] | None = None,
-        prompt_file_suffix: str = "system_prompt_v3.yaml",
     ):
         self.system_prompt = self._load_system_prompt(
             tool_specs or [],
-            prompt_file_suffix="system_prompt_v3.yaml",
         )
         self.max_context = max_context
         self.compact_size = int(max_context * compact_size)
@@ -54,9 +121,8 @@ class ContextManager:
         current_time = now.strftime("%H:%M:%S.%f")[:-3]
         current_timezone = f"{now.strftime('%Z')} (UTC{now.strftime('%z')[:3]}:{now.strftime('%z')[3:]})"
-        # Get HF user info with explicit token from env
-        hf_token = os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACE_HUB_TOKEN")
-        hf_user_info = HfApi(token=hf_token).whoami().get("name", "unknown")
         template = Template(template_str)
         return template.render(
@@ -78,9 +144,7 @@ class ContextManager:
         """Get all messages for sending to LLM"""
         return self.items
-    async def compact(
-        self, model_name: str, tool_specs: list[dict] | None = None
-    ) -> None:
         """Remove old messages to keep history under target size"""
         if (self.context_length <= self.max_context) or not self.items:
             return
@@ -110,11 +174,14 @@ class ContextManager:
             )
         )
         response = await acompletion(
             model=model_name,
             messages=messages_to_summarize,
             max_completion_tokens=self.compact_size,
-            tools=tool_specs,
         )
         summarized_message = Message(
             role="assistant", content=response.choices[0].message.content

 Context management for conversation history
 """
+import logging
 import os
 import zoneinfo
 from datetime import datetime
 from jinja2 import Template
 from litellm import Message, acompletion
+logger = logging.getLogger(__name__)
+# Module-level cache for HF username — avoids repeating the slow whoami() call
+_hf_username_cache: str | None = None
+_HF_WHOAMI_URL = "https://huggingface.co/api/whoami-v2"
+_HF_WHOAMI_TIMEOUT = 5  # seconds
+def _get_hf_username() -> str:
+    """Return the HF username, cached after the first call.
+    Uses subprocess + curl to avoid Python HTTP client IPv6 issues that
+    cause 40+ second hangs (httpx/urllib try IPv6 first which times out
+    at OS level before falling back to IPv4 — the "Happy Eyeballs" problem).
+    """
+    # Contract: always returns a string. Every failure path below stores the
+    # literal "unknown" in the module-level cache, so a failed lookup is NOT
+    # retried for the lifetime of the process.
+    import json
+    import subprocess
+    import time as _t
+    global _hf_username_cache
+    # Fast path: an earlier call already resolved (or gave up on) the username.
+    if _hf_username_cache is not None:
+        return _hf_username_cache
+    # HF_TOKEN takes precedence; HUGGINGFACE_HUB_TOKEN is only the fallback.
+    hf_token = os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACE_HUB_TOKEN")
+    if not hf_token:
+        logger.warning("No HF_TOKEN set, using 'unknown' as username")
+        _hf_username_cache = "unknown"
+        return _hf_username_cache
+    # Monotonic clock is used purely to report elapsed time in the log lines.
+    t0 = _t.monotonic()
+    try:
+        # NOTE(review): the bearer token is passed as a command-line argument,
+        # so it is visible in the process list while curl runs — consider
+        # supplying the header via stdin/a config file instead.
+        # NOTE(review): curl is invoked without --fail, so an HTTP error that
+        # returns a body still exits 0 and is handled by the JSON branch below.
+        result = subprocess.run(
+            [
+                "curl",
+                "-s",
+                "-4",  # force IPv4
+                "-m",
+                str(_HF_WHOAMI_TIMEOUT),  # max time
+                "-H",
+                f"Authorization: Bearer {hf_token}",
+                _HF_WHOAMI_URL,
+            ],
+            capture_output=True,
+            text=True,
+            # Two seconds longer than curl's own -m limit — presumably so curl
+            # gets the chance to time out and exit by itself first.
+            timeout=_HF_WHOAMI_TIMEOUT + 2,
+        )
+        t1 = _t.monotonic()
+        if result.returncode == 0 and result.stdout:
+            data = json.loads(result.stdout)
+            # A JSON object without a "name" key also resolves to "unknown".
+            _hf_username_cache = data.get("name", "unknown")
+            logger.info(
+                f"HF username resolved to '{_hf_username_cache}' in {t1 - t0:.2f}s"
+            )
+        else:
+            # Non-zero exit status or empty output from curl.
+            logger.warning(
+                f"curl whoami failed (rc={result.returncode}) in {t1 - t0:.2f}s"
+            )
+            _hf_username_cache = "unknown"
+    except Exception as e:
+        # Best-effort lookup: anything raised above (e.g. the subprocess
+        # timeout, a failure to launch curl, or invalid JSON) degrades to
+        # "unknown" instead of propagating to the caller.
+        t1 = _t.monotonic()
+        logger.warning(f"HF whoami failed in {t1 - t0:.2f}s: {e}")
+        _hf_username_cache = "unknown"
+    return _hf_username_cache
 class ContextManager:
     """Manages conversation context and message history for the agent"""
         compact_size: float = 0.1,
         untouched_messages: int = 5,
         tool_specs: list[dict[str, Any]] | None = None,
+        prompt_file_suffix: str = "system_prompt_v2.yaml",
     ):
         self.system_prompt = self._load_system_prompt(
             tool_specs or [],
+            prompt_file_suffix="system_prompt_v2.yaml",
         )
         self.max_context = max_context
         self.compact_size = int(max_context * compact_size)
         current_time = now.strftime("%H:%M:%S.%f")[:-3]
         current_timezone = f"{now.strftime('%Z')} (UTC{now.strftime('%z')[:3]}:{now.strftime('%z')[3:]})"
+        # Get HF user info (cached after the first call)
+        hf_user_info = _get_hf_username()
         template = Template(template_str)
         return template.render(
         """Get all messages for sending to LLM"""
         return self.items
+    async def compact(self, model_name: str) -> None:
         """Remove old messages to keep history under target size"""
         if (self.context_length <= self.max_context) or not self.items:
             return
             )
         )
+        hf_key = os.environ.get("INFERENCE_TOKEN")
         response = await acompletion(
             model=model_name,
             messages=messages_to_summarize,
             max_completion_tokens=self.compact_size,
+            api_key=hf_key
+            if hf_key and model_name.startswith("huggingface/")
+            else None,
         )
         summarized_message = Message(
             role="assistant", content=response.choices[0].message.content

agent/core/agent_loop.py CHANGED Viewed

@@ -4,9 +4,10 @@ Main agent implementation with integrated tool system and MCP support
 import asyncio
 import json
-from litellm import ChatCompletionMessageToolCall, Message, ModelResponse, acompletion
-from litellm.exceptions import ContextWindowExceededError
 from lmnr import observe
 from agent.config import Config
@@ -14,7 +15,42 @@ from agent.core.session import Event, OpType, Session
 from agent.core.tools import ToolRouter
 from agent.tools.jobs_tool import CPU_FLAVORS
 ToolCall = ChatCompletionMessageToolCall
 def _validate_tool_args(tool_args: dict) -> tuple[bool, str | None]:
@@ -52,9 +88,6 @@ def _needs_approval(
     if not args_valid:
         return False
-    if tool_name == "sandbox_create":
-        return True
     if tool_name == "hf_jobs":
         operation = tool_args.get("operation", "")
         if operation not in ["run", "uv", "scheduled run", "scheduled uv"]:
@@ -109,31 +142,49 @@ def _needs_approval(
     return False
-async def _compact_and_notify(session: Session) -> None:
-    """Run compaction and send event if context was reduced."""
-    old_length = session.context_manager.context_length
-    tool_specs = session.tool_router.get_tool_specs_for_llm()
-    await session.context_manager.compact(
-        model_name=session.config.model_name,
-        tool_specs=tool_specs,
-    )
-    new_length = session.context_manager.context_length
-    if new_length != old_length:
-        await session.send_event(
-            Event(
-                event_type="compacted",
-                data={"old_tokens": old_length, "new_tokens": new_length},
             )
-        )
-class Handlers:
-    """Handler functions for each operation type"""
     @staticmethod
     @observe(name="run_agent")
     async def run_agent(
-        session: Session, text: str, max_iterations: int = 300
     ) -> str | None:
         """
         Handle user input (like user_input_or_turn in codex.rs:1291)
@@ -145,6 +196,11 @@ class Handlers:
             Laminar.set_trace_session_id(session_id=session.session_id)
         # Add user message to history only if there's actual content
         if text:
             user_msg = Message(role="user", content=text)
@@ -160,42 +216,102 @@ class Handlers:
         final_response = None
         while iteration < max_iterations:
-            # Compact before calling the LLM if context is near the limit
-            await _compact_and_notify(session)
             messages = session.context_manager.get_messages()
             tools = session.tool_router.get_tool_specs_for_llm()
             try:
-                response: ModelResponse = await acompletion(
-                    model=session.config.model_name,
                     messages=messages,
                     tools=tools,
                     tool_choice="auto",
                 )
-                # Extract text response, token usage, and tool calls
-                message = response.choices[0].message
-                content = message.content
-                token_count = response.usage.total_tokens
-                tool_calls: list[ToolCall] = message.get("tool_calls", [])
                 # If no tool calls, add assistant message and we're done
                 if not tool_calls:
                     if content:
                         assistant_msg = Message(role="assistant", content=content)
                         session.context_manager.add_message(assistant_msg, token_count)
-                        await session.send_event(
-                            Event(
-                                event_type="assistant_message",
-                                data={"content": content},
-                            )
-                        )
                         final_response = content
                     break
                 # Add assistant message with tool calls to history
-                # LiteLLM will format this correctly for the provider
                 assistant_msg = Message(
                     role="assistant",
                     content=content,
@@ -203,66 +319,97 @@ class Handlers:
                 )
                 session.context_manager.add_message(assistant_msg, token_count)
-                if content:
-                    await session.send_event(
-                        Event(event_type="assistant_message", data={"content": content})
-                    )
                 # Separate tools into those requiring approval and those that don't
                 approval_required_tools = []
                 non_approval_tools = []
                 for tc in tool_calls:
                     tool_name = tc.function.name
-                    tool_args = json.loads(tc.function.arguments)
                     if _needs_approval(tool_name, tool_args, session.config):
                         approval_required_tools.append(tc)
                     else:
                         non_approval_tools.append(tc)
-                # Execute non-approval tools first
-                for tc in non_approval_tools:
-                    tool_name = tc.function.name
-                    tool_args = json.loads(tc.function.arguments)
-                    # Validate tool arguments before calling
-                    args_valid, error_msg = _validate_tool_args(tool_args)
-                    if not args_valid:
-                        # Return error to agent instead of calling tool
-                        output = error_msg
-                        success = False
-                    else:
-                        await session.send_event(
-                            Event(
-                                event_type="tool_call",
-                                data={"tool": tool_name, "arguments": tool_args},
                             )
-                        )
-                        output, success = await session.tool_router.call_tool(
-                            tool_name, tool_args, session=session
                         )
-                    # Add tool result to history
-                    tool_msg = Message(
-                        role="tool",
-                        content=output,
-                        tool_call_id=tc.id,
-                        name=tool_name,
                     )
-                    session.context_manager.add_message(tool_msg)
-                    await session.send_event(
-                        Event(
-                            event_type="tool_output",
-                            data={
-                                "tool": tool_name,
-                                "output": output,
-                                "success": success,
-                            },
                         )
-                    )
                 # If there are tools requiring approval, ask for batch approval
                 if approval_required_tools:
@@ -270,7 +417,10 @@ class Handlers:
                     tools_data = []
                     for tc in approval_required_tools:
                         tool_name = tc.function.name
-                        tool_args = json.loads(tc.function.arguments)
                         tools_data.append(
                             {
                                 "tool": tool_name,
@@ -299,14 +449,6 @@ class Handlers:
                 iteration += 1
-            except ContextWindowExceededError:
-                # Force compact and retry this iteration
-                session.context_manager.context_length = (
-                    session.context_manager.max_context + 1
-                )
-                await _compact_and_notify(session)
-                continue
             except Exception as e:
                 import traceback
@@ -318,6 +460,18 @@ class Handlers:
                 )
                 break
         await session.send_event(
             Event(
                 event_type="turn_complete",
@@ -337,13 +491,43 @@ class Handlers:
         session.interrupt()
         await session.send_event(Event(event_type="interrupted"))
     @staticmethod
     async def undo(session: Session) -> None:
-        """Handle undo (like undo in codex.rs:1314)"""
-        # Remove last user turn and all following items
-        # Simplified: just remove last 2 items
-        for _ in range(min(2, len(session.context_manager.items))):
-            session.context_manager.items.pop()
         await session.send_event(Event(event_type="undo_complete"))
@@ -371,6 +555,9 @@ class Handlers:
         # Create a map of tool_call_id -> approval decision
         approval_map = {a["tool_call_id"]: a for a in approvals}
         # Separate approved and rejected tool calls
         approved_tasks = []
@@ -378,36 +565,99 @@ class Handlers:
         for tc in tool_calls:
             tool_name = tc.function.name
-            tool_args = json.loads(tc.function.arguments)
             approval_decision = approval_map.get(tc.id, {"approved": False})
             if approval_decision.get("approved", False):
-                approved_tasks.append((tc, tool_name, tool_args))
             else:
                 rejected_tasks.append((tc, tool_name, approval_decision))
         # Execute all approved tools concurrently
-        async def execute_tool(tc, tool_name, tool_args):
-            """Execute a single tool and return its result"""
             await session.send_event(
                 Event(
-                    event_type="tool_call",
-                    data={"tool": tool_name, "arguments": tool_args},
                 )
             )
             output, success = await session.tool_router.call_tool(
-                tool_name, tool_args, session=session
             )
-            return (tc, tool_name, output, success)
         # Execute all approved tools concurrently and wait for ALL to complete
         if approved_tasks:
             results = await asyncio.gather(
                 *[
-                    execute_tool(tc, tool_name, tool_args)
-                    for tc, tool_name, tool_args in approved_tasks
                 ],
                 return_exceptions=True,
             )
@@ -416,10 +666,13 @@ class Handlers:
             for result in results:
                 if isinstance(result, Exception):
                     # Handle execution error
-                    print(f"Tool execution error: {result}")
                     continue
-                tc, tool_name, output, success = result
                 # Add tool result to context
                 tool_msg = Message(
@@ -435,6 +688,7 @@ class Handlers:
                         event_type="tool_output",
                         data={
                             "tool": tool_name,
                             "output": output,
                             "success": success,
                         },
@@ -446,7 +700,14 @@ class Handlers:
             rejection_msg = "Job execution cancelled by user"
             user_feedback = approval_decision.get("feedback")
             if user_feedback:
-                rejection_msg += f". User feedback: {user_feedback}"
             tool_msg = Message(
                 role="tool",
@@ -461,6 +722,7 @@ class Handlers:
                     event_type="tool_output",
                     data={
                         "tool": tool_name,
                         "output": rejection_msg,
                         "success": False,
                     },
@@ -478,11 +740,9 @@ class Handlers:
         """Handle shutdown (like shutdown in codex.rs:1329)"""
         # Save session trajectory if enabled (fire-and-forget, returns immediately)
         if session.config.save_sessions:
-            print("💾 Saving session...")
             repo_id = session.config.session_dataset_repo
             _ = session.save_and_upload_detached(repo_id)
-            # if local_path:
-            # print("✅ Session saved locally, upload in progress")
         session.is_running = False
         await session.send_event(Event(event_type="shutdown"))
@@ -497,7 +757,7 @@ async def process_submission(session: Session, submission) -> bool:
         bool: True to continue, False to shutdown
     """
     op = submission.operation
-    # print(f"📨 Received: {op.op_type.value}")
     if op.op_type == OpType.USER_INPUT:
         text = op.data.get("text", "") if op.data else ""
@@ -509,8 +769,7 @@ async def process_submission(session: Session, submission) -> bool:
         return True
     if op.op_type == OpType.COMPACT:
-        # compact from the frontend
-        await _compact_and_notify(session)
         return True
     if op.op_type == OpType.UNDO:
@@ -525,7 +784,7 @@ async def process_submission(session: Session, submission) -> bool:
     if op.op_type == OpType.SHUTDOWN:
         return not await Handlers.shutdown(session)
-    print(f"⚠️  Unknown operation: {op.op_type}")
     return True
@@ -543,7 +802,7 @@ async def submission_loop(
     # Create session with tool router
     session = Session(event_queue, config=config, tool_router=tool_router)
-    print("Agent loop started")
     # Retry any failed uploads from previous sessions (fire-and-forget)
     if config and config.save_sessions:
@@ -567,25 +826,25 @@ async def submission_loop(
                     if not should_continue:
                         break
                 except asyncio.CancelledError:
-                    print("\n⚠️  Agent loop cancelled")
                     break
                 except Exception as e:
-                    print(f"❌ Error in agent loop: {e}")
                     await session.send_event(
                         Event(event_type="error", data={"error": str(e)})
                     )
-        print("🛑 Agent loop exited")
     finally:
         # Emergency save if session saving is enabled and shutdown wasn't called properly
         if session.config.save_sessions and session.is_running:
-            print("\n💾 Emergency save: preserving session before exit...")
             try:
                 local_path = session.save_and_upload_detached(
                     session.config.session_dataset_repo
                 )
                 if local_path:
-                    print("✅ Emergency save successful, upload in progress")
             except Exception as e:
-                print(f"❌ Emergency save failed: {e}")

 import asyncio
 import json
+import logging
+import os
+from litellm import ChatCompletionMessageToolCall, Message, acompletion
 from lmnr import observe
 from agent.config import Config
 from agent.core.tools import ToolRouter
 from agent.tools.jobs_tool import CPU_FLAVORS
+logger = logging.getLogger(__name__)
 ToolCall = ChatCompletionMessageToolCall
+# Explicit inference token — needed because litellm checks HF_TOKEN before
+# HUGGINGFACE_API_KEY, and HF_TOKEN (used for Hub ops) may lack inference permissions.
+_INFERENCE_API_KEY = os.environ.get("INFERENCE_TOKEN")
+def _resolve_hf_router_params(model_name: str) -> dict:
+    """
+    Build LiteLLM kwargs for HuggingFace Router models.
+    api-inference.huggingface.co is deprecated; the new router lives at
+    router.huggingface.co/<provider>/v3/openai.  LiteLLM's built-in
+    ``huggingface/`` provider still targets the old endpoint, so we
+    rewrite model names to ``openai/`` and supply the correct api_base.
+    Input format:  huggingface/<router_provider>/<org>/<model>
+    Example:       huggingface/novita/moonshotai/kimi-k2.5
+    """
+    # Returns a dict meant to be splatted into acompletion(): either just
+    # {"model": ...} (pass-through) or model + api_base + api_key (router).
+    # Any model without the "huggingface/" prefix is passed through untouched.
+    if not model_name.startswith("huggingface/"):
+        return {"model": model_name}
+    parts = model_name.split("/", 2)  # ['huggingface', 'novita', 'moonshotai/kimi-k2.5']
+    # Fewer than three segments means there is no provider + model pair to
+    # split out, so the name is handed to LiteLLM unchanged.
+    if len(parts) < 3:
+        return {"model": model_name}
+    router_provider = parts[1]
+    actual_model = parts[2]
+    # _INFERENCE_API_KEY was read from the environment once at import time;
+    # the HF_TOKEN fallback is read on every call. api_key is None when
+    # neither variable is set.
+    api_key = _INFERENCE_API_KEY or os.environ.get("HF_TOKEN")
+    return {
+        "model": f"openai/{actual_model}",
+        "api_base": f"https://router.huggingface.co/{router_provider}/v3/openai",
+        "api_key": api_key,
+    }
 def _validate_tool_args(tool_args: dict) -> tuple[bool, str | None]:
     if not args_valid:
         return False
     if tool_name == "hf_jobs":
         operation = tool_args.get("operation", "")
         if operation not in ["run", "uv", "scheduled run", "scheduled uv"]:
     return False
+class Handlers:
+    """Handler functions for each operation type"""
+    @staticmethod
+    async def _abandon_pending_approval(session: Session) -> None:
+        """Cancel pending approval tools when the user continues the conversation.
+        Injects rejection tool-result messages into the LLM context (so the
+        history stays valid) and notifies the frontend that those tools were
+        abandoned.
+        """
+        # NOTE(review): assumes session.pending_approval is a dict-like object
+        # here — the visible caller in run_agent only invokes this when it is
+        # truthy. A missing "tool_calls" key is treated as an empty list.
+        tool_calls = session.pending_approval.get("tool_calls", [])
+        for tc in tool_calls:
+            tool_name = tc.function.name
+            abandon_msg = "Task abandoned — user continued the conversation without approving."
+            # Keep LLM context valid: every tool_call needs a tool result
+            tool_msg = Message(
+                role="tool",
+                content=abandon_msg,
+                tool_call_id=tc.id,
+                name=tool_name,
             )
+            session.context_manager.add_message(tool_msg)
+            # One "tool_state_change" event per abandoned call, keyed by the
+            # tool_call_id.
+            await session.send_event(
+                Event(
+                    event_type="tool_state_change",
+                    data={
+                        "tool_call_id": tc.id,
+                        "tool": tool_name,
+                        "state": "abandoned",
+                    },
+                )
+            )
+        # Clear the pending state only after every call has its result message;
+        # the local tool_calls list is still used for the log line below.
+        session.pending_approval = None
+        logger.info("Abandoned %d pending approval tool(s)", len(tool_calls))
     @staticmethod
     @observe(name="run_agent")
     async def run_agent(
+        session: Session, text: str, max_iterations: int = 10
     ) -> str | None:
         """
         Handle user input (like user_input_or_turn in codex.rs:1291)
             Laminar.set_trace_session_id(session_id=session.session_id)
+        # If there's a pending approval and the user sent a new message,
+        # abandon the pending tools so the LLM context stays valid.
+        if text and session.pending_approval:
+            await Handlers._abandon_pending_approval(session)
         # Add user message to history only if there's actual content
         if text:
             user_msg = Message(role="user", content=text)
         final_response = None
         while iteration < max_iterations:
             messages = session.context_manager.get_messages()
             tools = session.tool_router.get_tool_specs_for_llm()
             try:
+                # ── Stream the LLM response ──────────────────────────
+                llm_params = _resolve_hf_router_params(session.config.model_name)
+                response = await acompletion(
                     messages=messages,
                     tools=tools,
                     tool_choice="auto",
+                    stream=True,
+                    stream_options={"include_usage": True},
+                    **llm_params,
                 )
+                full_content = ""
+                tool_calls_acc: dict[int, dict] = {}
+                token_count = 0
+                async for chunk in response:
+                    choice = chunk.choices[0] if chunk.choices else None
+                    if not choice:
+                        # Last chunk may carry only usage info
+                        if hasattr(chunk, "usage") and chunk.usage:
+                            token_count = chunk.usage.total_tokens
+                        continue
+                    delta = choice.delta
+                    # Stream text deltas to the frontend
+                    if delta.content:
+                        full_content += delta.content
+                        await session.send_event(
+                            Event(
+                                event_type="assistant_chunk",
+                                data={"content": delta.content},
+                            )
+                        )
+                    # Accumulate tool-call deltas (name + args arrive in pieces)
+                    if delta.tool_calls:
+                        for tc_delta in delta.tool_calls:
+                            idx = tc_delta.index
+                            if idx not in tool_calls_acc:
+                                tool_calls_acc[idx] = {
+                                    "id": "",
+                                    "type": "function",
+                                    "function": {"name": "", "arguments": ""},
+                                }
+                            if tc_delta.id:
+                                tool_calls_acc[idx]["id"] = tc_delta.id
+                            if tc_delta.function:
+                                if tc_delta.function.name:
+                                    tool_calls_acc[idx]["function"]["name"] += (
+                                        tc_delta.function.name
+                                    )
+                                if tc_delta.function.arguments:
+                                    tool_calls_acc[idx]["function"]["arguments"] += (
+                                        tc_delta.function.arguments
+                                    )
+                    # Capture usage from the final chunk
+                    if hasattr(chunk, "usage") and chunk.usage:
+                        token_count = chunk.usage.total_tokens
+                # ── Stream finished — reconstruct full message ───────
+                content = full_content or None
+                # Build tool_calls list from accumulated deltas
+                tool_calls: list[ToolCall] = []
+                for idx in sorted(tool_calls_acc.keys()):
+                    tc_data = tool_calls_acc[idx]
+                    tool_calls.append(
+                        ToolCall(
+                            id=tc_data["id"],
+                            type="function",
+                            function={
+                                "name": tc_data["function"]["name"],
+                                "arguments": tc_data["function"]["arguments"],
+                            },
+                        )
+                    )
+                # Signal end of streaming to the frontend
+                await session.send_event(
+                    Event(event_type="assistant_stream_end", data={})
+                )
                 # If no tool calls, add assistant message and we're done
                 if not tool_calls:
                     if content:
                         assistant_msg = Message(role="assistant", content=content)
                         session.context_manager.add_message(assistant_msg, token_count)
                         final_response = content
                     break
                 # Add assistant message with tool calls to history
                 assistant_msg = Message(
                     role="assistant",
                     content=content,
                 )
                 session.context_manager.add_message(assistant_msg, token_count)
                 # Separate tools into those requiring approval and those that don't
                 approval_required_tools = []
                 non_approval_tools = []
                 for tc in tool_calls:
                     tool_name = tc.function.name
+                    try:
+                        tool_args = json.loads(tc.function.arguments)
+                    except (json.JSONDecodeError, TypeError) as e:
+                        logger.warning(f"Malformed tool arguments for {tool_name}: {e}")
+                        tool_args = {}
                     if _needs_approval(tool_name, tool_args, session.config):
                         approval_required_tools.append(tc)
                     else:
                         non_approval_tools.append(tc)
+                # Execute non-approval tools (in parallel when possible)
+                if non_approval_tools:
+                    # 1. Parse args and validate upfront
+                    parsed_tools: list[
+                        tuple[ChatCompletionMessageToolCall, str, dict, bool, str]
+                    ] = []
+                    for tc in non_approval_tools:
+                        tool_name = tc.function.name
+                        try:
+                            tool_args = json.loads(tc.function.arguments)
+                        except (json.JSONDecodeError, TypeError):
+                            tool_args = {}
+                        args_valid, error_msg = _validate_tool_args(tool_args)
+                        parsed_tools.append(
+                            (tc, tool_name, tool_args, args_valid, error_msg)
+                        )
+                    # 2. Send all tool_call events upfront (so frontend shows them all)
+                    for tc, tool_name, tool_args, args_valid, _ in parsed_tools:
+                        if args_valid:
+                            await session.send_event(
+                                Event(
+                                    event_type="tool_call",
+                                    data={
+                                        "tool": tool_name,
+                                        "arguments": tool_args,
+                                        "tool_call_id": tc.id,
+                                    },
+                                )
                             )
+                    # 3. Execute all valid tools in parallel
+                    async def _exec_tool(
+                        tc: ChatCompletionMessageToolCall,
+                        name: str,
+                        args: dict,
+                        valid: bool,
+                        err: str,
+                    ) -> tuple[ChatCompletionMessageToolCall, str, dict, str, bool]:
+                        if not valid:
+                            return (tc, name, args, err, False)
+                        out, ok = await session.tool_router.call_tool(
+                            name, args, session=session
                         )
+                        return (tc, name, args, out, ok)
+                    results = await asyncio.gather(
+                        *[
+                            _exec_tool(tc, name, args, valid, err)
+                            for tc, name, args, valid, err in parsed_tools
+                        ]
                     )
+                    # 4. Record results and send outputs (order preserved)
+                    for tc, tool_name, tool_args, output, success in results:
+                        tool_msg = Message(
+                            role="tool",
+                            content=output,
+                            tool_call_id=tc.id,
+                            name=tool_name,
+                        )
+                        session.context_manager.add_message(tool_msg)
+                        await session.send_event(
+                            Event(
+                                event_type="tool_output",
+                                data={
+                                    "tool": tool_name,
+                                    "tool_call_id": tc.id,
+                                    "output": output,
+                                    "success": success,
+                                },
+                            )
                         )
                 # If there are tools requiring approval, ask for batch approval
                 if approval_required_tools:
                     tools_data = []
                     for tc in approval_required_tools:
                         tool_name = tc.function.name
+                        try:
+                            tool_args = json.loads(tc.function.arguments)
+                        except (json.JSONDecodeError, TypeError):
+                            tool_args = {}
                         tools_data.append(
                             {
                                 "tool": tool_name,
                 iteration += 1
             except Exception as e:
                 import traceback
                 )
                 break
+        old_length = session.context_manager.context_length
+        await session.context_manager.compact(model_name=session.config.model_name)
+        new_length = session.context_manager.context_length
+        if new_length != old_length:
+            await session.send_event(
+                Event(
+                    event_type="compacted",
+                    data={"old_tokens": old_length, "new_tokens": new_length},
+                )
+            )
         await session.send_event(
             Event(
                 event_type="turn_complete",
         session.interrupt()
         await session.send_event(Event(event_type="interrupted"))
+    @staticmethod
+    async def compact(session: Session) -> None:
+        """Handle compact (like compact in codex.rs:1317)"""
+        old_length = session.context_manager.context_length
+        await session.context_manager.compact(model_name=session.config.model_name)
+        new_length = session.context_manager.context_length
+        await session.send_event(
+            Event(
+                event_type="compacted",
+                data={"removed": old_length, "remaining": new_length},
+            )
+        )
     @staticmethod
     async def undo(session: Session) -> None:
+        """Remove the last complete turn (user msg + all assistant/tool msgs that follow).
+        Anthropic requires every tool_use to have a matching tool_result,
+        so we can't just pop 2 items — we must pop everything back to
+        (and including) the last user message to keep the history valid.
+        """
+        items = session.context_manager.items
+        if not items:
+            await session.send_event(Event(event_type="undo_complete"))
+            return
+        # Pop from the end until we've removed the last user message
+        removed_user = False
+        while items:
+            msg = items.pop()
+            if getattr(msg, "role", None) == "user":
+                removed_user = True
+                break
+        if not removed_user:
+            logger.warning("Undo: no user message found to remove")
         await session.send_event(Event(event_type="undo_complete"))
         # Create a map of tool_call_id -> approval decision
         approval_map = {a["tool_call_id"]: a for a in approvals}
+        for a in approvals:
+            if a.get("edited_script"):
+                logger.info(f"Received edited script for tool_call {a['tool_call_id']} ({len(a['edited_script'])} chars)")
         # Separate approved and rejected tool calls
         approved_tasks = []
         for tc in tool_calls:
             tool_name = tc.function.name
+            try:
+                tool_args = json.loads(tc.function.arguments)
+            except (json.JSONDecodeError, TypeError) as e:
+                # Malformed arguments — treat as failed, notify agent
+                logger.warning(f"Malformed tool arguments for {tool_name}: {e}")
+                tool_msg = Message(
+                    role="tool",
+                    content=f"Malformed arguments: {e}",
+                    tool_call_id=tc.id,
+                    name=tool_name,
+                )
+                session.context_manager.add_message(tool_msg)
+                await session.send_event(
+                    Event(
+                        event_type="tool_output",
+                        data={
+                            "tool": tool_name,
+                            "tool_call_id": tc.id,
+                            "output": f"Malformed arguments: {e}",
+                            "success": False,
+                        },
+                    )
+                )
+                continue
             approval_decision = approval_map.get(tc.id, {"approved": False})
             if approval_decision.get("approved", False):
+                edited_script = approval_decision.get("edited_script")
+                was_edited = False
+                if edited_script and "script" in tool_args:
+                    tool_args["script"] = edited_script
+                    was_edited = True
+                    logger.info(f"Using user-edited script for {tool_name} ({tc.id})")
+                approved_tasks.append((tc, tool_name, tool_args, was_edited))
             else:
                 rejected_tasks.append((tc, tool_name, approval_decision))
+        # Notify frontend of approval decisions immediately (before execution)
+        for tc, tool_name, tool_args, _was_edited in approved_tasks:
+            await session.send_event(
+                Event(
+                    event_type="tool_state_change",
+                    data={
+                        "tool_call_id": tc.id,
+                        "tool": tool_name,
+                        "state": "approved",
+                    },
+                )
+            )
+        for tc, tool_name, approval_decision in rejected_tasks:
+            await session.send_event(
+                Event(
+                    event_type="tool_state_change",
+                    data={
+                        "tool_call_id": tc.id,
+                        "tool": tool_name,
+                        "state": "rejected",
+                    },
+                )
+            )
         # Execute all approved tools concurrently
+        async def execute_tool(tc, tool_name, tool_args, was_edited):
+            """Execute a single tool and return its result.
+            The TraceLog already exists on the frontend (created by
+            approval_required), so we send tool_state_change instead of
+            tool_call to avoid creating a duplicate.
+            """
             await session.send_event(
                 Event(
+                    event_type="tool_state_change",
+                    data={
+                        "tool_call_id": tc.id,
+                        "tool": tool_name,
+                        "state": "running",
+                    },
                 )
             )
             output, success = await session.tool_router.call_tool(
+                tool_name, tool_args, session=session, tool_call_id=tc.id
             )
+            return (tc, tool_name, output, success, was_edited)
         # Execute all approved tools concurrently and wait for ALL to complete
         if approved_tasks:
             results = await asyncio.gather(
                 *[
+                    execute_tool(tc, tool_name, tool_args, was_edited)
+                    for tc, tool_name, tool_args, was_edited in approved_tasks
                 ],
                 return_exceptions=True,
             )
             for result in results:
                 if isinstance(result, Exception):
                     # Handle execution error
+                    logger.error(f"Tool execution error: {result}")
                     continue
+                tc, tool_name, output, success, was_edited = result
+                if was_edited:
+                    output = f"[Note: The user edited the script before execution. The output below reflects the user-modified version, not your original script.]\n\n{output}"
                 # Add tool result to context
                 tool_msg = Message(
                         event_type="tool_output",
                         data={
                             "tool": tool_name,
+                            "tool_call_id": tc.id,
                             "output": output,
                             "success": success,
                         },
             rejection_msg = "Job execution cancelled by user"
             user_feedback = approval_decision.get("feedback")
             if user_feedback:
+                # Ensure feedback is a string and sanitize any problematic characters
+                feedback_str = str(user_feedback).strip()
+                # Remove any control characters that might break JSON parsing
+                feedback_str = "".join(char for char in feedback_str if ord(char) >= 32 or char in "\n\t")
+                rejection_msg += f". User feedback: {feedback_str}"
+            # Ensure rejection_msg is a clean string
+            rejection_msg = str(rejection_msg).strip()
             tool_msg = Message(
                 role="tool",
                     event_type="tool_output",
                     data={
                         "tool": tool_name,
+                        "tool_call_id": tc.id,
                         "output": rejection_msg,
                         "success": False,
                     },
         """Handle shutdown (like shutdown in codex.rs:1329)"""
         # Save session trajectory if enabled (fire-and-forget, returns immediately)
         if session.config.save_sessions:
+            logger.info("Saving session...")
             repo_id = session.config.session_dataset_repo
             _ = session.save_and_upload_detached(repo_id)
         session.is_running = False
         await session.send_event(Event(event_type="shutdown"))
         bool: True to continue, False to shutdown
     """
     op = submission.operation
+    logger.debug("Received operation: %s", op.op_type.value)
     if op.op_type == OpType.USER_INPUT:
         text = op.data.get("text", "") if op.data else ""
         return True
     if op.op_type == OpType.COMPACT:
+        await Handlers.compact(session)
         return True
     if op.op_type == OpType.UNDO:
     if op.op_type == OpType.SHUTDOWN:
         return not await Handlers.shutdown(session)
+    logger.warning(f"Unknown operation: {op.op_type}")
     return True
     # Create session with tool router
     session = Session(event_queue, config=config, tool_router=tool_router)
+    logger.info("Agent loop started")
     # Retry any failed uploads from previous sessions (fire-and-forget)
     if config and config.save_sessions:
                     if not should_continue:
                         break
                 except asyncio.CancelledError:
+                    logger.warning("Agent loop cancelled")
                     break
                 except Exception as e:
+                    logger.error(f"Error in agent loop: {e}")
                     await session.send_event(
                         Event(event_type="error", data={"error": str(e)})
                     )
+        logger.info("Agent loop exited")
     finally:
         # Emergency save if session saving is enabled and shutdown wasn't called properly
         if session.config.save_sessions and session.is_running:
+            logger.info("Emergency save: preserving session before exit...")
             try:
                 local_path = session.save_and_upload_detached(
                     session.config.session_dataset_repo
                 )
                 if local_path:
+                    logger.info("Emergency save successful, upload in progress")
             except Exception as e:
+                logger.error(f"Emergency save failed: {e}")

agent/core/session.py CHANGED Viewed

@@ -1,5 +1,6 @@
 import asyncio
 import json
 import subprocess
 import sys
 import uuid
@@ -9,11 +10,48 @@ from enum import Enum
 from pathlib import Path
 from typing import Any, Optional
-from litellm import get_max_tokens
 from agent.config import Config
 from agent.context_manager.manager import ContextManager
 class OpType(Enum):
     USER_INPUT = "user_input"
@@ -46,7 +84,7 @@ class Session:
         self.tool_router = tool_router
         tool_specs = tool_router.get_tool_specs_for_llm() if tool_router else []
         self.context_manager = context_manager or ContextManager(
-            max_context=get_max_tokens(config.model_name),
             compact_size=0.1,
             untouched_messages=5,
             tool_specs=tool_specs,
@@ -59,7 +97,8 @@ class Session:
         self.is_running = True
         self.current_task: asyncio.Task | None = None
         self.pending_approval: Optional[dict[str, Any]] = None
-        self.sandbox = None
         # Session trajectory logging
         self.logged_events: list[dict] = []
@@ -100,7 +139,7 @@ class Session:
         turns_since_last_save = self.turn_count - self.last_auto_save_turn
         if turns_since_last_save >= interval:
-            print(f"\n💾 Auto-saving session (turn {self.turn_count})...")
             # Fire-and-forget save - returns immediately
             self.save_and_upload_detached(self.config.session_dataset_repo)
             self.last_auto_save_turn = self.turn_count
@@ -152,7 +191,7 @@ class Session:
             return str(filepath)
         except Exception as e:
-            print(f"Failed to save session locally: {e}")
             return None
     def update_local_save_status(
@@ -172,7 +211,7 @@ class Session:
             return True
         except Exception as e:
-            print(f"Failed to update local save status: {e}")
             return False
     def save_and_upload_detached(self, repo_id: str) -> Optional[str]:
@@ -203,7 +242,7 @@ class Session:
                 start_new_session=True,  # Detach from parent
             )
         except Exception as e:
-            print(f"⚠️  Failed to spawn upload subprocess: {e}")
         return local_path
@@ -233,4 +272,4 @@ class Session:
                 start_new_session=True,  # Detach from parent
             )
         except Exception as e:
-            print(f"⚠️  Failed to spawn retry subprocess: {e}")

 import asyncio
 import json
+import logging
 import subprocess
 import sys
 import uuid
 from pathlib import Path
 from typing import Any, Optional
 from agent.config import Config
 from agent.context_manager.manager import ContextManager
+logger = logging.getLogger(__name__)
+# Local max-token lookup — avoids litellm.get_max_tokens() which can hang
+# on network calls for certain providers (known litellm issue).
+_MAX_TOKENS_MAP: dict[str, int] = {
+    # Anthropic
+    "anthropic/claude-opus-4-5-20251101": 200_000,
+    "anthropic/claude-sonnet-4-5-20250929": 200_000,
+    "anthropic/claude-sonnet-4-20250514": 200_000,
+    "anthropic/claude-haiku-3-5-20241022": 200_000,
+    "anthropic/claude-3-5-sonnet-20241022": 200_000,
+    "anthropic/claude-3-opus-20240229": 200_000,
+    "huggingface/novita/minimax/minimax-m2.1": 196_608,
+    "huggingface/novita/moonshotai/kimi-k2.5": 262_144,
+    "huggingface/novita/zai-org/glm-5": 200_000,
+}
+_DEFAULT_MAX_TOKENS = 200_000
+def _get_max_tokens_safe(model_name: str) -> int:
+    """Return the max context window for a model, preferring the local map (no network) and falling back to litellm for unknown models."""
+    tokens = _MAX_TOKENS_MAP.get(model_name)
+    if tokens:
+        return tokens
+    # Fallback: ask litellm directly. NOTE: no timeout is applied here, so this call can still block for models missing from the map.
+    try:
+        from litellm import get_max_tokens
+        result = get_max_tokens(model_name)
+        if result and isinstance(result, int):
+            return result
+        logger.warning(
+            f"get_max_tokens returned {result} for {model_name}, using default"
+        )
+        return _DEFAULT_MAX_TOKENS
+    except Exception as e:
+        logger.warning(f"get_max_tokens failed for {model_name}, using default: {e}")
+        return _DEFAULT_MAX_TOKENS
 class OpType(Enum):
     USER_INPUT = "user_input"
         self.tool_router = tool_router
         tool_specs = tool_router.get_tool_specs_for_llm() if tool_router else []
         self.context_manager = context_manager or ContextManager(
+            max_context=_get_max_tokens_safe(config.model_name),
             compact_size=0.1,
             untouched_messages=5,
             tool_specs=tool_specs,
         self.is_running = True
         self.current_task: asyncio.Task | None = None
         self.pending_approval: Optional[dict[str, Any]] = None
+        # User's HF OAuth token — set by session_manager after construction
+        self.hf_token: Optional[str] = None
         # Session trajectory logging
         self.logged_events: list[dict] = []
         turns_since_last_save = self.turn_count - self.last_auto_save_turn
         if turns_since_last_save >= interval:
+            logger.info(f"Auto-saving session (turn {self.turn_count})...")
             # Fire-and-forget save - returns immediately
             self.save_and_upload_detached(self.config.session_dataset_repo)
             self.last_auto_save_turn = self.turn_count
             return str(filepath)
         except Exception as e:
+            logger.error(f"Failed to save session locally: {e}")
             return None
     def update_local_save_status(
             return True
         except Exception as e:
+            logger.error(f"Failed to update local save status: {e}")
             return False
     def save_and_upload_detached(self, repo_id: str) -> Optional[str]:
                 start_new_session=True,  # Detach from parent
             )
         except Exception as e:
+            logger.warning(f"Failed to spawn upload subprocess: {e}")
         return local_path
                 start_new_session=True,  # Detach from parent
             )
         except Exception as e:
+            logger.warning(f"Failed to spawn retry subprocess: {e}")

agent/core/session_uploader.py CHANGED Viewed

@@ -15,10 +15,8 @@ from dotenv import load_dotenv
 load_dotenv()
-# Fallback token for session uploads (write-only access to akseljoonas/hf-agent-sessions)
-_SESSION_TOKEN = "".join([
-    "hf_", "Nzya", "Eeb", "ESz", "DtA", "BoW", "Czj", "SEC", "ZZv", "kVL", "Ac", "Vf", "Sz"
-])
 def upload_session_as_file(

 load_dotenv()
+# Token for session uploads — loaded from env var (never hardcode tokens in source)
+_SESSION_TOKEN = os.environ.get("HF_SESSION_UPLOAD_TOKEN", "")
 def upload_session_as_file(

agent/core/tools.py CHANGED Viewed

@@ -3,10 +3,13 @@ Tool system for the agent
 Provides ToolSpec and ToolRouter for managing both built-in and MCP tools
 """
 import warnings
 from dataclasses import dataclass
 from typing import Any, Awaitable, Callable, Optional
 from fastmcp import Client
 from fastmcp.exceptions import ToolError
 from lmnr import observe
@@ -45,7 +48,6 @@ from agent.tools.hf_repo_git_tool import (
 )
 from agent.tools.jobs_tool import HF_JOBS_TOOL_SPEC, hf_jobs_handler
 from agent.tools.plan_tool import PLAN_TOOL_SPEC, plan_tool_handler
-from agent.tools.sandbox_tool import get_sandbox_tools
 # NOTE: Private HF repo tool disabled - replaced by hf_repo_files and hf_repo_git
 # from agent.tools.private_hf_repo_tools import (
@@ -132,6 +134,7 @@ class ToolRouter:
         for tool in create_builtin_tools():
             self.register_tool(tool)
         if mcp_servers:
             mcp_servers_payload = {}
             for name, server in mcp_servers.items():
@@ -159,7 +162,7 @@ class ToolRouter:
                     handler=None,
                 )
             )
-        print(
             f"Loaded {len(registered_names)} MCP tools: {', '.join(registered_names)} ({skipped_count} disabled)"
         )
@@ -180,7 +183,7 @@ class ToolRouter:
                 handler=search_openapi_handler,
             )
         )
-        print(f"Loaded OpenAPI search tool: {openapi_spec['name']}")
     def get_tool_specs_for_llm(self) -> list[dict[str, Any]]:
         """Get tool specifications in OpenAI format"""
@@ -209,7 +212,7 @@ class ToolRouter:
         await self.register_openapi_tool()
         total_tools = len(self.tools)
-        print(f"\nAgent ready with {total_tools} tools total\n")
         return self
@@ -220,7 +223,7 @@ class ToolRouter:
     @observe(name="call_tool")
     async def call_tool(
-        self, tool_name: str, arguments: dict[str, Any], session: Any = None
     ) -> tuple[str, bool]:
         """
         Call a tool and return (output_string, success_bool).
@@ -236,6 +239,9 @@ class ToolRouter:
             # Check if handler accepts session argument
             sig = inspect.signature(tool.handler)
             if "session" in sig.parameters:
                 return await tool.handler(arguments, session=session)
             return await tool.handler(arguments)
@@ -328,10 +334,7 @@ def create_builtin_tools() -> list[ToolSpec]:
         ),
     ]
-    # Sandbox tools
-    tools = get_sandbox_tools() + tools
     tool_names = ", ".join([t.name for t in tools])
-    print(f"Loaded {len(tools)} built-in tools: {tool_names}")
     return tools

 Provides ToolSpec and ToolRouter for managing both built-in and MCP tools
 """
+import logging
 import warnings
 from dataclasses import dataclass
 from typing import Any, Awaitable, Callable, Optional
+logger = logging.getLogger(__name__)
 from fastmcp import Client
 from fastmcp.exceptions import ToolError
 from lmnr import observe
 )
 from agent.tools.jobs_tool import HF_JOBS_TOOL_SPEC, hf_jobs_handler
 from agent.tools.plan_tool import PLAN_TOOL_SPEC, plan_tool_handler
 # NOTE: Private HF repo tool disabled - replaced by hf_repo_files and hf_repo_git
 # from agent.tools.private_hf_repo_tools import (
         for tool in create_builtin_tools():
             self.register_tool(tool)
+        self.mcp_client: Client | None = None
         if mcp_servers:
             mcp_servers_payload = {}
             for name, server in mcp_servers.items():
                     handler=None,
                 )
             )
+        logger.info(
             f"Loaded {len(registered_names)} MCP tools: {', '.join(registered_names)} ({skipped_count} disabled)"
         )
                 handler=search_openapi_handler,
             )
         )
+        logger.info(f"Loaded OpenAPI search tool: {openapi_spec['name']}")
     def get_tool_specs_for_llm(self) -> list[dict[str, Any]]:
         """Get tool specifications in OpenAI format"""
         await self.register_openapi_tool()
         total_tools = len(self.tools)
+        logger.info(f"Agent ready with {total_tools} tools total")
         return self
     @observe(name="call_tool")
     async def call_tool(
+        self, tool_name: str, arguments: dict[str, Any], session: Any = None, tool_call_id: str | None = None
     ) -> tuple[str, bool]:
         """
         Call a tool and return (output_string, success_bool).
             # Check if handler accepts session argument
             sig = inspect.signature(tool.handler)
             if "session" in sig.parameters:
+                # Check if handler also accepts tool_call_id parameter
+                if "tool_call_id" in sig.parameters:
+                    return await tool.handler(arguments, session=session, tool_call_id=tool_call_id)
                 return await tool.handler(arguments, session=session)
             return await tool.handler(arguments)
         ),
     ]
     tool_names = ", ".join([t.name for t in tools])
+    logger.info(f"Loaded {len(tools)} built-in tools: {tool_names}")
     return tools

agent/prompts/system_prompt.yaml CHANGED Viewed

@@ -1,5 +1,5 @@
 system_prompt: |
-  You are Hugging Face Agent, a skilled AI assistant for machine learning engineering. Hugging Face is a company that provides two main services : libraries to write deep learning tasks, and ressources (models, datasets, compute) to execute them. You will aid users to do theses tasks, interacting with the Hugging Face stack via {{ num_tools }}.
   # General behavior
@@ -9,7 +9,7 @@ system_prompt: |
   **CRITICAL : Research first, Then Implement**
-  For ANY implementation task (training, fine-tuning, inference, data processing, etc.), you should proceed in thoses three mandatory steps:
   1. **FIRST**: Search HF documentation to find the correct approach.
    - Use `explore_hf_docs` to discover documentation structure for relevant libraries (e.g., "trl", "transformers", "diffusers").

 system_prompt: |
+  You are Hugging Face Agent, a skilled AI assistant for machine learning engineering. Hugging Face is a company that provides two main services : libraries to write deep learning tasks, and resources (models, datasets, compute) to execute them. You will aid users to do these tasks, interacting with the Hugging Face stack via {{ num_tools }}.
   # General behavior
   **CRITICAL : Research first, Then Implement**
+  For ANY implementation task (training, fine-tuning, inference, data processing, etc.), you should proceed in these three mandatory steps:
   1. **FIRST**: Search HF documentation to find the correct approach.
    - Use `explore_hf_docs` to discover documentation structure for relevant libraries (e.g., "trl", "transformers", "diffusers").

agent/prompts/system_prompt_v2.yaml CHANGED Viewed

@@ -186,59 +186,61 @@ system_prompt: |
   3. ✅ Determine optimal processing approach based on requirements
   4. ✅ Plan output format and destination
-  ## PHASE 3: IMPLEMENT (Develop in Sandbox, Launch via Jobs)
-  ⚠️ **CRITICAL WORKFLOW: Sandbox First, Jobs Second**
-  For ANY implementation task (training, data processing, inference), follow this pattern:
-  **Step 1: Create a sandbox** — `sandbox_create` with appropriate hardware (cpu-basic for scripting, t4-small for GPU testing)
-  **Step 2: Develop & iterate** — Write scripts, install dependencies, test with small runs, fix errors interactively
-  **Step 3: Launch via hf_jobs** — Once the script works, pass the sandbox file path directly: `hf_jobs(operation="run", script="/app/train.py", ...)`
-  This is the CORRECT pattern:
-  ```
-  sandbox_create(hardware="t4-small")     # interactive dev environment
-  bash("pip install trl transformers")     # install deps
-  write("/app/train.py", "...")            # write training script
-  bash("cd /app && python train.py --max_steps 10")  # test run
-  edit("/app/train.py", ...)               # fix issues
-  bash("cd /app && python train.py --max_steps 10")  # verify fix
-  hf_jobs(operation="run", script="/app/train.py", hardware_flavor="a10g-large", timeout="4h")  # launch at scale
-  ```
-  Do NOT write long inline scripts directly in hf_jobs if necessary — develop in sandbox first.
-  ### Training Script Requirements
-  **Script MUST Include:**
-  - Imports from researched documentation (current APIs)
-  - Trackio initialization with project/run_name/config
-  - Model and tokenizer loading
-  - Dataset loading with verified columns and conversational format
-  - Training config with ALL critical settings:
     - `push_to_hub=True` ⚠️ MANDATORY
     - `hub_model_id="username/model-name"` ⚠️ MANDATORY
     - `report_to=["trackio"]` (for monitoring)
     - `output_dir="./output"`
     - `num_train_epochs`, `per_device_train_batch_size`, `learning_rate`
     - `logging_steps`, `save_steps`
-  - `trainer.train()` call
-  - `trainer.push_to_hub()` at end ⚠️ MANDATORY
-  **hf_jobs Launch Configuration:**
-  - `script`: Path to sandbox file (e.g. "/app/train.py") or inline code
-  - `dependencies`: ['transformers', 'trl', 'torch', 'datasets', 'trackio']
-  - `hardware_flavor`: Based on model size:
-    - 1-3B models: `t4-small` or `a10g-small`
-    - 7-13B models: `a10g-large`
-    - 30B+ models: `a100-large`
-    - 70B+ models: `h100` or `h100x8`
-  - `timeout`: ⚠️ CRITICAL — Small (2-4h), Medium (4-8h), Large (8-24h). NEVER default 30m for training.
   ### For Data Processing Tasks
-  **Same pattern:** develop script in sandbox, test on subset, launch via hf_jobs.
   - Use `cpu-upgrade` or `cpu-performance` for most data tasks
   - Set timeout based on dataset size (1-4 hours typical)
@@ -339,21 +341,6 @@ system_prompt: |
   - ⚠️ Include HF_TOKEN for Hub operations
   - ⚠️ Storage is EPHEMERAL - must push_to_hub
-  ## Sandbox (Interactive Development Environment)
-  **sandbox_create:**
-  - ⚠️ **Create a sandbox FIRST for any implementation task** — develop and test before launching jobs
-  - Persistent remote Linux environment on HF Spaces
-  - First call sandbox_create with hardware choice, then use bash/read/write/edit freely
-  - Hardware: cpu-basic (free tier), cpu-upgrade (8vCPU/32GB), t4-small (16GB GPU), a10g-small (24GB GPU), a10g-large (24GB GPU + 46GB RAM), a100-large (80GB GPU)
-  - `pip install` works out of the box — no special flags needed
-  - Workflow: sandbox_create → write script → test → fix → hf_jobs(script="/app/script.py") to launch at scale
-  **bash / read / write / edit:**
-  - Available after sandbox_create — no additional approvals needed
-  - Same semantics as local file/shell operations, but run on the remote sandbox
-  - bash: run shell commands; read/write/edit: file operations
   **hf_private_repos:**
   - Store job outputs persistently in datasets with push_to_hub (jobs lose files after completion)
   - Upload logs, scripts, results that can't push_to_hub

   3. ✅ Determine optimal processing approach based on requirements
   4. ✅ Plan output format and destination
+  ## PHASE 3: IMPLEMENT (Execute with Researched Approaches)
+  ### For Training Tasks
+  ⚠️ **TRAINING REQUIREMENTS CHECKLIST:**
+  **Before Submission:**
+  - [ ] Researched current TRL documentation
+  - [ ] Found and verified base model
+  - [ ] Found dataset and VALIDATED that its columns and conversational format match the training method
+  - [ ] Selected optimal model + dataset + hardware configuration
+  - [ ] Created plan with plan_tool
+  - [ ] Researched Trackio monitoring setup
+  **Training Script MUST Include:**
+  - [ ] Imports from researched documentation (current APIs)
+  - [ ] Trackio initialization with project/run_name/config
+  - [ ] Model and tokenizer loading
+  - [ ] Dataset loading with verified columns and conversational format
+  - [ ] Training config with ALL critical settings:
     - `push_to_hub=True` ⚠️ MANDATORY
     - `hub_model_id="username/model-name"` ⚠️ MANDATORY
     - `report_to=["trackio"]` (for monitoring)
     - `output_dir="./output"`
     - `num_train_epochs`, `per_device_train_batch_size`, `learning_rate`
     - `logging_steps`, `save_steps`
+    - `max_length` if needed (default 1024 usually fine)
+  - [ ] Trainer initialization with model, args, dataset, tokenizer
+  - [ ] `trainer.train()` call
+  - [ ] `trainer.push_to_hub()` at end ⚠️ MANDATORY
+  - [ ] `tracker.finish()` for Trackio
+  **Job Configuration MUST Include:**
+  - [ ] `operation`: "run" (for one-time) or "scheduled run" (for recurring)
+  - [ ] `script`: Training script with all above elements
+  - [ ] `dependencies`: ['transformers', 'trl', 'torch', 'datasets', 'trackio']
+  - [ ] `hardware_flavor`: Based on model size (see hf_jobs tool for detailed vCPU/RAM/GPU specs):
+    - 1-3B models: `t4-small` (4vCPU/15GB/GPU 16GB) for demos or `a10g-small` (4vCPU/15GB/GPU 24GB) for production
+    - 7-13B models: `a10g-large` (12vCPU/46GB/GPU 24GB)
+    - 30B+ models: `a100-large` (12vCPU/142GB/GPU 80GB)
+    - 70B+ models: `h100` (23vCPU/240GB/GPU 80GB) or `h100x8` for distributed
+  - [ ] `timeout`: ⚠️ CRITICAL - Set based on model/data size:
+    - Small models (1-3B): "2h" to "4h"
+    - Medium models (7-13B): "4h" to "8h"
+    - Large models (30B+): "8h" to "24h"
+    - **NEVER use default 30m for training!**
   ### For Data Processing Tasks
+  **Script Requirements:**
+  - Load dataset with `load_dataset`
+  - Process according to user requirements
+  - Push results with `push_to_hub()` or upload to `hf_private_repos`
+  **Job Configuration:**
   - Use `cpu-upgrade` or `cpu-performance` for most data tasks
   - Set timeout based on dataset size (1-4 hours typical)
   - ⚠️ Include HF_TOKEN for Hub operations
   - ⚠️ Storage is EPHEMERAL - must push_to_hub
   **hf_private_repos:**
   - Store job outputs persistently in datasets with push_to_hub (jobs lose files after completion)
   - Upload logs, scripts, results that can't push_to_hub

agent/prompts/system_prompt_v3.yaml DELETED Viewed

@@ -1,118 +0,0 @@
-system_prompt: |
-  You are Hugging Face Agent, an ML engineering assistant with {{ num_tools }} tools for training, fine-tuning, data processing, inference, and evaluation on the Hugging Face ecosystem.
-  _Current Time: **{{ current_date }} {{ current_time }} ({{ current_timezone }})**_
-  {% if hf_user_info %}_Authenticated as: **{{ hf_user_info }}**_{% endif %}
-  Your goal is to complete what the user requested with zero errors. You are fully autonomous — research, validate, implement, and deliver results without asking for unnecessary confirmation.
-  # Your knowledge of HF libraries is outdated
-  You do not know current APIs for TRL, Transformers, PEFT, Trackio, or other HF libraries. Your internal knowledge WILL produce wrong imports, wrong argument names, and wrong trainer configurations.
-  Before writing any ML implementation code (training, fine-tuning, inference, data processing), ground yourself in current working code:
-    github_find_examples → github_read_file → explore_hf_docs + fetch_hf_docs
-  Skip research only for trivial non-code operations.
-  # Mistakes you WILL make without research
-  HALLUCINATED IMPORTS: You will import from modules that were renamed or removed. Example: old TRL trainer class names, deprecated Transformers APIs, wrong trackio parameter names (e.g. `run_name` instead of `name`). Fix: read a current example script first.
-  WRONG TRAINER ARGUMENTS: You will pass configuration arguments that don't exist in current trainer versions. Fix: fetch the actual trainer/config docs via explore_hf_docs + fetch_hf_docs.
-  WRONG DATASET FORMAT: You will assume column names without checking. Training fails with KeyError. Fix: call hf_inspect_dataset or hub_repo_details and verify columns match the training method.
-  DEFAULT TIMEOUT KILLS JOBS: You will leave timeout at the default 30m for training jobs. Training takes hours. The job gets killed and all progress is lost. Fix: set timeout based on model size (minimum 2h for any training).
-  LOST MODELS: You will forget push_to_hub=True and hub_model_id in training config. Job storage is ephemeral — the filesystem is deleted when the job ends. Without push_to_hub, the trained model is permanently lost.
-  BATCH FAILURES: You will submit all ablation/batch jobs at once without testing that one works first. All will fail for the same bug. Fix: submit ONE job first, verify it completes successfully, then submit the rest.
-  SILENT DATASET SUBSTITUTION: When a requested dataset fails to load, you will silently switch to a different one without telling the user. Fix: if the requested dataset isn't available, tell the user and ask what to do.
-  HARDCODED UNAVAILABLE PACKAGES: You will forget to install necessary packages like 'flash-attn' for flash_attention_2 or other packages that aren't automatically installed in the job environment. Fix: install necessary packages before running the job.
-  SCOPE-CHANGING FIXES: Avoid at all costs! When you hit an error (especially OOM), you will try "creative" workarounds that change what the user asked for and/or change the training task itself — switching full SFT to LoRA on OOM, reducing max_length (silently truncates training data and changes what the model learns), disabling monitoring instead of fixing it. Do not do this. Fix errors with the minimal change that preserves the user's original request and is grounded in research and examples. If the original approach genuinely cannot work, explain why and ask the user for input before changing methods, sequence length, training approach or any other part of the task.
-  # When writing ML code
-  Required sequence before any training/fine-tuning/inference script:
-  1. Find working examples: github_find_examples (discover) → github_read_file (study)
-  2. Check documentation: explore_hf_docs + fetch_hf_docs for trainer configs and parameters
-  3. Validate dataset details: hf_inspect_dataset to confirm column names and format.
-  4. Validate model details: hub_repo_details to confirm model exists, it's the correct architecture/size/tokenizer etc.
-  Dataset format requirements by training method:
-    SFT: "messages", "text", or "prompt"/"completion"
-    DPO: "prompt", "chosen", "rejected"
-    GRPO: "prompt"
-  # When submitting a training job
-  Before calling hf_jobs, output a pre-flight check:
-    - Reference implementation: [which example you based this on]
-    - Dataset format verified: [columns confirmed via hf_inspect_dataset/hub_repo_details]
-    - push_to_hub=True and hub_model_id set
-    - timeout: [value] (based on: [model size] on [hardware])
-    - Trackio monitoring included and working
-  If you cannot fill in all items, stop and complete the missing steps first.
-  For batch/ablation jobs: submit ONE job first. Check logs to confirm it starts training successfully. Only then submit the remaining jobs. Never submit all at once.
-  Hardware sizing:
-    1-3B params: a10g-largex2
-    7-13B params: a100-large
-    30B+ params: l40sx4 or a100x4
-    70B+ params: a100x8
-  Note: a10g-small and a10g-large have the SAME 24GB GPU memory. The difference is CPU/RAM only.
-  # Sandbox-first development
-  For non-trivial scripts, develop and test in a sandbox before launching via hf_jobs:
-    sandbox_create → install deps → write script → test with small run → fix errors → launch via hf_jobs at scale
-  Use GPU sandbox (t4-small minimum) when testing code that uses CUDA, bf16, or model loading. CPU sandboxes cannot test GPU code paths.
-  # When a task has 3+ steps
-  Use plan_tool to track progress. One task in_progress at a time. Mark completed immediately after finishing. Update frequently to show the user what you're doing.
-  # Error recovery
-  When something fails:
-  - Diagnose the actual error. Read the full error message and logs.
-  - Do not retry the exact same thing. Identify what needs to change.
-  - If an API/import error: check documentation for the correct API.
-  - If an OOM error: (1) reduce per_device_train_batch_size and increase gradient_accumulation_steps proportionally to keep effective batch size identical, (2) enable gradient_checkpointing=True, (3) upgrade to larger GPU (a10gx4→a100→a100x4→a100x8). Do NOT switch training methods (e.g. SFT→LoRA) or reduce max_length — those change what the user gets. If OOM happens in sandbox, create a new sandbox with larger GPU hardware.
-  - Never change the user's requested approach (training method, dataset, model, sequence length) without explicit approval.
-  - If a tool call fails repeatedly for the same reason: stop and try a different approach.
-  - Never silently substitute resources (datasets, models) — tell the user if something isn't available.
-  # Task completion
-  Before ending your turn, verify:
-  - Did you actually DO what the user asked, not just explain what you would do?
-  - If something failed: did you diagnose and fix it, or at minimum explain what went wrong and ask for user input?
-  - For training jobs: did you include a working Trackio dashboard URL?
-  Do not stop after describing what you plan to do. Continue calling tools until the task is verifiably done.
-  Do not mark plan tasks as completed if they failed or are only partially done.
-  # Communication
-  - Be concise and direct. No filler, no restating what the user said.
-  - One-word answers when appropriate for simple questions.
-  - Always include direct Hub URLs when referencing models, datasets, Spaces, or jobs.
-  - For errors: state what went wrong, why, and what you're doing to fix it.
-  - Do not over-explain or present elaborate option menus for simple tasks. When the user's intent is clear, act on it. Present options only when there's genuine ambiguity.
-  # Tool usage
-  - Execute multiple independent tool calls in parallel when possible.
-  - HF_TOKEN is automatically available in job secrets — no need to add it separately.
-  - For training monitoring: include Trackio in the script and provide the dashboard URL.
-  - For private/gated datasets: HF_TOKEN is needed — it's auto-loaded into job secrets.

agent/tools/dataset_tools.py CHANGED Viewed

@@ -388,15 +388,22 @@ def _format_parquet_files(data: dict, max_rows: int = 10) -> str | None:
 HF_INSPECT_DATASET_TOOL_SPEC = {
     "name": "hf_inspect_dataset",
     "description": (
-        "Inspect a HF dataset in one call: status, configs/splits, schema, sample rows, parquet info.\n\n"
-        "REQUIRED before any training job to verify dataset format matches training method:\n"
-        "  SFT: needs 'messages', 'text', or 'prompt'/'completion'\n"
-        "  DPO: needs 'prompt', 'chosen', 'rejected'\n"
-        "  GRPO: needs 'prompt'\n"
-        "All datasets used for training have to be in conversational ChatML format to be compatible with HF libraries.'\n"
-        "Training will fail with KeyError if columns don't match.\n\n"
-        "Also use to get example datapoints, understand column names, data types, and available splits before writing any data loading code. "
-        "Supports private/gated datasets when HF_TOKEN is set."
     ),
     "parameters": {
         "type": "object",

 HF_INSPECT_DATASET_TOOL_SPEC = {
     "name": "hf_inspect_dataset",
     "description": (
+        "Inspect a Hugging Face dataset comprehensively in one call.\n\n"
+        "## What you get\n"
+        "- Status check (validates dataset works without errors)\n"
+        "- All configs and splits (row counts/shares may be '?' when metadata is missing)\n"
+        "- Column names and types (schema)\n"
+        "- Sample rows to understand data format\n"
+        "- Parquet file structure and sizes\n\n"
+        "## CRITICAL\n"
+        "**Always inspect datasets before writing training code** to understand:\n"
+        "- Column names for your dataloader\n"
+        "- Data types and format\n"
+        "- Available splits (train/test/validation)\n\n"
+        "Supports private/gated datasets when HF_TOKEN is set.\n\n"
+        "## Examples\n"
+        '{"dataset": "stanfordnlp/imdb"}\n'
+        '{"dataset": "nyu-mll/glue", "config": "mrpc", "sample_rows": 5}\n'
     ),
     "parameters": {
         "type": "object",

agent/tools/docs_tools.py CHANGED Viewed

@@ -845,12 +845,17 @@ DOC_ENDPOINTS = [
 EXPLORE_HF_DOCS_TOOL_SPEC = {
     "name": "explore_hf_docs",
     "description": (
-        "Browse HF documentation structure — discover all available documentation with 200-char previews.\n\n"
-        "Use this to find relevant documentation and/or examples with detailed parameter docs and API reference. "
-        "To be used together with github_find_examples and github_read_file to find working examples and documentation.\n\n"
-        "Pattern: explore_hf_docs (find relevant pages) → fetch_hf_docs (get full content).\n\n"
-        "For training tasks: fetch the trainer config docs (SFTConfig, DPOConfig, GRPOConfig) to verify parameter names. "
-        "Returns top 20 results by default; set max_results (max 50) to adjust."
     ),
     "parameters": {
         "type": "object",
@@ -923,10 +928,16 @@ EXPLORE_HF_DOCS_TOOL_SPEC = {
 HF_DOCS_FETCH_TOOL_SPEC = {
     "name": "fetch_hf_docs",
     "description": (
-        "Fetch full markdown content of an HF documentation page. Use after explore_hf_docs.\n\n"
-        "Critical for finding documentation e.g. current trainer configuration parameters (SFTConfig, DPOConfig, etc.) "
-        "Use for researching solutions and before writing training scripts. Your internal knowledge is outdated.\n\n"
-        "Provide the full URL from explore_hf_docs results. The .md extension is added automatically."
     ),
     "parameters": {
         "type": "object",

 EXPLORE_HF_DOCS_TOOL_SPEC = {
     "name": "explore_hf_docs",
     "description": (
+        "Explore Hugging Face documentation structure and discover available pages with 200-character previews. "
+        "⚠️ MANDATORY: ALWAYS use this BEFORE implementing any ML task (training, fine-tuning, data processing, inference). "
+        "Your training data may be outdated - current documentation is the source of truth. "
+        "**Use when:** (1) Starting any implementation task, (2) User asks 'how to' questions, "
+        "(3) Before writing training/processing code, (4) Researching library capabilities, "
+        "(5) Verifying API syntax and parameters. "
+        "**Pattern:** explore (discover structure) → fetch_hf_docs (get details) → implement with researched approach. "
+        "Returns: Sidebar navigation with titles, URLs, and glimpses of all pages in the selected documentation. "
+        "**Then:** Use fetch_hf_docs with specific URLs from results to get full content. "
+        "**Critical for reliability:** Never implement based on internal knowledge without checking current docs first - APIs change frequently."
+        " By default returns the top 20 results; set max_results (max 50) to adjust."
     ),
     "parameters": {
         "type": "object",
 HF_DOCS_FETCH_TOOL_SPEC = {
     "name": "fetch_hf_docs",
     "description": (
+        "Fetch full markdown content of a specific HF documentation page. "
+        "⚠️ CRITICAL: Use this after explore_hf_docs to get detailed implementation guidance. "
+        "**Use when:** (1) Found relevant page in explore_hf_docs results, (2) Need complete API documentation, "
+        "(3) Need training method details (SFT/DPO/GRPO), (4) Need configuration examples, "
+        "(5) Need parameter descriptions and usage patterns. "
+        "**Pattern:** explore_hf_docs (find relevant page) → fetch_hf_docs (get full content) → implement using documented approach. "
+        "Provide full URL from explore_hf_docs results (e.g., 'https://huggingface.co/docs/trl/sft_trainer'). "
+        "Returns: Complete markdown documentation with examples, parameters, and usage patterns. "
+        "**For training tasks:** ALWAYS fetch trainer docs (SFTConfig, DPOConfig, etc.) before creating training scripts. "
+        "**Critical for reliability:** This ensures you use current APIs and best practices."
     ),
     "parameters": {
         "type": "object",

agent/tools/github_find_examples.py CHANGED Viewed

@@ -405,16 +405,55 @@ def find_examples(
 GITHUB_FIND_EXAMPLES_TOOL_SPEC = {
     "name": "github_find_examples",
     "description": (
-        "Find working example scripts in GitHub repositories (from a list of predetermined directories e.g. examples/, scripts/, tutorials/, etc.). "
-        "Uses fuzzy keyword matching.\n\n"
-        "MANDATORY before writing any ML training, fine-tuning, or inference code. "
-        "Your internal knowledge of library APIs is outdated — working examples show current API patterns.\n\n"
-        "Sequence: github_find_examples → github_read_file (study the example) → implement based on what you found.\n\n"
-        "Skip this only for: simple data queries, status checks, non-code tasks.\n\n"
-        "Examples:\n"
-        "  {keyword: 'sft', repo: 'trl'} → finds examples/scripts/sft.py\n"
-        "  {keyword: 'grpo', repo: 'trl'} → finds GRPO training examples\n"
-        "  {repo: 'trl', max_results: 20} → lists all available training method examples"
     ),
     "parameters": {
         "type": "object",

 GITHUB_FIND_EXAMPLES_TOOL_SPEC = {
     "name": "github_find_examples",
     "description": (
+        "Discover working code examples, tutorials, scripts, and demos in GitHub repositories. "
+        "⚠️ CRITICAL: ALWAYS use this BEFORE implementing ML tasks - find working reference code first. "
+        "Your training data may be outdated; real repository examples show current best practices. "
+        "**Use when:** (1) Starting any ML implementation (training, inference, evaluation), "
+        "(2) User asks 'how to' questions about libraries, (3) Need reference implementations, "
+        "(4) Exploring library capabilities, (5) Before writing training/processing scripts. "
+        "**Pattern:** github_find_examples (discover) → github_read_file (study code) → implement with researched approach. "
+        "Returns: List of example files (scripts/notebooks/tutorials) with paths and URLs, sorted by relevance. "
+        "**Then:** Use github_read_file to read the actual implementation code. "
+        "**Critical for reliability:** Real examples prevent outdated API usage and show proven patterns. "
+        "## How it works\n\n"
+        "1. Fetches all example files (examples/, scripts/, tutorials/, demos/, notebooks/, etc.) from repository\n"
+        "2. If keyword provided, scores files against keyword using fuzzy matching\n"
+        "3. Returns best matches sorted by relevance and pattern priority\n"
+        "4. Provides copyable parameters for github_read_file tool\n\n"
+        "## Examples\n\n"
+        "<example>\n"
+        "// ML Workflow Step: Find GRPO training examples before implementation\n"
+        "// Task: Starting GRPO fine-tuning project, need reference implementation\n"
+        "{\n"
+        "  keyword: 'grpo',\n"
+        "  repo: 'trl',\n"
+        "  org: 'huggingface'\n"
+        "}\n"
+        "// Returns: examples/scripts/grpo_agent.py, examples/scripts/grpo_vlm.py\n"
+        "// Next step: github_read_file to study working implementation\n"
+        "</example>\n\n"
+        "<example>\n"
+        "// ML Workflow Step: Discover all available training methods\n"
+        "// Task: Exploring TRL training options before choosing approach\n"
+        "{\n"
+        "  repo: 'trl',\n"
+        "  org: 'huggingface',\n"
+        "  max_results: 20\n"
+        "}\n"
+        "// Lists: SFT, DPO, GRPO, PPO, reward modeling examples\n"
+        "// Helps user choose appropriate method\n"
+        "</example>\n\n"
+        "<example>\n"
+        "// ML Workflow Step: Find LoRA fine-tuning examples\n"
+        "// Task: Learning parameter-efficient fine-tuning patterns\n"
+        "{\n"
+        "  keyword: 'lora',\n"
+        "  repo: 'peft',\n"
+        "  org: 'huggingface'\n"
+        "}\n"
+        "// Discovers LoRA configuration and training examples\n"
+        "// Shows current PEFT API usage patterns\n"
+        "</example>"
     ),
     "parameters": {
         "type": "object",

agent/tools/github_read_file.py CHANGED Viewed

@@ -250,13 +250,59 @@ def read_file(
 GITHUB_READ_FILE_TOOL_SPEC = {
     "name": "github_read_file",
     "description": (
-        "Read file contents from GitHub repositories. Returns first 300 lines by default. "
-        "Auto-converts Jupyter notebooks to markdown.\n\n"
-        "Use AFTER github_find_examples to study the working implementation. "
-        "The purpose is to learn current API patterns — imports, trainer configs, dataset handling — "
-        "so your implementation uses correct, up-to-date code.\n\n"
         "Use line_start/line_end for large files (>300 lines) to read specific sections.\n\n"
-        "When NOT to use: when you don't know the file path (use github_find_examples first)."
     ),
     "parameters": {
         "type": "object",

 GITHUB_READ_FILE_TOOL_SPEC = {
     "name": "github_read_file",
     "description": (
+        "Read file contents from GitHub repositories with line range support (default 300 lines). "
+        "⚠️ CRITICAL: Use AFTER github_find_examples to study working implementation code. "
+        "**Use when:** (1) Found example file via github_find_examples and need full code, "
+        "(2) Need to read trainer class implementation, (3) Study configuration patterns, "
+        "(4) Read specific code sections with line ranges, (5) Review code from specific branches/commits. "
+        "**Pattern:** github_find_examples (discover files) → github_read_file (read code) → implement using researched patterns. "
+        "Returns: File contents with line numbers, formatted for LLM reading. Auto-converts Jupyter notebooks to markdown. "
+        "**Then:** Implement using patterns and APIs from the example code. "
+        "**Critical for reliability:** Reading working examples prevents API errors and shows current best practices. "
         "Use line_start/line_end for large files (>300 lines) to read specific sections.\n\n"
+        "## When to use this tool\n\n"
+        "- When reading example code, trainer implementations, or configuration files\n"
+        "- After github_find_examples returns file paths you want to study\n"
+        "- When investigating specific code sections with line ranges\n"
+        "- When reading from specific branches, tags, or commits (use ref parameter)\n\n"
+        "## When NOT to use this tool\n\n"
+        "- When you don't know exact file path (use github_find_examples or github_search_code first)\n"
+        "- When searching for code patterns across repos (use github_search_code instead)\n\n"
+        "## Examples\n\n"
+        "<example>\n"
+        "// ML Workflow Step: Read GRPO trainer class after finding via github_find_examples\n"
+        "// Use case: Understand GRPOTrainer API, parameters, and methods\n"
+        "{\n"
+        "  repo: 'huggingface/trl',\n"
+        "  path: 'trl/trainer/grpo_trainer.py',\n"
+        "  line_start: 1,\n"
+        "  line_end: 200\n"
+        "}\n"
+        "// Read class definition and constructor to understand current API\n"
+        "// Shows: __init__ parameters, configuration, required arguments\n"
+        "</example>\n\n"
+        "<example>\n"
+        "// ML Workflow Step: Study complete training script from examples\n"
+        "// Use case: Learn end-to-end VLM fine-tuning workflow\n"
+        "{\n"
+        "  repo: 'huggingface/trl',\n"
+        "  path: 'examples/scripts/grpo_vlm.py'\n"
+        "}\n"
+        "// Returns first 300 lines - shows full training setup\n"
+        "// Use line_start/line_end if need to read more\n"
+        "</example>\n\n"
+        "<example>\n"
+        "// ML Workflow Step: Check TrainingArguments configuration patterns\n"
+        "// Use case: Learn how to structure training configs correctly\n"
+        "{\n"
+        "  repo: 'huggingface/transformers',\n"
+        "  path: 'examples/pytorch/language-modeling/run_clm.py',\n"
+        "  line_start: 50,\n"
+        "  line_end: 150\n"
+        "}\n"
+        "// Read argument parsing and config setup section\n"
+        "// Shows: current parameter names, default values, best practices\n"
+        "</example>"
     ),
     "parameters": {
         "type": "object",

agent/tools/jobs_tool.py CHANGED Viewed

@@ -9,7 +9,9 @@ import base64
 import http.client
 import os
 import re
-from typing import Any, Awaitable, Callable, Dict, Literal, Optional
 import httpx
 from huggingface_hub import HfApi
@@ -17,6 +19,8 @@ from huggingface_hub.utils import HfHubHTTPError
 from agent.core.session import Event
 from agent.tools.types import ToolResult
 from agent.tools.utilities import (
     format_job_details,
     format_jobs_table,
@@ -25,33 +29,38 @@ from agent.tools.utilities import (
 )
 # Hardware flavors
-CPU_FLAVORS = ["cpu-basic", "cpu-upgrade"]
 GPU_FLAVORS = [
     "t4-small",
     "t4-medium",
-    "a10g-small",
-    "a10g-large",
-    "a10g-largex2",
-    "a10g-largex4",
-    "a100-large",
-    "a100x4",
-    "a100x8",
     "l4x1",
     "l4x4",
     "l40sx1",
     "l40sx4",
     "l40sx8",
 ]
 # Detailed specs for display (vCPU/RAM/GPU VRAM)
-CPU_FLAVORS_DESC = "cpu-basic(2vCPU/16GB), cpu-upgrade(8vCPU/32GB)"
 GPU_FLAVORS_DESC = (
     "t4-small(4vCPU/15GB/GPU 16GB), t4-medium(8vCPU/30GB/GPU 16GB), "
-    "a10g-small(4vCPU/15GB/GPU 24GB), a10g-large(12vCPU/46GB/GPU 24GB), "
-    "a10g-largex2(24vCPU/92GB/GPU 48GB), a10g-largex4(48vCPU/184GB/GPU 96GB), "
-    "a100-large(12vCPU/142GB/GPU 80GB), a100x4(48vCPU/568GB/GPU 320GB), a100x8(96vCPU/1136GB/GPU 640GB), "
     "l4x1(8vCPU/30GB/GPU 24GB), l4x4(48vCPU/186GB/GPU 96GB), "
-    "l40sx1(8vCPU/62GB/GPU 48GB), l40sx4(48vCPU/382GB/GPU 192GB), l40sx8(192vCPU/1534GB/GPU 384GB)"
 )
 SPECIALIZED_FLAVORS = ["inf2x6"]
 ALL_FLAVORS = CPU_FLAVORS + GPU_FLAVORS + SPECIALIZED_FLAVORS
@@ -113,23 +122,11 @@ def _filter_uv_install_output(logs: list[str]) -> list[str]:
     return logs
-_DEFAULT_ENV = {
-    "HF_HUB_DISABLE_PROGRESS_BARS": "1",
-    "TQDM_DISABLE": "1",
-    "TRANSFORMERS_VERBOSITY": "warning",
-    "HF_HUB_ENABLE_HF_TRANSFER": "1",
-}
-def _add_default_env(params: Dict[str, Any] | None) -> Dict[str, Any]:
-    """Inject default env vars for clean, agent-friendly output."""
-    result = dict(_DEFAULT_ENV)
-    result.update(params or {})  # user-provided values override defaults
-    return result
-def _add_environment_variables(params: Dict[str, Any] | None) -> Dict[str, Any]:
-    token = os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACE_HUB_TOKEN") or ""
     # Start with user-provided env vars, then force-set token last
     result = dict(params or {})
@@ -285,10 +282,15 @@ class HfJobsTool:
         hf_token: Optional[str] = None,
         namespace: Optional[str] = None,
         log_callback: Optional[Callable[[str], Awaitable[None]]] = None,
     ):
         self.api = HfApi(token=hf_token)
         self.namespace = namespace
         self.log_callback = log_callback
     async def execute(self, params: Dict[str, Any]) -> ToolResult:
         """Execute the specified operation"""
@@ -384,9 +386,7 @@ class HfJobsTool:
                 def log_producer():
                     try:
                         # fetch_job_logs is a blocking sync generator
-                        logs_gen = self.api.fetch_job_logs(
-                            job_id=job_id, namespace=namespace
-                        )
                         for line in logs_gen:
                             # Push line to queue thread-safely
                             loop.call_soon_threadsafe(queue.put_nowait, line)
@@ -413,7 +413,7 @@ class HfJobsTool:
                     # Process log line
                     log_line = item
-                    print("\t" + log_line)
                     if self.log_callback:
                         await self.log_callback(log_line)
                     all_logs.append(log_line)
@@ -441,19 +441,19 @@ class HfJobsTool:
                     if current_status in terminal_states:
                         # Job finished, no need to retry
-                        print(f"\tJob reached terminal state: {current_status}")
                         break
                     # Job still running, retry connection
-                    print(
-                        f"\tConnection interrupted ({str(e)[:50]}...), reconnecting in {retry_delay}s..."
                     )
                     await asyncio.sleep(retry_delay)
                     continue
                 except (ConnectionError, TimeoutError, OSError):
                     # Can't even check job status, wait and retry
-                    print(f"\tConnection error, retrying in {retry_delay}s...")
                     await asyncio.sleep(retry_delay)
                     continue
@@ -509,16 +509,30 @@ class HfJobsTool:
                 self.api.run_job,
                 image=image,
                 command=command,
-                env=_add_default_env(args.get("env")),
-                secrets=_add_environment_variables(args.get("secrets")),
                 flavor=args.get("hardware_flavor", "cpu-basic"),
                 timeout=args.get("timeout", "30m"),
                 namespace=self.namespace,
             )
             # Wait for completion and stream logs
-            print(f"{job_type} job started: {job.url}")
-            print("Streaming logs...\n---\n")
             final_status, all_logs = await self._wait_for_job_completion(
                 job_id=job.id,
@@ -727,8 +741,8 @@ To verify, call this tool with `{{"operation": "inspect", "job_id": "{job_id}"}}
                 image=image,
                 command=command,
                 schedule=schedule,
-                env=_add_default_env(args.get("env")),
-                secrets=_add_environment_variables(args.get("secrets")),
                 flavor=args.get("hardware_flavor", "cpu-basic"),
                 timeout=args.get("timeout", "30m"),
                 namespace=self.namespace,
@@ -887,31 +901,56 @@ To inspect, call this tool with `{{"operation": "scheduled inspect", "scheduled_
 HF_JOBS_TOOL_SPEC = {
     "name": "hf_jobs",
     "description": (
-        "Execute Python scripts or Docker containers on HF cloud infrastructure.\n\n"
-        "Two modes (mutually exclusive): Python mode (script + dependencies) or Docker mode (command + image). "
-        "Provide exactly ONE of 'script' or 'command'.\n\n"
-        "BEFORE submitting training/fine-tuning jobs:\n"
-        "- You MUST have called github_find_examples + github_read_file to find a working reference implementation. "
-        "Scripts based on your internal knowledge WILL use outdated APIs and fail.\n"
-        "- You MUST have validated dataset format via hf_inspect_dataset or hub_repo_details.\n"
-        "- Training config MUST include push_to_hub=True and hub_model_id. "
-        "Job storage is EPHEMERAL — all files are deleted when the job ends. Without push_to_hub, trained models are lost permanently.\n"
-        "- Include trackio monitoring and provide the dashboard URL to the user.\n\n"
-        "BATCH/ABLATION JOBS: Submit ONE job first. Check logs to confirm it starts training successfully. "
-        "Only then submit the remaining jobs. Never submit all at once — if there's a bug, all jobs fail.\n\n"
-        "Operations: run, ps, logs, inspect, cancel, scheduled run/ps/inspect/delete/suspend/resume.\n\n"
-        f"Hardware: CPU: {CPU_FLAVORS_DESC}. GPU: {GPU_FLAVORS_DESC}.\n"
-        "Common picks: t4-small ($0.60/hr, 1-3B), a10g-large ($2/hr, 7-13B), a100-large ($4/hr, 30B+), h100 ($6/hr, 70B+). "
-        "Note: a10g-small and a10g-large have the SAME 24GB GPU — the difference is CPU/RAM only.\n\n"
-        "OOM RECOVERY: When a training job fails with CUDA OOM:\n"
-        "1. Reduce per_device_train_batch_size and increase gradient_accumulation_steps proportionally (keep effective batch size identical)\n"
-        "2. Enable gradient_checkpointing=True\n"
-        "3. Upgrade to larger GPU (a10g→a100→h100)\n"
-        "Do NOT switch training methods (e.g. full SFT to LoRA) or reduce max_length — those change what the user gets and require explicit approval.\n\n"
-        "Examples:\n"
-        "Training: {'operation': 'run', 'script': '/app/train.py', 'dependencies': ['transformers', 'trl', 'torch', 'datasets', 'trackio'], 'hardware_flavor': 'a100-large', 'timeout': '8h'}\n"
-        "Monitor: {'operation': 'ps'}, {'operation': 'logs', 'job_id': 'xxx'}, {'operation': 'cancel', 'job_id': 'xxx'}"
-        "Docker: {'operation': 'run', 'command': ['duckdb', '-c', 'select 1 + 2'], 'image': 'duckdb/duckdb', 'hardware_flavor': 'cpu-basic', 'timeout': '1h'}\n"
     ),
     "parameters": {
         "type": "object",
@@ -931,65 +970,58 @@ HF_JOBS_TOOL_SPEC = {
                     "scheduled suspend",
                     "scheduled resume",
                 ],
-                "description": "Operation to execute.",
             },
             "script": {
                 "type": "string",
-                "description": (
-                    "Python code or sandbox file path (e.g. '/app/train.py') or URL. "
-                    "Triggers Python mode. For ML training: base this on a working example found via github_find_examples, not on internal knowledge. "
-                    "Mutually exclusive with 'command'."
-                ),
             },
             "dependencies": {
                 "type": "array",
                 "items": {"type": "string"},
-                "description": (
-                    "Pip packages to install. Include ALL required packages. "
-                    "Common training set: ['transformers', 'trl', 'torch', 'datasets', 'trackio', 'accelerate']. "
-                    "Only used with 'script'."
-                ),
             },
             "image": {
                 "type": "string",
-                "description": "Docker image. Optional — auto-selected if not provided. Use with 'command'.",
             },
             "command": {
                 "type": "array",
                 "items": {"type": "string"},
-                "description": "Command to execute as list. Triggers Docker mode. Mutually exclusive with 'script'.",
             },
             "hardware_flavor": {
                 "type": "string",
-                "description": (
-                    "Hardware type. Sizing guide: 1-3B params → t4-small/a10g-small, "
-                    "7-13B → a10g-large, 30B+ → a100-large, 70B+ → h100/h100x8. "
-                    f"All options: CPU: {CPU_FLAVORS}. GPU: {GPU_FLAVORS}."
-                ),
             },
             "timeout": {
                 "type": "string",
-                "description": (
-                    "Maximum job runtime. MUST be >2h for any training job — default 30m kills training mid-run. "
-                    "Guidelines: 1-3B models: 3-4h, 7-13B: 6-8h, 30B+: 12-24h. "
-                    "Use 30m-1h only for quick data processing or inference tasks. Default: '30m'."
-                ),
             },
             "env": {
                 "type": "object",
-                "description": "Environment variables {'KEY': 'VALUE'}. HF_TOKEN is auto-included.",
             },
             "job_id": {
                 "type": "string",
-                "description": "Job ID. Required for: logs, inspect, cancel.",
             },
             "scheduled_job_id": {
                 "type": "string",
-                "description": "Scheduled job ID. Required for: scheduled inspect/delete/suspend/resume.",
             },
             "schedule": {
                 "type": "string",
-                "description": "Cron schedule or preset (@hourly, @daily, @weekly, @monthly). Required for: scheduled run.",
             },
         },
         "required": ["operation"],
@@ -998,7 +1030,7 @@ HF_JOBS_TOOL_SPEC = {
 async def hf_jobs_handler(
-    arguments: Dict[str, Any], session: Any = None
 ) -> tuple[str, bool]:
     """Handler for agent tool router"""
     try:
@@ -1009,36 +1041,20 @@ async def hf_jobs_handler(
                     Event(event_type="tool_log", data={"tool": "hf_jobs", "log": log})
                 )
-        # If script is a sandbox file path, read it from the sandbox
-        script = arguments.get("script", "")
-        sandbox = getattr(session, "sandbox", None) if session else None
-        is_path = (
-            sandbox
-            and isinstance(script, str)
-            and script.strip() == script
-            and not any(c in script for c in "\r\n\0")
-            and (
-                script.startswith("/")
-                or script.startswith("./")
-                or script.startswith("../")
-            )
         )
-        if is_path:
-            import shlex
-            result = await asyncio.to_thread(sandbox.bash, f"cat {shlex.quote(script)}")
-            if not result.success:
-                return f"Failed to read {script} from sandbox: {result.error}", False
-            arguments = {**arguments, "script": result.output}
-        # Get token and namespace from HF token
-        hf_token = os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACE_HUB_TOKEN")
-        namespace = HfApi(token=hf_token).whoami().get("name") if hf_token else None
         tool = HfJobsTool(
             namespace=namespace,
             hf_token=hf_token,
             log_callback=log_callback if session else None,
         )
         result = await tool.execute(arguments)
         return result["formatted"], not result.get("isError", False)

 import http.client
 import os
 import re
+from typing import Any, Dict, Literal, Optional, Callable, Awaitable
+import logging
 import httpx
 from huggingface_hub import HfApi
 from agent.core.session import Event
 from agent.tools.types import ToolResult
+logger = logging.getLogger(__name__)
 from agent.tools.utilities import (
     format_job_details,
     format_jobs_table,
 )
 # Hardware flavors
+CPU_FLAVORS = ["cpu-basic", "cpu-upgrade", "cpu-performance", "cpu-xl"]
 GPU_FLAVORS = [
+    "sprx8",
+    "zero-a10g",
     "t4-small",
     "t4-medium",
     "l4x1",
     "l4x4",
     "l40sx1",
     "l40sx4",
     "l40sx8",
+    "a10g-small",
+    "a10g-large",
+    "a10g-largex2",
+    "a10g-largex4",
+    "a100-large",
+    "h100",
+    "h100x8",
 ]
 # Detailed specs for display (vCPU/RAM/GPU VRAM)
+CPU_FLAVORS_DESC = (
+    "cpu-basic(2vCPU/16GB), cpu-upgrade(8vCPU/32GB), cpu-performance, cpu-xl"
+)
 GPU_FLAVORS_DESC = (
     "t4-small(4vCPU/15GB/GPU 16GB), t4-medium(8vCPU/30GB/GPU 16GB), "
     "l4x1(8vCPU/30GB/GPU 24GB), l4x4(48vCPU/186GB/GPU 96GB), "
+    "l40sx1(8vCPU/62GB/GPU 48GB), l40sx4(48vCPU/382GB/GPU 192GB), l40sx8(192vCPU/1534GB/GPU 384GB), "
+    "a10g-small(4vCPU/14GB/GPU 24GB), a10g-large(12vCPU/46GB/GPU 24GB), "
+    "a10g-largex2(24vCPU/92GB/GPU 48GB), a10g-largex4(48vCPU/184GB/GPU 96GB), "
+    "a100-large(12vCPU/142GB/GPU 80GB), h100(23vCPU/240GB/GPU 80GB), h100x8(184vCPU/1920GB/GPU 640GB), "
+    "zero-a10g(dynamic alloc)"
 )
 SPECIALIZED_FLAVORS = ["inf2x6"]
 ALL_FLAVORS = CPU_FLAVORS + GPU_FLAVORS + SPECIALIZED_FLAVORS
     return logs
+def _add_environment_variables(
+    params: Dict[str, Any] | None, user_token: str | None = None
+) -> Dict[str, Any]:
+    # Prefer the authenticated user's OAuth token, fall back to global env var
+    token = user_token or os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACE_HUB_TOKEN") or ""
     # Start with user-provided env vars, then force-set token last
     result = dict(params or {})
         hf_token: Optional[str] = None,
         namespace: Optional[str] = None,
         log_callback: Optional[Callable[[str], Awaitable[None]]] = None,
+        session: Any = None,
+        tool_call_id: Optional[str] = None,
     ):
+        self.hf_token = hf_token
         self.api = HfApi(token=hf_token)
         self.namespace = namespace
         self.log_callback = log_callback
+        self.session = session
+        self.tool_call_id = tool_call_id
     async def execute(self, params: Dict[str, Any]) -> ToolResult:
         """Execute the specified operation"""
                 def log_producer():
                     try:
                         # fetch_job_logs is a blocking sync generator
+                        logs_gen = self.api.fetch_job_logs(job_id=job_id, namespace=namespace)
                         for line in logs_gen:
                             # Push line to queue thread-safely
                             loop.call_soon_threadsafe(queue.put_nowait, line)
                     # Process log line
                     log_line = item
+                    logger.debug(log_line)
                     if self.log_callback:
                         await self.log_callback(log_line)
                     all_logs.append(log_line)
                     if current_status in terminal_states:
                         # Job finished, no need to retry
+                        logger.info(f"Job reached terminal state: {current_status}")
                         break
                     # Job still running, retry connection
+                    logger.warning(
+                        f"Connection interrupted ({str(e)[:50]}...), reconnecting in {retry_delay}s..."
                     )
                     await asyncio.sleep(retry_delay)
                     continue
                 except (ConnectionError, TimeoutError, OSError):
                     # Can't even check job status, wait and retry
+                    logger.warning(f"Connection error, retrying in {retry_delay}s...")
                     await asyncio.sleep(retry_delay)
                     continue
                 self.api.run_job,
                 image=image,
                 command=command,
+                env=args.get("env"),
+                secrets=_add_environment_variables(args.get("secrets"), self.hf_token),
                 flavor=args.get("hardware_flavor", "cpu-basic"),
                 timeout=args.get("timeout", "30m"),
                 namespace=self.namespace,
             )
+            # Send job URL immediately after job creation (before waiting for completion)
+            if self.session and self.tool_call_id:
+                await self.session.send_event(
+                    Event(
+                        event_type="tool_state_change",
+                        data={
+                            "tool_call_id": self.tool_call_id,
+                            "tool": "hf_jobs",
+                            "state": "running",
+                            "jobUrl": job.url,
+                        },
+                    )
+                )
             # Wait for completion and stream logs
+            logger.info(f"{job_type} job started: {job.url}")
+            logger.info("Streaming logs...")
             final_status, all_logs = await self._wait_for_job_completion(
                 job_id=job.id,
                 image=image,
                 command=command,
                 schedule=schedule,
+                env=args.get("env"),
+                secrets=_add_environment_variables(args.get("secrets"), self.hf_token),
                 flavor=args.get("hardware_flavor", "cpu-basic"),
                 timeout=args.get("timeout", "30m"),
                 namespace=self.namespace,
 HF_JOBS_TOOL_SPEC = {
     "name": "hf_jobs",
     "description": (
+        "Execute Python scripts or Docker containers on HF cloud infrastructure (CPUs/GPUs) in one of two modes. "
+        "\n\n"
+        "**Two Modes (mutually exclusive):**\n"
+        "1. Python mode: using 'script' arg (REQUIRED) + 'dependencies'\n"
+        "2. Docker mode: using 'command' arg (REQUIRED) + 'image'\n\n"
+        "🚨 **REQUIRED:** You MUST provide exactly ONE of: 'script' (Python code as string) OR 'command' (Docker command as array). "
+        "They are mutually exclusive - provide one or the other, never both, never neither. "
+        "Do NOT call with just {'operation': 'run'} - always include your code. Example: {'operation': 'run', 'script': 'import torch; print(torch.cuda.is_available())', 'dependencies': ['torch']} or {'operation': 'run', 'command': ['duckdb', '-c', 'select 1 + 2']', 'image': 'duckdb/duckdb'}\n\n"
+        "⚠️ CRITICAL for reliability: (1) Jobs run ASYNC - provide monitoring URL immediately, don't poll; "
+        "(2) Set timeout >30min (default too short - training needs 2-8h); "
+        "(3) HF_TOKEN auto-loaded to secrets for Hub ops (push_to_hub, private repos); "
+        "(4) Job storage EPHEMERAL - MUST push_to_hub() or ALL work is LOST. "
+        "**Use when:** User wants cloud compute, training models, data processing, batch inference, GPU workloads, scheduled tasks. "
+        "ALWAYS use this tool (✓), never bash 'hf jobs' commands (✗). Pass script content inline (✓), don't save to files unless requested (✗). "
+        "\n\n"
+        "**Operations:** run, ps, logs, inspect, cancel, scheduled run, scheduled ps, scheduled inspect, scheduled delete, scheduled suspend, scheduled resume. "
+        "**Available Hardware (vCPU/RAM/GPU):**\n"
+        f"• CPU: {CPU_FLAVORS_DESC}\n"
+        f"• GPU: {GPU_FLAVORS_DESC}\n"
+        "  ◦ Common: t4-small ($0.60/hr, demos/1-3B models), a10g-small ($1/hr), a10g-large ($2/hr, production 7-13B), a100-large ($4/hr, 30B+), h100 ($6/hr, 70B+)\n\n"
+        "**After Submission Ground Rules:**\n"
+        "✓ Return immediately with job ID and monitoring URL\n"
+        "✓ Provide expected completion time and cost estimate\n"
+        "✓ For training: Include Trackio dashboard URL\n"
+        "✓ Note user can check status later\n"
+        "✗ DON'T poll logs automatically\n"
+        "✗ DON'T wait for completion\n"
+        "✗ DON'T check status unless user asks\n\n"
+        "**For Training Tasks:**\n"
+        "• ALWAYS research TRL docs first: explore_hf_docs('trl') → fetch_hf_docs(<trainer_url>)\n"
+        "• ALWAYS validate dataset format with hub_repo_details (SFT needs messages/text, DPO needs chosen/rejected)\n"
+        "• ALWAYS include Trackio monitoring in script (explore_hf_docs('trackio'))\n"
+        "• ALWAYS enable push_to_hub=True in training config\n"
+        "• Set timeout 2-8h for training (NOT default 30m)\n"
+        "• Confirm model/dataset choices with user before submitting\n\n"
+        "**Examples:**\n\n"
+        "**Training - Fine-tune LLM:**\n"
+        "{'operation': 'run', 'script': '# Training script with TRL\\nfrom trl import SFTConfig, SFTTrainer\\nfrom transformers import AutoModelForCausalLM\\nmodel = AutoModelForCausalLM.from_pretrained(\"Qwen/Qwen3-4B\")\\n# ... researched implementation from docs ...\\ntrainer.train()\\ntrainer.push_to_hub(\"user-name/my-model\")', 'dependencies': ['transformers', 'trl', 'torch', 'datasets', 'trackio'], 'hardware_flavor': 'a10g-large', 'timeout': '4h'}\n\n"
+        "**Data Processing:**\n"
+        "{'operation': 'run', 'script': 'from datasets import load_dataset\\nds = load_dataset(\"data\")\\n# process...\\nds.push_to_hub(\"user/processed\")', 'dependencies': ['datasets', 'pandas'], 'hardware_flavor': 'cpu-upgrade', 'timeout': '2h'}\n\n"
+        "**Scheduled Daily Job:**\n"
+        "{'operation': 'scheduled run', 'schedule': '@daily', 'script': 'from datasets import Dataset\\nimport pandas as pd\\n# scrape/generate data\\ndf = pd.DataFrame(data)\\nds = Dataset.from_pandas(df)\\nds.push_to_hub(\"user-name/daily-dataset\")', 'dependencies': ['datasets', 'pandas'], 'hardware_flavor': 'cpu-basic'}\n\n"
+        "**Docker Mode:**\n"
+        "{'operation': 'run', 'image': 'pytorch/pytorch:2.0.0-cuda11.7-cudnn8-runtime', 'command': ['python', 'train.py', '--epochs', '10'], 'hardware_flavor': 'a100-large'}\n\n"
+        "**Monitor Operations:**\n"
+        "{'operation': 'ps'} - List all jobs\n"
+        "{'operation': 'logs', 'job_id': 'xxx'} - Stream logs (only when user requests)\n"
+        "{'operation': 'inspect', 'job_id': 'xxx'} - Get job details\n"
+        "{'operation': 'cancel', 'job_id': 'xxx'} - Stop job\n\n"
+        "⚠️ CRITICAL: Files created during execution are DELETED when job finishes. MUST push_to_hub() all outputs (models, datasets, artifacts) in script. For logs/scripts, use hf_private_repos after completion."
     ),
     "parameters": {
         "type": "object",
                     "scheduled suspend",
                     "scheduled resume",
                 ],
+                "description": (
+                    "Operation to execute. Valid values: [run, ps, logs, inspect, cancel, "
+                    "scheduled run, scheduled ps, scheduled inspect, scheduled delete, "
+                    "scheduled suspend, scheduled resume]"
+                ),
             },
+            # Python/UV specific parameters
             "script": {
                 "type": "string",
+                "description": "Python code to execute. Triggers Python mode (auto pip install). Use with 'run'/'scheduled run'. Mutually exclusive with 'command'.",
             },
             "dependencies": {
                 "type": "array",
                 "items": {"type": "string"},
+                "description": "Pip packages to install. Example: ['trl', 'torch', 'datasets', 'transformers']. Only used with 'script'.",
             },
+            # Docker specific parameters
             "image": {
                 "type": "string",
+                "description": "Docker image. Example: 'pytorch/pytorch:2.0.0-cuda11.7-cudnn8-runtime'. Use with 'run'/'scheduled run'. Optional (auto-selected if not provided).",
             },
             "command": {
                 "type": "array",
                 "items": {"type": "string"},
+                "description": "Command to execute as list. Example: ['python', 'train.py', '--epochs', '10']. Triggers Docker mode. Use with 'run'/'scheduled run'. Mutually exclusive with 'script'.",
             },
+            # Hardware and environment
             "hardware_flavor": {
                 "type": "string",
+                "description": f"Hardware type. Available CPU flavors: {CPU_FLAVORS}. Available GPU flavors: {GPU_FLAVORS}. Use with 'run'/'scheduled run'.",
             },
             "timeout": {
                 "type": "string",
+                "description": "Max runtime. Examples: '30m', '2h', '4h'. Default: '30m'. Important for long training jobs. Use with 'run'/'scheduled run'.",
             },
             "env": {
                 "type": "object",
+                "description": "Environment variables. Format: {'KEY': 'VALUE'}. HF_TOKEN is automatically included from your auth. Use with 'run'/'scheduled run'.",
             },
+            # Job management parameters
             "job_id": {
                 "type": "string",
+                "description": "Job ID to operate on. Required for: 'logs', 'inspect', 'cancel'.",
             },
+            # Scheduled job parameters
             "scheduled_job_id": {
                 "type": "string",
+                "description": "Scheduled job ID. Required for: 'scheduled inspect', 'scheduled delete', 'scheduled suspend', 'scheduled resume'.",
             },
             "schedule": {
                 "type": "string",
+                "description": "Schedule for recurring job. Presets: '@hourly', '@daily', '@weekly', '@monthly'. Cron: '0 9 * * 1' (Mon 9am). Required for: 'scheduled run'.",
             },
         },
         "required": ["operation"],
 async def hf_jobs_handler(
+    arguments: Dict[str, Any], session: Any = None, tool_call_id: str | None = None
 ) -> tuple[str, bool]:
     """Handler for agent tool router"""
     try:
                     Event(event_type="tool_log", data={"tool": "hf_jobs", "log": log})
                 )
+        # Prefer the authenticated user's OAuth token, fall back to global env
+        hf_token = (
+            (getattr(session, "hf_token", None) if session else None)
+            or os.environ.get("HF_TOKEN")
+            or os.environ.get("HUGGINGFACE_HUB_TOKEN")
         )
+        namespace = os.environ.get("HF_NAMESPACE") or (HfApi(token=hf_token).whoami().get("name") if hf_token else None)
         tool = HfJobsTool(
             namespace=namespace,
             hf_token=hf_token,
             log_callback=log_callback if session else None,
+            session=session,
+            tool_call_id=tool_call_id,
         )
         result = await tool.execute(arguments)
         return result["formatted"], not result.get("isError", False)

agent/tools/plan_tool.py CHANGED Viewed

@@ -85,11 +85,18 @@ def get_current_plan() -> List[Dict[str, str]]:
 PLAN_TOOL_SPEC = {
     "name": "plan_tool",
     "description": (
-        "Track progress on multi-step tasks with a todo list (pending/in_progress/completed).\n\n"
-        "Use for tasks with 3+ steps. Each call replaces the entire plan (send full list).\n\n"
-        "Rules: exactly ONE task in_progress at a time. Mark completed immediately after finishing. "
-        "Only mark completed when the task fully succeeded — keep in_progress if there are errors. "
-        "Update frequently so the user sees progress."
     ),
     "parameters": {
         "type": "object",

 PLAN_TOOL_SPEC = {
     "name": "plan_tool",
     "description": (
+        "Manage task planning and progress tracking with todo list (pending/in_progress/completed statuses). "
+        "⚠️ CRITICAL: ALWAYS use for multi-step tasks (3+ steps) and MUST update frequently to show progress. "
+        "**Use when:** (1) User provides multiple tasks, (2) Complex workflows (training, evaluation, data processing), "
+        "(3) Tasks requiring multiple tool calls, (4) Need to communicate progress clearly to user, "
+        "(5) Breaking down ambiguous requests into concrete steps. "
+        "**Pattern:** Create plan at start → Mark in_progress when starting task → Mark completed immediately after finishing → User sees clear progress. "
+        "Each call replaces entire plan (full list required). "
+        "**Critical for reliability:** Exactly ONE task in_progress at a time (not zero, not multiple). "
+        "Mark tasks completed IMMEDIATELY after finishing - don't batch completions. "
+        "**For long-running tasks:** Update plan after each major step to keep user informed. "
+        "**Only mark completed when:** Task fully accomplished, no errors, all requirements met. "
+        "Keep tasks pending if blocked/errors occur - create new task to resolve blockers."
     ),
     "parameters": {
         "type": "object",

agent/tools/sandbox_client.py DELETED Viewed

@@ -1,714 +0,0 @@
-#!/usr/bin/env python3
-# /// script
-# requires-python = ">=3.10"
-# dependencies = ["huggingface_hub>=0.20.0", "httpx>=0.27.0"]
-# ///
-"""
-Sandbox Tools — Agent-native primitives for HF Space dev-mode sandboxes.
-Architecture:
-  - Creates a sandbox by duplicating a template Space (runs sandbox_server.py)
-  - Waits for it to come online
-  - Communicates via HTTPS to the Space's API
-  - Optionally deletes the Space when done
-Lifecycle:
-    sb = Sandbox.create(owner="burtenshaw")         # duplicate, wait, connect
-    sb = Sandbox.create(owner="burtenshaw",          # with options
-                        hardware="t4-small",
-                        private=True,
-                        sleep_time=3600)
-    sb = Sandbox.connect("burtenshaw/my-sandbox-abc") # attach to existing
-    sb.bash("uv run train.py")
-    sb.read("/app/train.py")
-    sb.edit("/app/train.py", old_str="lr=1e-3", new_str="lr=1e-4")
-    sb.delete()                                       # tear down when done
-    # Or use as a context manager for automatic cleanup
-    with Sandbox.create(owner="burtenshaw") as sb:
-        sb.bash("python train.py")
-    # Space deleted on exit
-Tools: bash, read, write, edit, upload
-"""
-from __future__ import annotations
-import io
-import os
-import sys
-import time
-import uuid
-from dataclasses import dataclass, field
-from typing import Any
-import httpx
-from huggingface_hub import CommitOperationAdd, HfApi
-TEMPLATE_SPACE = "burtenshaw/sandbox"
-HARDWARE_OPTIONS = [
-    "cpu-basic",
-    "cpu-upgrade",
-    "t4-small",
-    "t4-medium",
-    "a10g-small",
-    "a10g-large",
-    "a100-large",
-]
-OUTPUT_LIMIT = 30000
-LINE_LIMIT = 2000
-DEFAULT_READ_LIMIT = 2000
-DEFAULT_TIMEOUT = 120
-MAX_TIMEOUT = 600
-WAIT_TIMEOUT = 300
-WAIT_INTERVAL = 5
-API_WAIT_TIMEOUT = 180
-_DOCKERFILE = """\
-FROM ghcr.io/astral-sh/uv:python3.12-bookworm-slim
-RUN apt-get update && \\
-    apt-get install -y \\
-      bash git git-lfs wget curl procps \\
-      htop vim nano jq tmux \\
-      build-essential && \\
-    rm -rf /var/lib/apt/lists/*
-RUN uv pip install --system fastapi uvicorn python-multipart
-RUN useradd -m -u 1000 user
-USER user
-ENV HOME=/home/user \\
-    PATH=/home/user/.local/bin:$PATH \\
-    PIP_USER=1 \\
-    HF_HUB_DISABLE_PROGRESS_BARS=1 \\
-    TQDM_DISABLE=1 \\
-    TRANSFORMERS_VERBOSITY=warning \\
-    HF_HUB_ENABLE_HF_TRANSFER=1
-WORKDIR /app
-COPY --chown=user . /app
-EXPOSE 7860
-CMD ["python", "sandbox_server.py"]
-"""
-_SANDBOX_SERVER = '''\
-"""Minimal FastAPI server for sandbox operations."""
-import os, subprocess, pathlib
-from fastapi import FastAPI
-from pydantic import BaseModel
-from typing import Optional
-import uvicorn
-app = FastAPI()
-class BashReq(BaseModel):
-    command: str
-    work_dir: str = "/app"
-    timeout: int = 120
-class ReadReq(BaseModel):
-    path: str
-    offset: Optional[int] = None
-    limit: Optional[int] = 2000
-class WriteReq(BaseModel):
-    path: str
-    content: str
-class EditReq(BaseModel):
-    path: str
-    old_str: str
-    new_str: str
-    replace_all: bool = False
-class ExistsReq(BaseModel):
-    path: str
-@app.get("/api/health")
-def health():
-    return {"status": "ok"}
-@app.post("/api/bash")
-def bash(req: BashReq):
-    try:
-        r = subprocess.run(
-            req.command, shell=True, capture_output=True, text=True,
-            cwd=req.work_dir, timeout=req.timeout,
-        )
-        output = r.stdout + r.stderr
-        if len(output) > 30000:
-            output = output[:30000] + "\\n... (truncated)"
-        return {"success": r.returncode == 0, "output": output, "error": "" if r.returncode == 0 else f"Exit code {r.returncode}"}
-    except subprocess.TimeoutExpired:
-        return {"success": False, "output": "", "error": f"Timeout after {req.timeout}s"}
-    except Exception as e:
-        return {"success": False, "output": "", "error": str(e)}
-@app.post("/api/read")
-def read(req: ReadReq):
-    try:
-        p = pathlib.Path(req.path)
-        if not p.exists():
-            return {"success": False, "output": "", "error": f"File not found: {req.path}"}
-        if p.is_dir():
-            return {"success": False, "output": "", "error": f"Is a directory: {req.path}"}
-        lines = p.read_text().splitlines()
-        start = (req.offset or 1) - 1
-        end = start + (req.limit or len(lines))
-        selected = lines[start:end]
-        numbered = "\\n".join(f"{start + i + 1}\\t{line}" for i, line in enumerate(selected))
-        return {"success": True, "output": numbered, "error": ""}
-    except Exception as e:
-        return {"success": False, "output": "", "error": str(e)}
-@app.post("/api/write")
-def write(req: WriteReq):
-    try:
-        p = pathlib.Path(req.path)
-        p.parent.mkdir(parents=True, exist_ok=True)
-        p.write_text(req.content)
-        return {"success": True, "output": f"Wrote {len(req.content)} bytes to {req.path}", "error": ""}
-    except Exception as e:
-        return {"success": False, "output": "", "error": str(e)}
-@app.post("/api/edit")
-def edit(req: EditReq):
-    try:
-        p = pathlib.Path(req.path)
-        if not p.exists():
-            return {"success": False, "output": "", "error": f"File not found: {req.path}"}
-        content = p.read_text()
-        if req.old_str not in content:
-            return {"success": False, "output": "", "error": f"old_str not found in {req.path}"}
-        if not req.replace_all and content.count(req.old_str) > 1:
-            return {"success": False, "output": "", "error": f"old_str appears {content.count(req.old_str)} times. Use replace_all=true or provide more context."}
-        if req.replace_all:
-            new_content = content.replace(req.old_str, req.new_str)
-        else:
-            new_content = content.replace(req.old_str, req.new_str, 1)
-        p.write_text(new_content)
-        return {"success": True, "output": f"Edited {req.path}", "error": ""}
-    except Exception as e:
-        return {"success": False, "output": "", "error": str(e)}
-@app.post("/api/exists")
-def exists(req: ExistsReq):
-    return {"success": True, "output": str(pathlib.Path(req.path).exists()).lower(), "error": ""}
-if __name__ == "__main__":
-    uvicorn.run(app, host="0.0.0.0", port=7860)
-'''
-@dataclass
-class ToolResult:
-    success: bool
-    output: str = ""
-    error: str = ""
-    def __str__(self):
-        if self.success:
-            return self.output or "(no output)"
-        return f"ERROR: {self.error}"
-    def to_dict(self) -> dict:
-        return {"success": self.success, "output": self.output, "error": self.error}
-@dataclass
-class Sandbox:
-    """
-    A handle to an HF Space sandbox.
-    Use Sandbox.create() to spin up a new one, or Sandbox.connect() to
-    attach to an existing running Space.
-    """
-    space_id: str
-    token: str | None = None
-    work_dir: str = "/app"
-    timeout: int = DEFAULT_TIMEOUT
-    _owns_space: bool = field(default=False, repr=False)
-    _base_url: str = field(init=False, repr=False)
-    _client: httpx.Client = field(init=False, repr=False)
-    _hf_api: HfApi = field(init=False, repr=False)
-    _files_read: set = field(init=False, repr=False, default_factory=set)
-    def __post_init__(self):
-        """Resolve the token and build the HTTP and Hub API clients for this Space."""
-        if not self.token:
-            self.token = os.environ.get("HF_TOKEN")
-        # The Space host name uses "-" where the Space id uses "/".
-        host_slug = self.space_id.replace("/", "-")
-        # The trailing slash matters: httpx joins relative request paths onto base_url,
-        # so without it client.get("health") would hit /health rather than /api/health.
-        self._base_url = f"https://{host_slug}.hf.space/api/"
-        auth_headers = {}
-        if self.token:
-            auth_headers["Authorization"] = f"Bearer {self.token}"
-        self._client = httpx.Client(
-            base_url=self._base_url,
-            headers=auth_headers,
-            timeout=httpx.Timeout(MAX_TIMEOUT, connect=30),
-            follow_redirects=True,
-        )
-        self._hf_api = HfApi(token=self.token)
-    # ── Lifecycle ─────────────────────────────────────────────────
-    @classmethod
-    def create(
-        cls,
-        owner: str,
-        *,
-        name: str | None = None,
-        template: str = TEMPLATE_SPACE,
-        hardware: str = "cpu-basic",
-        private: bool = False,
-        sleep_time: int | None = None,
-        token: str | None = None,
-        wait_timeout: int = WAIT_TIMEOUT,
-    ) -> Sandbox:
-        """
-        Create a new sandbox by duplicating the template Space.
-        Generates a unique space name, duplicates the template, uploads the
-        sandbox server files, waits for the Space to reach the RUNNING stage,
-        then returns a connected Sandbox that owns the Space.
-        Args:
-            owner: HF username or org (e.g. "burtenshaw").
-            name: Base name for the space. Defaults to "sandbox".
-                  A unique suffix is always appended.
-            template: Source Space to duplicate (default: TEMPLATE_SPACE).
-            hardware: Hardware tier (cpu-basic, t4-small, etc.).
-            private: Whether the Space should be private.
-            sleep_time: Auto-sleep after N seconds of inactivity.
-            token: HF API token. Falls back to HF_TOKEN env var.
-            wait_timeout: Max seconds to wait for Space to start (default: WAIT_TIMEOUT).
-        Returns:
-            A Sandbox instance connected to the running Space.
-        Raises:
-            RuntimeError: The Space reported RUNTIME_ERROR or BUILD_ERROR.
-            TimeoutError: The Space did not reach RUNNING within wait_timeout.
-        """
-        token = token or os.environ.get("HF_TOKEN")
-        api = HfApi(token=token)
-        base = name or "sandbox"
-        # 8 hex chars of a random UUID keep concurrently created sandboxes distinct.
-        suffix = uuid.uuid4().hex[:8]
-        space_id = f"{owner}/{base}-{suffix}"
-        print(f"Creating sandbox: {space_id} (from {template})...")
-        kwargs = {
-            "from_id": template,
-            "to_id": space_id,
-            "private": private,
-            "hardware": hardware,
-        }
-        # sleep_time is only forwarded when the caller set it explicitly.
-        if sleep_time is not None:
-            kwargs["sleep_time"] = sleep_time
-        api.duplicate_space(**kwargs)
-        print(f"Space created: https://huggingface.co/spaces/{space_id}")
-        # Upload sandbox server and Dockerfile (triggers rebuild)
-        cls._setup_server(space_id, api)
-        # Wait for it to come online (rebuild + start)
-        print(f"Waiting for Space to start (timeout: {wait_timeout}s)...")
-        deadline = time.time() + wait_timeout
-        while time.time() < deadline:
-            runtime = api.get_space_runtime(space_id)
-            if runtime.stage == "RUNNING":
-                print(f"Space is running (hardware: {runtime.hardware})")
-                break
-            # Terminal failure stages: stop polling immediately.
-            if runtime.stage in ("RUNTIME_ERROR", "BUILD_ERROR"):
-                raise RuntimeError(
-                    f"Space failed to start: {runtime.stage}. "
-                    f"Check https://huggingface.co/spaces/{space_id}"
-                )
-            print(f"  {runtime.stage}...")
-            time.sleep(WAIT_INTERVAL)
-        else:
-            # while/else: reached only when the deadline passed without a break.
-            raise TimeoutError(
-                f"Space did not start within {wait_timeout}s. "
-                f"Check https://huggingface.co/spaces/{space_id}"
-            )
-        # Wait for the API server to be responsive (non-fatal)
-        sb = cls(space_id=space_id, token=token, _owns_space=True)
-        try:
-            sb._wait_for_api(timeout=API_WAIT_TIMEOUT)
-        except TimeoutError as e:
-            print(
-                f"Warning: API health check timed out ({e}), but Space is RUNNING. Continuing."
-            )
-        return sb
-    @staticmethod
-    def _setup_server(space_id: str, api: HfApi) -> None:
-        """Commit the embedded server script and Dockerfile to the Space in one commit."""
-        print(f"Uploading sandbox server to {space_id}...")
-        # Both files are module-level strings, uploaded from memory.
-        server_file = CommitOperationAdd(
-            path_in_repo="sandbox_server.py",
-            path_or_fileobj=io.BytesIO(_SANDBOX_SERVER.encode()),
-        )
-        dockerfile = CommitOperationAdd(
-            path_in_repo="Dockerfile",
-            path_or_fileobj=io.BytesIO(_DOCKERFILE.encode()),
-        )
-        api.create_commit(
-            repo_id=space_id,
-            repo_type="space",
-            operations=[server_file, dockerfile],
-            commit_message="Setup sandbox server",
-        )
-        print("Server files uploaded, rebuild triggered.")
-    @classmethod
-    def connect(cls, space_id: str, *, token: str | None = None) -> Sandbox:
-        """
-        Attach to a Space that already exists, without taking ownership of it.
-        Polls the health endpoint for up to 60 seconds; a TimeoutError from that
-        check propagates to the caller.
-        """
-        attached = cls(space_id=space_id, token=token, _owns_space=False)
-        attached._wait_for_api(timeout=60)
-        return attached
-    def _wait_for_api(self, timeout: int = API_WAIT_TIMEOUT):
-        """Poll the health endpoint every 3 seconds until it returns 200.
-        Raises:
-            TimeoutError: No 200 response arrived within ``timeout`` seconds; the
-                message carries the last status code and last exception seen.
-        """
-        give_up_at = time.time() + timeout
-        last_err = last_status = None
-        while time.time() < give_up_at:
-            try:
-                last_status = self._client.get("health", timeout=10).status_code
-                if last_status == 200:
-                    print(f"API is responsive at {self._base_url}")
-                    return
-            except Exception as exc:
-                # Keep the most recent failure for the timeout message and retry.
-                last_err = exc
-            time.sleep(3)
-        raise TimeoutError(
-            f"Sandbox API at {self._base_url} not responding after {timeout}s. "
-            f"Last status: {last_status}, last error: {last_err}"
-        )
-    def delete(self):
-        """Delete the Space and close the HTTP client.
-        Raises:
-            RuntimeError: This Sandbox did not create the Space.
-        """
-        if self._owns_space:
-            print(f"Deleting sandbox: {self.space_id}...")
-            self._hf_api.delete_repo(self.space_id, repo_type="space")
-            self._client.close()
-            print("Deleted.")
-            return
-        refusal = (
-            f"This Sandbox did not create {self.space_id}. "
-            f"Use self._hf_api.delete_repo() directly if you're sure."
-        )
-        raise RuntimeError(refusal)
-    def pause(self):
-        """Ask the Hub to pause this Space (stops billing, preserves state)."""
-        hub = self._hf_api
-        hub.pause_space(self.space_id)
-    def restart(self):
-        """Restart the Space, then block until its API answers health checks."""
-        hub = self._hf_api
-        hub.restart_space(self.space_id)
-        self._wait_for_api()
-    @property
-    def url(self) -> str:
-        """Hub page of the Space (huggingface.co/spaces/<space_id>)."""
-        return "https://huggingface.co/spaces/" + self.space_id
-    @property
-    def status(self) -> str:
-        """Stage reported by the Hub for this Space (RUNNING, BUILDING, PAUSED, etc.).
-        Each access performs a Hub API request.
-        """
-        runtime = self._hf_api.get_space_runtime(self.space_id)
-        return runtime.stage
-    def __enter__(self) -> Sandbox:
-        """Enter the context manager; the sandbox itself is the managed object."""
-        return self
-    def __exit__(self, *exc):
-        """Delete the Space if this Sandbox created it, then close the HTTP client.
-        Deletion is best-effort: a failure is reported on stderr and not raised.
-        """
-        try:
-            if self._owns_space:
-                self.delete()
-        except Exception as e:
-            print(f"Warning: failed to delete sandbox: {e}", file=sys.stderr)
-        self._client.close()
-    # ── HTTP plumbing ─────────────────────────────────────────────
-    def _call(
-        self, endpoint: str, payload: dict, timeout: float | None = None
-    ) -> ToolResult:
-        """POST ``payload`` as JSON to ``endpoint`` and wrap the reply in a ToolResult.
-        Failures are reported as a ToolResult with ``success=False`` rather than
-        raised: timeouts, connection errors, non-JSON bodies and any other exception.
-        Args:
-            endpoint: Path relative to the API base URL; a leading slash is ignored.
-            payload: JSON-serialisable request body.
-            timeout: Per-request timeout in seconds; falls back to ``self.timeout``.
-        Returns:
-            The server's success/output/error fields on HTTP 200, otherwise a
-            failed ToolResult describing what went wrong.
-        """
-        # Strip leading slash for correct httpx base_url resolution
-        endpoint = endpoint.lstrip("/")
-        request_timeout = timeout or self.timeout
-        try:
-            resp = self._client.post(endpoint, json=payload, timeout=request_timeout)
-            try:
-                data = resp.json()
-            except ValueError:
-                # A non-JSON body (e.g. an HTML error page) would otherwise surface as
-                # a bare parse error; the HTTP status is the useful part.
-                return ToolResult(
-                    success=False,
-                    error=f"HTTP {resp.status_code}: response body is not valid JSON",
-                )
-            if resp.status_code == 200:
-                return ToolResult(
-                    success=data.get("success", True),
-                    output=data.get("output", ""),
-                    error=data.get("error", ""),
-                )
-            return ToolResult(
-                success=False,
-                error=data.get("error", f"HTTP {resp.status_code}"),
-            )
-        except httpx.TimeoutException:
-            return ToolResult(success=False, error=f"Timeout after {request_timeout}s")
-        except httpx.ConnectError:
-            # self.status is itself a Hub API request. If it fails too, the exception
-            # must not escape this handler, so fall back to a placeholder.
-            try:
-                stage = self.status
-            except Exception:
-                stage = "unknown"
-            return ToolResult(
-                success=False,
-                error=f"Cannot connect to sandbox. Is {self.space_id} running? Status: {stage}",
-            )
-        except Exception as e:
-            return ToolResult(success=False, error=str(e))
-    # ── Tools ─────────────────────────────────────────────────────
-    def bash(
-        self,
-        command: str,
-        *,
-        work_dir: str | None = None,
-        timeout: int | None = None,
-        description: str | None = None,
-    ) -> ToolResult:
-        """Run ``command`` through the sandbox's bash endpoint.
-        Args:
-            command: Shell command text.
-            work_dir: Directory to run in; defaults to ``self.work_dir``.
-            timeout: Seconds allowed; defaults to ``self.timeout``. The value sent to
-                the server is capped at MAX_TIMEOUT.
-            description: Accepted for the tool schema; not sent to the server.
-        """
-        requested = timeout or self.timeout
-        payload = {
-            "command": command,
-            "work_dir": work_dir or self.work_dir,
-            "timeout": min(requested, MAX_TIMEOUT),
-        }
-        # The HTTP request keeps the caller's own timeout (uncapped, possibly None).
-        return self._call("bash", payload, timeout=timeout)
-    def read(
-        self, path: str, *, offset: int | None = None, limit: int | None = None
-    ) -> ToolResult:
-        """Read a file through the sandbox's read endpoint.
-        A successful read records ``path`` in ``_files_read``, which write() and
-        edit() consult before touching an existing file.
-        Args:
-            path: File to read.
-            offset: Starting line, passed through unchanged.
-            limit: Number of lines. When neither ``limit`` nor ``offset`` is given,
-                DEFAULT_READ_LIMIT is used; with an offset and no limit, None is sent.
-        """
-        result = self._call(
-            "read",
-            {
-                "path": path,
-                "offset": offset,
-                "limit": limit or (DEFAULT_READ_LIMIT if offset is None else None),
-            },
-        )
-        # Only a read that actually returned content counts as "read this session".
-        # Recording the path up front let a failed read unlock write()/edit().
-        if result.success:
-            self._files_read.add(path)
-        return result
-    def write(self, path: str, content: str) -> ToolResult:
-        """Create or overwrite ``path`` with ``content``.
-        Refuses to overwrite a file that exists remotely but was not read in this
-        session. A successful write marks the path as read.
-        """
-        already_read = path in self._files_read
-        if not already_read:
-            probe = self._call("exists", {"path": path})
-            exists_remotely = probe.success and probe.output == "true"
-            if exists_remotely:
-                return ToolResult(
-                    success=False,
-                    error=(
-                        f"File {path} exists but has not been read this session. "
-                        f"Read it first, or use sandbox_edit for targeted changes."
-                    ),
-                )
-        outcome = self._call("write", {"path": path, "content": content})
-        if outcome.success:
-            self._files_read.add(path)
-        return outcome
-    def edit(
-        self, path: str, old_str: str, new_str: str, *, replace_all: bool = False
-    ) -> ToolResult:
-        """Replace ``old_str`` with ``new_str`` in ``path`` via the edit endpoint.
-        Rejected locally, before any request is sent, when the two strings are
-        identical or when the file has not been read in this session.
-        """
-        if old_str == new_str:
-            return ToolResult(success=False, error="old_str and new_str are identical.")
-        if path not in self._files_read:
-            unread_msg = f"File {path} has not been read this session. Read it first."
-            return ToolResult(success=False, error=unread_msg)
-        payload = {
-            "path": path,
-            "old_str": old_str,
-            "new_str": new_str,
-            "replace_all": replace_all,
-        }
-        return self._call("edit", payload)
-    # ── Tool schemas & dispatch ───────────────────────────────────
-    TOOLS = {
-        "bash": {
-            "description": (
-                "Run a shell command in the remote sandbox and return stdout/stderr.\n"
-                "\n"
-                "Commands run in a shell at the working directory (default /app). "
-                "Each invocation is independent — use files in /app to persist state.\n"
-                "\n"
-                "AVOID using bash for operations covered by specialized tools:\n"
-                "- File reading: use read (not cat/head/tail)\n"
-                "- File editing: use edit (not sed/awk)\n"
-                "- File writing: use write (not echo/cat <<EOF)\n"
-                "\n"
-                "For long-running tasks, background them:\n"
-                "  nohup uv run train.py > /app/train.log 2>&1 &\n"
-                "Then check with read on the log file.\n"
-                "\n"
-                "Chain dependent commands with &&. Independent commands should be "
-                "separate bash calls (they can run in parallel).\n"
-                "\n"
-                "Timeout default 120s, max 600s."
-            ),
-            "parameters": {
-                "type": "object",
-                "required": ["command"],
-                "additionalProperties": False,
-                "properties": {
-                    "command": {
-                        "type": "string",
-                        "description": "The shell command to execute.",
-                    },
-                    "description": {
-                        "type": "string",
-                        "description": "Short description (5-10 words, active voice). E.g. 'Install dependencies', 'Run training script'.",
-                    },
-                    "work_dir": {
-                        "type": "string",
-                        "description": "Working directory (default: /app).",
-                    },
-                    "timeout": {
-                        "type": "integer",
-                        "description": "Timeout in seconds (default: 120, max: 600).",
-                    },
-                },
-            },
-        },
-        "read": {
-            "description": (
-                "Read file contents with line numbers (cat -n format).\n"
-                "\n"
-                "Returns the first 2000 lines by default. For large files, use offset/limit "
-                "to read a specific range. Line numbers always match the original file.\n"
-                "\n"
-                "Lines longer than 2000 chars are truncated.\n"
-                "Cannot read directories — use bash with 'ls' instead."
-            ),
-            "parameters": {
-                "type": "object",
-                "required": ["path"],
-                "additionalProperties": False,
-                "properties": {
-                    "path": {
-                        "type": "string",
-                        "description": "Absolute path to the file to read.",
-                    },
-                    "offset": {
-                        "type": "integer",
-                        "description": "Start from this line (1-based). Only if file is too large.",
-                    },
-                    "limit": {
-                        "type": "integer",
-                        "description": "Number of lines to read. Only if file is too large.",
-                    },
-                },
-            },
-        },
-        "write": {
-            "description": (
-                "Create or overwrite a file. Creates parent directories as needed.\n"
-                "\n"
-                "For existing files, you MUST read the file first (system enforced). "
-                "Prefer edit for modifications."
-            ),
-            "parameters": {
-                "type": "object",
-                "required": ["path", "content"],
-                "additionalProperties": False,
-                "properties": {
-                    "path": {
-                        "type": "string",
-                        "description": "Absolute path to the file to write.",
-                    },
-                    "content": {
-                        "type": "string",
-                        "description": "Complete file content.",
-                    },
-                },
-            },
-        },
-        "edit": {
-            "description": (
-                "Targeted edit via exact string replacement.\n"
-                "\n"
-                "Rules:\n"
-                "- old_str must appear EXACTLY once (unless replace_all is true).\n"
-                "- Include enough context in old_str for uniqueness.\n"
-                "- old_str and new_str must differ.\n"
-                "- Preserve indentation exactly.\n"
-                "- To delete code, set new_str to empty string.\n"
-                "- File MUST have been read this session (system enforced).\n"
-                "- Do NOT include line number prefixes in old_str/new_str.\n"
-                "\n"
-                "Use replace_all=true for batch operations like variable renaming."
-            ),
-            "parameters": {
-                "type": "object",
-                "required": ["path", "old_str", "new_str"],
-                "additionalProperties": False,
-                "properties": {
-                    "path": {
-                        "type": "string",
-                        "description": "Absolute path to the file.",
-                    },
-                    "old_str": {
-                        "type": "string",
-                        "description": "Exact text to find (must differ from new_str).",
-                    },
-                    "new_str": {"type": "string", "description": "Replacement text."},
-                    "replace_all": {
-                        "type": "boolean",
-                        "description": "Replace all occurrences (default: false).",
-                        "default": False,
-                    },
-                },
-            },
-        },
-    }
-    @classmethod
-    def tool_definitions(cls) -> list[dict]:
-        """Return each TOOLS entry as a dict with its key added under "name"."""
-        definitions = []
-        for tool_name, spec in cls.TOOLS.items():
-            definitions.append({"name": tool_name, **spec})
-        return definitions
-    def call_tool(self, name: str, arguments: dict[str, Any]) -> ToolResult:
-        """Dispatch a tool call by name to bash/read/write/edit.
-        Required arguments are read with ``[]`` (a missing one raises KeyError);
-        optional ones fall back to None or False. An unrecognised name yields a
-        failed ToolResult instead of raising.
-        """
-        a = arguments
-        if name == "bash":
-            return self.bash(
-                a["command"],
-                work_dir=a.get("work_dir"),
-                timeout=a.get("timeout"),
-                description=a.get("description"),
-            )
-        if name == "read":
-            return self.read(a["path"], offset=a.get("offset"), limit=a.get("limit"))
-        if name == "write":
-            return self.write(a["path"], a["content"])
-        if name == "edit":
-            return self.edit(
-                a["path"],
-                a["old_str"],
-                a["new_str"],
-                replace_all=a.get("replace_all", False),
-            )
-        return ToolResult(success=False, error=f"Unknown tool: {name}")

agent/tools/sandbox_tool.py DELETED Viewed

@@ -1,201 +0,0 @@
-"""
-Sandbox tools — expose the Sandbox client as agent tools.
-5 tools total:
-  sandbox_create — explicit sandbox creation (requires approval)
-  bash, read, write, edit — operations on the sandbox
-If any operation tool is called without an active sandbox,
-a cpu-basic sandbox is auto-created (no approval needed).
-"""
-from __future__ import annotations
-import asyncio
-import os
-from typing import Any
-from huggingface_hub import HfApi, SpaceHardware
-from agent.core.session import Event
-from agent.tools.sandbox_client import Sandbox
-# ── Tool name mapping (short agent names → Sandbox client names) ──────
-async def _ensure_sandbox(
-    session: Any, hardware: str = "cpu-basic", **create_kwargs
-) -> tuple[Sandbox | None, str | None]:
-    """
-    Ensure a sandbox exists on the session. Auto-creates with given hardware if needed.
-    An existing ``session.sandbox`` is returned as-is; ``hardware`` and
-    ``create_kwargs`` only apply when a new sandbox is created.
-    Args:
-        session: Session object; must support ``send_event`` and a ``sandbox`` attribute.
-        hardware: Hardware tier passed to Sandbox.create.
-        **create_kwargs: Extra keyword arguments forwarded to Sandbox.create.
-    Returns:
-        (sandbox, error_message) — one will be None.
-    Raises:
-        Whatever the Hub lookup or Sandbox.create raises; callers wrap this call.
-    """
-    if session and getattr(session, "sandbox", None):
-        return session.sandbox, None
-    if not session:
-        return None, "No session available."
-    token = os.environ.get("HF_TOKEN")
-    if not token:
-        return None, "HF_TOKEN environment variable not set. Cannot create sandbox."
-    api = HfApi(token=token)
-    # whoami() is a blocking HTTP request. Run it in a worker thread so the event
-    # loop is not stalled, as is already done for Sandbox.create below.
-    user_info = await asyncio.to_thread(api.whoami)
-    owner = user_info.get("name", user_info.get("user", ""))
-    if not owner:
-        return None, "Could not determine HF username from token."
-    await session.send_event(
-        Event(
-            event_type="tool_log",
-            data={
-                "tool": "sandbox",
-                "log": f"Auto-creating sandbox for {owner} ({hardware})...",
-            },
-        )
-    )
-    kwargs = {"owner": owner, "hardware": hardware, "token": token, **create_kwargs}
-    sb = await asyncio.to_thread(Sandbox.create, **kwargs)
-    session.sandbox = sb
-    await session.send_event(
-        Event(
-            event_type="tool_log",
-            data={"tool": "sandbox", "log": f"Sandbox ready: {sb.space_id} ({sb.url})"},
-        )
-    )
-    return sb, None
-# ── sandbox_create tool ──────────────────────────────────────────────
-SANDBOX_CREATE_TOOL_SPEC = {
-    "name": "sandbox_create",
-    "description": (
-        "Create a persistent remote Linux environment for developing and testing scripts.\n\n"
-        "Workflow: sandbox_create → write script → pip install → test with small run → fix errors → hf_jobs at scale.\n"
-        "The sandbox persists across tool calls within the session. pip install works out of the box.\n\n"
-        "Use this when: you need to develop, test, and iterate on scripts before launching via hf_jobs. "
-        "Especially for training scripts where you need to verify imports, test on a small subset, and fix errors interactively.\n\n"
-        "Skip this when: the task is a simple one-shot operation (status check, resource search, quick data query), "
-        "or the script is copied from a verified working example with minimal changes.\n\n"
-        "For ML code that uses CUDA, bf16, or model loading: use GPU hardware (t4-small minimum). "
-        "CPU sandboxes cannot run GPU code paths — your test will not catch GPU-related errors.\n\n"
-        "Hardware: " + ", ".join([e.value for e in SpaceHardware]) + ".\n"
-    ),
-    "parameters": {
-        "type": "object",
-        "required": [],
-        "additionalProperties": False,
-        "properties": {
-            "hardware": {
-                "type": "string",
-                "enum": [e.value for e in SpaceHardware],
-                "description": "Hardware tier for the sandbox (default: cpu-basic)",
-            },
-            "private": {
-                "type": "boolean",
-                "description": "If true, create a private Space",
-            },
-        },
-    },
-}
-async def sandbox_create_handler(
-    args: dict[str, Any], session: Any = None
-) -> tuple[str, bool]:
-    """Handle sandbox_create: reuse the session's sandbox or create a new one.
-    Returns:
-        (message, ok) — ``ok`` is False when creation failed or was not possible.
-    """
-    # Reuse path: report the sandbox already attached to the session.
-    existing = getattr(session, "sandbox", None) if session else None
-    if existing:
-        summary = [
-            f"Sandbox already active: {existing.space_id}",
-            f"URL: {existing.url}",
-            "Use bash/read/write/edit to interact with it.",
-        ]
-        return "\n".join(summary), True
-    hardware = args.get("hardware", "cpu-basic")
-    # "private" is only forwarded when the caller supplied it.
-    create_kwargs = {"private": args["private"]} if "private" in args else {}
-    try:
-        sb, error = await _ensure_sandbox(session, hardware=hardware, **create_kwargs)
-    except Exception as e:
-        return f"Failed to create sandbox: {e}", False
-    if error:
-        return error, False
-    summary = [
-        f"Sandbox created: {sb.space_id}",
-        f"URL: {sb.url}",
-        f"Hardware: {hardware}",
-        "Use bash/read/write/edit to interact with it.",
-    ]
-    return "\n".join(summary), True
-def _make_tool_handler(sandbox_tool_name: str):
-    """Build the async handler that forwards one operation tool to the session's sandbox."""
-    async def handler(args: dict[str, Any], session: Any = None) -> tuple[str, bool]:
-        # Make sure a sandbox exists; one is auto-created when the session has none.
-        try:
-            sb, error = await _ensure_sandbox(session)
-        except Exception as e:
-            return f"Failed to auto-create sandbox: {e}", False
-        if error:
-            return error, False
-        try:
-            # call_tool is synchronous, so it runs in a worker thread.
-            result = await asyncio.to_thread(sb.call_tool, sandbox_tool_name, args)
-            if result.success:
-                return result.output or "(no output)", True
-            failure = f"ERROR: {result.error or 'Unknown error'}"
-            if result.output:
-                # Keep any partial output ahead of the error line.
-                failure = f"{result.output}\n\n{failure}"
-            return failure, False
-        except Exception as e:
-            return f"Sandbox operation failed: {e}", False
-    return handler
-def get_sandbox_tools():
-    """Build the ToolSpec list: sandbox_create first, then one spec per Sandbox.TOOLS entry."""
-    from agent.core.tools import ToolSpec
-    # sandbox_create (explicit creation, requires approval)
-    create_spec = ToolSpec(
-        name=SANDBOX_CREATE_TOOL_SPEC["name"],
-        description=SANDBOX_CREATE_TOOL_SPEC["description"],
-        parameters=SANDBOX_CREATE_TOOL_SPEC["parameters"],
-        handler=sandbox_create_handler,
-    )
-    # Operation tools (auto-execute, no approval needed)
-    operation_specs = [
-        ToolSpec(
-            name=op_name,
-            description=op_spec["description"],
-            parameters=op_spec["parameters"],
-            handler=_make_tool_handler(op_name),
-        )
-        for op_name, op_spec in Sandbox.TOOLS.items()
-    ]
-    return [create_spec, *operation_specs]

backend/dependencies.py ADDED Viewed

	@@ -0,0 +1,144 @@

+"""Authentication dependencies for FastAPI routes.
+Provides auth validation for both REST and WebSocket endpoints.
+- In dev mode (OAUTH_CLIENT_ID not set): auth is bypassed, returns a default "dev" user.
+- In production: validates Bearer tokens or cookies against HF OAuth.
+"""
+import logging
+import os
+import time
+from typing import Any
+import httpx
+from fastapi import HTTPException, Request, WebSocket, status
+logger = logging.getLogger(__name__)
+OPENID_PROVIDER_URL = os.environ.get("OPENID_PROVIDER_URL", "https://huggingface.co")
+AUTH_ENABLED = bool(os.environ.get("OAUTH_CLIENT_ID", ""))
+# Simple in-memory token cache: token -> (user_info, expiry_time)
+_token_cache: dict[str, tuple[dict[str, Any], float]] = {}
+TOKEN_CACHE_TTL = 300  # 5 minutes
+DEV_USER: dict[str, Any] = {
+    "user_id": "dev",
+    "username": "dev",
+    "authenticated": True,
+}
+async def _validate_token(token: str) -> dict[str, Any] | None:
+    """Validate a token against HF OAuth userinfo endpoint.
+    Successful lookups are cached for TOKEN_CACHE_TTL seconds to avoid excessive
+    API calls. Failed lookups are never cached.
+    Args:
+        token: Access token to check.
+    Returns:
+        The userinfo payload, or None when the token is rejected, the request
+        fails, or the response body is not a JSON object.
+    """
+    now = time.time()
+    # Check cache
+    cached = _token_cache.get(token)
+    if cached is not None:
+        user_info, expiry = cached
+        if now < expiry:
+            return user_info
+    # Evict every expired entry, not just this token's. Otherwise tokens that are
+    # never presented again would stay in the cache for the life of the process.
+    for stale in [t for t, (_, exp) in _token_cache.items() if exp <= now]:
+        _token_cache.pop(stale, None)
+    # Validate against HF
+    async with httpx.AsyncClient(timeout=10.0) as client:
+        try:
+            response = await client.get(
+                f"{OPENID_PROVIDER_URL}/oauth/userinfo",
+                headers={"Authorization": f"Bearer {token}"},
+            )
+        except httpx.HTTPError as e:
+            logger.warning("Token validation error: %s", e)
+            return None
+        if response.status_code != 200:
+            logger.debug("Token validation failed: status %d", response.status_code)
+            return None
+        try:
+            user_info = response.json()
+        except ValueError as e:
+            # A 200 with a non-JSON body is not an httpx.HTTPError; treat it as a
+            # failed validation rather than letting it propagate.
+            logger.warning("Token validation returned invalid JSON: %s", e)
+            return None
+        if not isinstance(user_info, dict):
+            logger.warning("Token validation returned a non-object payload")
+            return None
+        _token_cache[token] = (user_info, now + TOKEN_CACHE_TTL)
+        return user_info
+def _user_from_info(user_info: dict[str, Any]) -> dict[str, Any]:
+    """Map an HF userinfo payload onto the normalized user dict.
+    ``user_id`` prefers "sub" and falls back to the username; the username itself
+    falls back to "unknown". ``name`` and ``picture`` are None when absent.
+    """
+    username = user_info.get("preferred_username", "unknown")
+    user: dict[str, Any] = {}
+    user["user_id"] = user_info.get("sub", username)
+    user["username"] = username
+    user["name"] = user_info.get("name")
+    user["picture"] = user_info.get("picture")
+    user["authenticated"] = True
+    return user
+async def _extract_user_from_token(token: str) -> dict[str, Any] | None:
+    """Return the normalized user for ``token``, or None if validation yields nothing."""
+    user_info = await _validate_token(token)
+    return _user_from_info(user_info) if user_info else None
+async def get_current_user(request: Request) -> dict[str, Any]:
+    """FastAPI dependency: extract and validate the current user.
+    Checks (in order):
+    1. Authorization: Bearer <token> header (scheme matched case-insensitively)
+    2. hf_access_token cookie
+    In dev mode (AUTH_ENABLED=False), returns a default dev user.
+    Raises:
+        HTTPException: 401 when neither source yields a valid user.
+    """
+    if not AUTH_ENABLED:
+        return DEV_USER
+    # Try Authorization header. HTTP auth scheme names are case-insensitive, so
+    # "bearer <token>" must be accepted as well as "Bearer <token>".
+    scheme, _, credentials = request.headers.get("Authorization", "").partition(" ")
+    credentials = credentials.strip()
+    # An empty credential can never validate; skip the remote call.
+    if scheme.lower() == "bearer" and credentials:
+        user = await _extract_user_from_token(credentials)
+        if user:
+            return user
+    # Try cookie
+    token = request.cookies.get("hf_access_token")
+    if token:
+        user = await _extract_user_from_token(token)
+        if user:
+            return user
+    raise HTTPException(
+        status_code=status.HTTP_401_UNAUTHORIZED,
+        detail="Not authenticated. Please log in via /auth/login.",
+        headers={"WWW-Authenticate": "Bearer"},
+    )
+async def get_ws_user(websocket: WebSocket) -> dict[str, Any] | None:
+    """Resolve the user for a WebSocket connection, or None if unauthenticated.
+    Browsers cannot attach custom headers to a WebSocket handshake, so the token
+    is looked up in this order:
+    1. ?token= query parameter
+    2. hf_access_token cookie (sent automatically for same-origin)
+    In dev mode (AUTH_ENABLED=False) the default dev user is returned.
+    """
+    if not AUTH_ENABLED:
+        return DEV_USER
+    candidates = (
+        websocket.query_params.get("token"),
+        websocket.cookies.get("hf_access_token"),
+    )
+    for candidate in candidates:
+        if not candidate:
+            continue
+        user = await _extract_user_from_token(candidate)
+        if user:
+            return user
+    return None

backend/main.py CHANGED Viewed

@@ -5,6 +5,14 @@ import os
 from contextlib import asynccontextmanager
 from pathlib import Path
 from fastapi import FastAPI
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.staticfiles import StaticFiles

 from contextlib import asynccontextmanager
 from pathlib import Path
+from dotenv import load_dotenv
+load_dotenv()
+# Ensure HF_TOKEN is set — fall back to HF_ADMIN_TOKEN if available (HF Spaces)
+if not os.environ.get("HF_TOKEN") and os.environ.get("HF_ADMIN_TOKEN"):
+    os.environ["HF_TOKEN"] = os.environ["HF_ADMIN_TOKEN"]
 from fastapi import FastAPI
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.staticfiles import StaticFiles

backend/models.py CHANGED Viewed

@@ -37,6 +37,7 @@ class ToolApproval(BaseModel):
     tool_call_id: str
     approved: bool
     feedback: str | None = None
 class ApprovalRequest(BaseModel):
@@ -67,6 +68,7 @@ class SessionInfo(BaseModel):
     created_at: str
     is_active: bool
     message_count: int
 class HealthResponse(BaseModel):
@@ -74,3 +76,13 @@ class HealthResponse(BaseModel):
     status: str = "ok"
     active_sessions: int = 0

     tool_call_id: str
     approved: bool
     feedback: str | None = None
+    edited_script: str | None = None
 class ApprovalRequest(BaseModel):
     created_at: str
     is_active: bool
     message_count: int
+    user_id: str = "dev"
 class HealthResponse(BaseModel):
     status: str = "ok"
     active_sessions: int = 0
+    max_sessions: int = 0
+class LLMHealthResponse(BaseModel):
+    """LLM provider health check response."""
+    # Overall result of the check.
+    status: str  # "ok" | "error"
+    # NOTE(review): presumably the identifier of the model that was probed — confirm against the route.
+    model: str
+    # Error details; both default to None.
+    error: str | None = None
+    error_type: str | None = None  # "auth" | "credits" | "rate_limit" | "network" | "unknown"

backend/routes/agent.py CHANGED Viewed

@@ -1,58 +1,252 @@
-"""Agent API routes - WebSocket and REST endpoints."""
-import logging
-from fastapi import APIRouter, HTTPException, WebSocket, WebSocketDisconnect
 from models import (
     ApprovalRequest,
     HealthResponse,
     SessionInfo,
     SessionResponse,
     SubmitRequest,
 )
-from session_manager import session_manager
 from websocket import manager as ws_manager
 logger = logging.getLogger(__name__)
 router = APIRouter(prefix="/api", tags=["agent"])
 @router.get("/health", response_model=HealthResponse)
 async def health_check() -> HealthResponse:
     """Health check endpoint."""
     return HealthResponse(
-        status="ok", active_sessions=session_manager.active_session_count
     )
 @router.post("/session", response_model=SessionResponse)
-async def create_session() -> SessionResponse:
-    """Create a new agent session."""
-    session_id = await session_manager.create_session()
     return SessionResponse(session_id=session_id, ready=True)
 @router.get("/session/{session_id}", response_model=SessionInfo)
-async def get_session(session_id: str) -> SessionInfo:
-    """Get session information."""
     info = session_manager.get_session_info(session_id)
-    if not info:
-        raise HTTPException(status_code=404, detail="Session not found")
     return SessionInfo(**info)
 @router.get("/sessions", response_model=list[SessionInfo])
-async def list_sessions() -> list[SessionInfo]:
-    """List all sessions."""
-    sessions = session_manager.list_sessions()
     return [SessionInfo(**s) for s in sessions]
 @router.delete("/session/{session_id}")
-async def delete_session(session_id: str) -> dict:
-    """Delete a session."""
     success = await session_manager.delete_session(session_id)
     if not success:
         raise HTTPException(status_code=404, detail="Session not found")
@@ -60,8 +254,11 @@ async def delete_session(session_id: str) -> dict:
 @router.post("/submit")
-async def submit_input(request: SubmitRequest) -> dict:
-    """Submit user input to a session."""
     success = await session_manager.submit_user_input(request.session_id, request.text)
     if not success:
         raise HTTPException(status_code=404, detail="Session not found or inactive")
@@ -69,13 +266,17 @@ async def submit_input(request: SubmitRequest) -> dict:
 @router.post("/approve")
-async def submit_approval(request: ApprovalRequest) -> dict:
-    """Submit tool approvals to a session."""
     approvals = [
         {
             "tool_call_id": a.tool_call_id,
             "approved": a.approved,
             "feedback": a.feedback,
         }
         for a in request.approvals
     ]
@@ -86,8 +287,11 @@ async def submit_approval(request: ApprovalRequest) -> dict:
 @router.post("/interrupt/{session_id}")
-async def interrupt_session(session_id: str) -> dict:
     """Interrupt the current operation in a session."""
     success = await session_manager.interrupt(session_id)
     if not success:
         raise HTTPException(status_code=404, detail="Session not found or inactive")
@@ -95,8 +299,9 @@ async def interrupt_session(session_id: str) -> dict:
 @router.post("/undo/{session_id}")
-async def undo_session(session_id: str) -> dict:
     """Undo the last turn in a session."""
     success = await session_manager.undo(session_id)
     if not success:
         raise HTTPException(status_code=404, detail="Session not found or inactive")
@@ -104,8 +309,11 @@ async def undo_session(session_id: str) -> dict:
 @router.post("/compact/{session_id}")
-async def compact_session(session_id: str) -> dict:
     """Compact the context in a session."""
     success = await session_manager.compact(session_id)
     if not success:
         raise HTTPException(status_code=404, detail="Session not found or inactive")
@@ -113,8 +321,11 @@ async def compact_session(session_id: str) -> dict:
 @router.post("/shutdown/{session_id}")
-async def shutdown_session(session_id: str) -> dict:
     """Shutdown a session."""
     success = await session_manager.shutdown_session(session_id)
     if not success:
         raise HTTPException(status_code=404, detail="Session not found or inactive")
@@ -123,17 +334,61 @@ async def shutdown_session(session_id: str) -> dict:
 @router.websocket("/ws/{session_id}")
 async def websocket_endpoint(websocket: WebSocket, session_id: str) -> None:
-    """WebSocket endpoint for real-time events."""
     logger.info(f"WebSocket connection request for session {session_id}")
     # Verify session exists
     info = session_manager.get_session_info(session_id)
     if not info:
-        logger.warning(f"WebSocket connection rejected: Session {session_id} not found")
         await websocket.close(code=4004, reason="Session not found")
         return
     await ws_manager.connect(websocket, session_id)
     try:
         while True:
             # Keep connection alive, handle ping/pong

+"""Agent API routes - WebSocket and REST endpoints.
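+All REST routes except the health checks (/health, /health/llm) and
+GET /config/model require authentication via the get_current_user dependency;
+the WebSocket route authenticates via get_ws_user instead.
+In dev mode (no OAUTH_CLIENT_ID), auth is bypassed automatically.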
+"""
+import logging
+import os
+from typing import Any
+from dependencies import get_current_user, get_ws_user
+from fastapi import (
+    APIRouter,
+    Depends,
+    HTTPException,
+    Request,
+    WebSocket,
+    WebSocketDisconnect,
+)
+from litellm import acompletion
+from agent.core.agent_loop import _resolve_hf_router_params
 from models import (
     ApprovalRequest,
     HealthResponse,
+    LLMHealthResponse,
     SessionInfo,
     SessionResponse,
     SubmitRequest,
 )
+from session_manager import MAX_SESSIONS, SessionCapacityError, session_manager
 from websocket import manager as ws_manager
 logger = logging.getLogger(__name__)
 router = APIRouter(prefix="/api", tags=["agent"])
+AVAILABLE_MODELS = [
+    {
+        "id": "huggingface/novita/minimax/minimax-m2.1",
+        "label": "MiniMax M2.1",
+        "provider": "huggingface",
+        "recommended": True,
+    },
+    {
+        "id": "anthropic/claude-opus-4-5-20251101",
+        "label": "Claude Opus 4.5",
+        "provider": "anthropic",
+        "recommended": True,
+    },
+    {
+        "id": "huggingface/novita/moonshotai/kimi-k2.5",
+        "label": "Kimi K2.5",
+        "provider": "huggingface",
+    },
+    {
+        "id": "huggingface/novita/zai-org/glm-5",
+        "label": "GLM 5",
+        "provider": "huggingface",
+    },
+]
+def _check_session_access(session_id: str, user: dict[str, Any]) -> None:
+    """Verify the user has access to the given session. Raises 403 or 404.
+    Args:
+        session_id: ID of the session being accessed.
+        user: Authenticated user mapping; only its "user_id" key is read here.
+    Raises:
+        HTTPException: 404 if session_manager has no info for the session,
+            403 if session_manager.verify_session_access rejects the user.
+    """
+    info = session_manager.get_session_info(session_id)
+    if not info:
+        raise HTTPException(status_code=404, detail="Session not found")
+    # Existence is checked first so a missing session reports 404 rather than 403.
+    if not session_manager.verify_session_access(session_id, user["user_id"]):
+        raise HTTPException(status_code=403, detail="Access denied to this session")
 @router.get("/health", response_model=HealthResponse)
 async def health_check() -> HealthResponse:
     """Health check endpoint."""
     return HealthResponse(
+        status="ok",
+        active_sessions=session_manager.active_session_count,
+        max_sessions=MAX_SESSIONS,
     )
+@router.get("/health/llm", response_model=LLMHealthResponse)
+async def llm_health_check() -> LLMHealthResponse:
+    """Check if the LLM provider is reachable and the API key is valid.
+    Makes a minimal 1-token completion call.  Catches common errors:
+    - 401 → invalid API key
+    - 402/insufficient_quota → out of credits
+    - 429 → rate limited
+    - timeout / network → provider unreachable
+    """
+    model = session_manager.config.model_name
+    try:
+        llm_params = _resolve_hf_router_params(model)
+        await acompletion(
+            messages=[{"role": "user", "content": "hi"}],
+            max_tokens=1,
+            timeout=10,
+            **llm_params,
+        )
+        return LLMHealthResponse(status="ok", model=model)
+    except Exception as e:
+        err_str = str(e).lower()
+        error_type = "unknown"
+        if (
+            "401" in err_str
+            or "auth" in err_str
+            or "invalid" in err_str
+            or "api key" in err_str
+        ):
+            error_type = "auth"
+        elif (
+            "402" in err_str
+            or "credit" in err_str
+            or "quota" in err_str
+            or "insufficient" in err_str
+            or "billing" in err_str
+        ):
+            error_type = "credits"
+        elif "429" in err_str or "rate" in err_str:
+            error_type = "rate_limit"
+        elif "timeout" in err_str or "connect" in err_str or "network" in err_str:
+            error_type = "network"
+        logger.warning(f"LLM health check failed ({error_type}): {e}")
+        return LLMHealthResponse(
+            status="error",
+            model=model,
+            error=str(e)[:500],
+            error_type=error_type,
+        )
+@router.get("/config/model")
+async def get_model() -> dict:
+    """Get current model and available models. No auth required."""
+    return {
+        "current": session_manager.config.model_name,
+        "available": AVAILABLE_MODELS,
+    }
+@router.post("/config/model")
+async def set_model(body: dict, user: dict = Depends(get_current_user)) -> dict:
+    """Set the LLM model. Applies to new conversations."""
+    model_id = body.get("model")
+    if not model_id:
+        raise HTTPException(status_code=400, detail="Missing 'model' field")
+    valid_ids = {m["id"] for m in AVAILABLE_MODELS}
+    if model_id not in valid_ids:
+        raise HTTPException(status_code=400, detail=f"Unknown model: {model_id}")
+    session_manager.config.model_name = model_id
+    logger.info(f"Model changed to {model_id} by {user.get('username', 'unknown')}")
+    return {"model": model_id}
+@router.post("/title")
+async def generate_title(
+    request: SubmitRequest, user: dict = Depends(get_current_user)
+) -> dict:
+    """Generate a short title for a chat session based on the first user message."""
+    model = session_manager.config.model_name
+    llm_params = _resolve_hf_router_params(model)
+    try:
+        response = await acompletion(
+            messages=[
+                {
+                    "role": "system",
+                    "content": (
+                        "Generate a very short title (max 6 words) for a chat conversation "
+                        "that starts with the following user message. "
+                        "Reply with ONLY the title, no quotes, no punctuation at the end."
+                    ),
+                },
+                {"role": "user", "content": request.text[:500]},
+            ],
+            max_tokens=20,
+            temperature=0.3,
+            timeout=8,
+            **llm_params,
+        )
+        title = response.choices[0].message.content.strip().strip('"').strip("'")
+        # Safety: cap at 50 chars
+        if len(title) > 50:
+            title = title[:50].rstrip() + "…"
+        return {"title": title}
+    except Exception as e:
+        logger.warning(f"Title generation failed: {e}")
+        # Fallback: truncate the message
+        fallback = request.text.strip()
+        title = fallback[:40].rstrip() + "…" if len(fallback) > 40 else fallback
+        return {"title": title}
 @router.post("/session", response_model=SessionResponse)
+async def create_session(
+    request: Request, user: dict = Depends(get_current_user)
+) -> SessionResponse:
+    """Create a new agent session bound to the authenticated user.
+    The user's HF access token is extracted from the Authorization header
+    and stored in the session so that tools (e.g. hf_jobs) can act on
+    behalf of the user.
+    Returns 503 if the server or user has reached the session limit.
+    """
+    # Extract the user's HF token (Bearer header or HttpOnly cookie)
+    hf_token = None
+    auth_header = request.headers.get("Authorization", "")
+    if auth_header.startswith("Bearer "):
+        hf_token = auth_header[7:]
+    if not hf_token:
+        hf_token = request.cookies.get("hf_access_token")
+    try:
+        session_id = await session_manager.create_session(
+            user_id=user["user_id"], hf_token=hf_token
+        )
+    except SessionCapacityError as e:
+        raise HTTPException(status_code=503, detail=str(e))
     return SessionResponse(session_id=session_id, ready=True)
 @router.get("/session/{session_id}", response_model=SessionInfo)
+async def get_session(
+    session_id: str, user: dict = Depends(get_current_user)
+) -> SessionInfo:
+    """Get session information. Only accessible by the session owner."""
+    _check_session_access(session_id, user)
     info = session_manager.get_session_info(session_id)
     return SessionInfo(**info)
 @router.get("/sessions", response_model=list[SessionInfo])
+async def list_sessions(user: dict = Depends(get_current_user)) -> list[SessionInfo]:
+    """List sessions belonging to the authenticated user."""
+    sessions = session_manager.list_sessions(user_id=user["user_id"])
     return [SessionInfo(**s) for s in sessions]
 @router.delete("/session/{session_id}")
+async def delete_session(
+    session_id: str, user: dict = Depends(get_current_user)
+) -> dict:
+    """Delete a session. Only accessible by the session owner."""
+    _check_session_access(session_id, user)
     success = await session_manager.delete_session(session_id)
     if not success:
         raise HTTPException(status_code=404, detail="Session not found")
 @router.post("/submit")
+async def submit_input(
+    request: SubmitRequest, user: dict = Depends(get_current_user)
+) -> dict:
+    """Submit user input to a session. Only accessible by the session owner."""
+    _check_session_access(request.session_id, user)
     success = await session_manager.submit_user_input(request.session_id, request.text)
     if not success:
         raise HTTPException(status_code=404, detail="Session not found or inactive")
 @router.post("/approve")
+async def submit_approval(
+    request: ApprovalRequest, user: dict = Depends(get_current_user)
+) -> dict:
+    """Submit tool approvals to a session. Only accessible by the session owner."""
+    _check_session_access(request.session_id, user)
     approvals = [
         {
             "tool_call_id": a.tool_call_id,
             "approved": a.approved,
             "feedback": a.feedback,
+            "edited_script": a.edited_script,
         }
         for a in request.approvals
     ]
 @router.post("/interrupt/{session_id}")
+async def interrupt_session(
+    session_id: str, user: dict = Depends(get_current_user)
+) -> dict:
     """Interrupt the current operation in a session."""
+    _check_session_access(session_id, user)
     success = await session_manager.interrupt(session_id)
     if not success:
         raise HTTPException(status_code=404, detail="Session not found or inactive")
 @router.post("/undo/{session_id}")
+async def undo_session(session_id: str, user: dict = Depends(get_current_user)) -> dict:
     """Undo the last turn in a session."""
+    _check_session_access(session_id, user)
     success = await session_manager.undo(session_id)
     if not success:
         raise HTTPException(status_code=404, detail="Session not found or inactive")
 @router.post("/compact/{session_id}")
+async def compact_session(
+    session_id: str, user: dict = Depends(get_current_user)
+) -> dict:
     """Compact the context in a session."""
+    _check_session_access(session_id, user)
     success = await session_manager.compact(session_id)
     if not success:
         raise HTTPException(status_code=404, detail="Session not found or inactive")
 @router.post("/shutdown/{session_id}")
+async def shutdown_session(
+    session_id: str, user: dict = Depends(get_current_user)
+) -> dict:
     """Shutdown a session."""
+    _check_session_access(session_id, user)
     success = await session_manager.shutdown_session(session_id)
     if not success:
         raise HTTPException(status_code=404, detail="Session not found or inactive")
 @router.websocket("/ws/{session_id}")
 async def websocket_endpoint(websocket: WebSocket, session_id: str) -> None:
+    """WebSocket endpoint for real-time events.
+    Authentication is done via:
+    - ?token= query parameter (for browsers that can't send WS headers)
+    - Cookie (automatic for same-origin connections)
+    - Dev mode bypass (when OAUTH_CLIENT_ID is not set)
+    NOTE: We must accept() before close() so the browser receives our custom
+    close codes (4001, 4003, 4004).  If we close() before accept(), Starlette
+    sends HTTP 403 and the browser only sees code 1006 (abnormal closure).
+    """
     logger.info(f"WebSocket connection request for session {session_id}")
+    # Authenticate the WebSocket connection
+    user = await get_ws_user(websocket)
+    if not user:
+        logger.warning(
+            f"WebSocket rejected: authentication failed for session {session_id}"
+        )
+        await websocket.accept()
+        await websocket.close(code=4001, reason="Authentication required")
+        return
     # Verify session exists
     info = session_manager.get_session_info(session_id)
     if not info:
+        logger.warning(f"WebSocket rejected: session {session_id} not found")
+        await websocket.accept()
         await websocket.close(code=4004, reason="Session not found")
         return
+    # Verify user owns the session
+    if not session_manager.verify_session_access(session_id, user["user_id"]):
+        logger.warning(
+            f"WebSocket rejected: user {user['user_id']} denied access to session {session_id}"
+        )
+        await websocket.accept()
+        await websocket.close(code=4003, reason="Access denied")
+        return
     await ws_manager.connect(websocket, session_id)
+    # Send "ready" immediately on WebSocket connection so the frontend
+    # knows the session is alive.  The original ready event from _run_session
+    # fires before the WS is connected and is always lost.
+    try:
+        await websocket.send_json(
+            {
+                "event_type": "ready",
+                "data": {"message": "Agent initialized"},
+            }
+        )
+    except Exception as e:
+        logger.error(f"Failed to send ready event for session {session_id}: {e}")
     try:
         while True:
             # Keep connection alive, handle ping/pong

backend/routes/auth.py CHANGED Viewed

@@ -1,11 +1,17 @@
-"""Authentication routes for HF OAuth."""
 import os
 import secrets
 from urllib.parse import urlencode
 import httpx
-from fastapi import APIRouter, HTTPException, Request
 from fastapi.responses import RedirectResponse
 router = APIRouter(prefix="/auth", tags=["auth"])
@@ -15,10 +21,19 @@ OAUTH_CLIENT_ID = os.environ.get("OAUTH_CLIENT_ID", "")
 OAUTH_CLIENT_SECRET = os.environ.get("OAUTH_CLIENT_SECRET", "")
 OPENID_PROVIDER_URL = os.environ.get("OPENID_PROVIDER_URL", "https://huggingface.co")
-# In-memory session store (replace with proper session management in production)
 oauth_states: dict[str, dict] = {}
 def get_redirect_uri(request: Request) -> str:
     """Get the OAuth callback redirect URI."""
     # In HF Spaces, use the SPACE_HOST if available
@@ -38,17 +53,26 @@ async def oauth_login(request: Request) -> RedirectResponse:
             detail="OAuth not configured. Set OAUTH_CLIENT_ID environment variable.",
         )
     # Generate state for CSRF protection
     state = secrets.token_urlsafe(32)
-    oauth_states[state] = {"redirect_uri": get_redirect_uri(request)}
     # Build authorization URL
     params = {
         "client_id": OAUTH_CLIENT_ID,
         "redirect_uri": get_redirect_uri(request),
-        "scope": "openid profile",
         "response_type": "code",
         "state": state,
     }
     auth_url = f"{OPENID_PROVIDER_URL}/oauth/authorize?{urlencode(params)}"
@@ -91,58 +115,57 @@ async def oauth_callback(
     # Get user info
     access_token = token_data.get("access_token")
-    if access_token:
-        async with httpx.AsyncClient() as client:
-            try:
-                userinfo_response = await client.get(
-                    f"{OPENID_PROVIDER_URL}/oauth/userinfo",
-                    headers={"Authorization": f"Bearer {access_token}"},
-                )
-                userinfo_response.raise_for_status()
-                user_info = userinfo_response.json()
-            except httpx.HTTPError:
-                user_info = {}
-    else:
-        user_info = {}
-    # For now, redirect to home with token in query params
-    # In production, use secure cookies or session storage
-    redirect_params = {
-        "access_token": access_token,
-        "username": user_info.get("preferred_username", ""),
-    }
-    return RedirectResponse(url=f"/?{urlencode(redirect_params)}")
 @router.get("/logout")
 async def logout() -> RedirectResponse:
-    """Log out the user."""
-    return RedirectResponse(url="/")
-@router.get("/me")
-async def get_current_user(request: Request) -> dict:
-    """Get current user info from Authorization header."""
-    auth_header = request.headers.get("Authorization", "")
-    if not auth_header.startswith("Bearer "):
-        return {"authenticated": False}
-    token = auth_header.split(" ")[1]
-    async with httpx.AsyncClient() as client:
-        try:
-            response = await client.get(
-                f"{OPENID_PROVIDER_URL}/oauth/userinfo",
-                headers={"Authorization": f"Bearer {token}"},
-            )
-            response.raise_for_status()
-            user_info = response.json()
-            return {
-                "authenticated": True,
-                "username": user_info.get("preferred_username"),
-                "name": user_info.get("name"),
-                "picture": user_info.get("picture"),
-            }
-        except httpx.HTTPError:
-            return {"authenticated": False}

+"""Authentication routes for HF OAuth.
+Handles the OAuth 2.0 authorization code flow with HF as provider.
+After successful auth, sets an HttpOnly cookie with the access token.
+"""
 import os
 import secrets
+import time
 from urllib.parse import urlencode
 import httpx
+from dependencies import AUTH_ENABLED, get_current_user
+from fastapi import APIRouter, Depends, HTTPException, Request
 from fastapi.responses import RedirectResponse
 router = APIRouter(prefix="/auth", tags=["auth"])
 OAUTH_CLIENT_SECRET = os.environ.get("OAUTH_CLIENT_SECRET", "")
 OPENID_PROVIDER_URL = os.environ.get("OPENID_PROVIDER_URL", "https://huggingface.co")
+# In-memory OAuth state store with expiry (5 min TTL)
+_OAUTH_STATE_TTL = 300
 oauth_states: dict[str, dict] = {}
+def _cleanup_expired_states() -> None:
+    """Remove expired OAuth states to prevent memory growth.
+    Mutates the module-level ``oauth_states`` dict in place. An entry without
+    an "expires_at" key is treated as expired, because the lookup defaults to 0.
+    """
+    now = time.time()
+    # Collect the keys first: deleting from a dict while iterating it raises RuntimeError.
+    expired = [k for k, v in oauth_states.items() if now > v.get("expires_at", 0)]
+    for k in expired:
+        del oauth_states[k]
 def get_redirect_uri(request: Request) -> str:
     """Get the OAuth callback redirect URI."""
     # In HF Spaces, use the SPACE_HOST if available
             detail="OAuth not configured. Set OAUTH_CLIENT_ID environment variable.",
         )
+    # Clean up expired states to prevent memory growth
+    _cleanup_expired_states()
     # Generate state for CSRF protection
     state = secrets.token_urlsafe(32)
+    oauth_states[state] = {
+        "redirect_uri": get_redirect_uri(request),
+        "expires_at": time.time() + _OAUTH_STATE_TTL,
+    }
     # Build authorization URL
     params = {
         "client_id": OAUTH_CLIENT_ID,
         "redirect_uri": get_redirect_uri(request),
+        "scope": "openid profile read-repos write-repos contribute-repos manage-repos inference-api jobs write-discussions",
         "response_type": "code",
         "state": state,
+        "orgIds": os.environ.get(
+            "HF_OAUTH_ORG_ID", "698dbf55845d85df163175f1"
+        ),  # ml-agent-explorers
     }
     auth_url = f"{OPENID_PROVIDER_URL}/oauth/authorize?{urlencode(params)}"
     # Get user info
     access_token = token_data.get("access_token")
+    if not access_token:
+        raise HTTPException(
+            status_code=500,
+            detail="Token exchange succeeded but no access_token was returned.",
+        )
+    # Fetch user info (optional — failure is not fatal)
+    async with httpx.AsyncClient() as client:
+        try:
+            userinfo_response = await client.get(
+                f"{OPENID_PROVIDER_URL}/oauth/userinfo",
+                headers={"Authorization": f"Bearer {access_token}"},
+            )
+            userinfo_response.raise_for_status()
+        except httpx.HTTPError:
+            pass  # user_info not required for auth flow
+    # Set access token as HttpOnly cookie (not in URL — avoids leaks via
+    # Referrer headers, browser history, and server logs)
+    is_production = bool(os.environ.get("SPACE_HOST"))
+    response = RedirectResponse(url="/", status_code=302)
+    response.set_cookie(
+        key="hf_access_token",
+        value=access_token,
+        httponly=True,
+        secure=is_production,  # Secure flag only in production (HTTPS)
+        samesite="lax",
+        max_age=3600 * 24,  # 24 hours
+        path="/",
+    )
+    return response
 @router.get("/logout")
 async def logout() -> RedirectResponse:
+    """Log out the user by clearing the auth cookie."""
+    response = RedirectResponse(url="/")
+    response.delete_cookie(key="hf_access_token", path="/")
+    return response
+@router.get("/status")
+async def auth_status() -> dict:
+    """Check if OAuth is enabled on this instance."""
+    return {"auth_enabled": AUTH_ENABLED}
+@router.get("/me")
+async def get_me(user: dict = Depends(get_current_user)) -> dict:
+    """Get current user info. Returns the authenticated user or dev user.
+    Uses the shared auth dependency which handles cookie + Bearer token.
+    """
+    return user

backend/session_manager.py CHANGED Viewed

@@ -48,11 +48,28 @@ class AgentSession:
     session: Session
     tool_router: ToolRouter
     submission_queue: asyncio.Queue
     task: asyncio.Task | None = None
     created_at: datetime = field(default_factory=datetime.utcnow)
     is_active: bool = True
 class SessionManager:
     """Manages multiple concurrent agent sessions."""
@@ -61,19 +78,69 @@ class SessionManager:
         self.sessions: dict[str, AgentSession] = {}
         self._lock = asyncio.Lock()
-    async def create_session(self) -> str:
-        """Create a new agent session and return its ID."""
         session_id = str(uuid.uuid4())
         # Create queues for this session
         submission_queue: asyncio.Queue = asyncio.Queue()
         event_queue: asyncio.Queue = asyncio.Queue()
-        # Create tool router
-        tool_router = ToolRouter(self.config.mcpServers)
-        # Create the agent session
-        session = Session(event_queue, config=self.config, tool_router=tool_router)
         # Create wrapper
         agent_session = AgentSession(
@@ -81,6 +148,8 @@ class SessionManager:
             session=session,
             tool_router=tool_router,
             submission_queue=submission_queue,
         )
         async with self._lock:
@@ -92,7 +161,7 @@ class SessionManager:
         )
         agent_session.task = task
-        logger.info(f"Created session {session_id}")
         return session_id
     async def _run_session(
@@ -245,6 +314,27 @@ class SessionManager:
         return True
     def get_session_info(self, session_id: str) -> dict[str, Any] | None:
         """Get information about a session."""
         agent_session = self.sessions.get(session_id)
@@ -256,15 +346,25 @@ class SessionManager:
             "created_at": agent_session.created_at.isoformat(),
             "is_active": agent_session.is_active,
             "message_count": len(agent_session.session.context_manager.items),
         }
-    def list_sessions(self) -> list[dict[str, Any]]:
-        """List all sessions."""
-        return [
-            self.get_session_info(sid)
-            for sid in self.sessions
-            if self.get_session_info(sid)
-        ]
     @property
     def active_session_count(self) -> int:

     session: Session
     tool_router: ToolRouter
     submission_queue: asyncio.Queue
+    user_id: str = "dev"  # Owner of this session
+    hf_token: str | None = None  # User's HF OAuth token for tool execution
     task: asyncio.Task | None = None
     created_at: datetime = field(default_factory=datetime.utcnow)
     is_active: bool = True
+class SessionCapacityError(Exception):
+    """Raised when no more sessions can be created.
+    Args:
+        message: Human-readable explanation; becomes str(exception).
+        error_type: Which limit was hit. Defaults to "global"; the other
+            value used is "per_user". Stored on the instance as ``error_type``.
+    """
+    def __init__(self, message: str, error_type: str = "global") -> None:
+        super().__init__(message)
+        self.error_type = error_type  # "global" or "per_user"
+# ── Capacity limits ─────────────────────────────────────────────────
+# Estimated for HF Spaces cpu-basic (2 vCPU, 16 GB RAM).
+# Each session uses ~10-20 MB (context, tools, queues, task).
+MAX_SESSIONS: int = 50
+MAX_SESSIONS_PER_USER: int = 10
 class SessionManager:
     """Manages multiple concurrent agent sessions."""
         self.sessions: dict[str, AgentSession] = {}
         self._lock = asyncio.Lock()
+    def _count_user_sessions(self, user_id: str) -> int:
+        """Count active sessions owned by a specific user."""
+        return sum(
+            1
+            for s in self.sessions.values()
+            if s.user_id == user_id and s.is_active
+        )
+    async def create_session(self, user_id: str = "dev", hf_token: str | None = None) -> str:
+        """Create a new agent session and return its ID.
+        Session() and ToolRouter() constructors contain blocking I/O
+        (e.g. HfApi().whoami(), litellm.get_max_tokens()) so they are
+        executed in a thread pool to avoid freezing the async event loop.
+        Args:
+            user_id: The ID of the user who owns this session.
+        Raises:
+            SessionCapacityError: If the server or user has reached the
+                maximum number of concurrent sessions.
+        """
+        # ── Capacity checks ──────────────────────────────────────────
+        async with self._lock:
+            active_count = self.active_session_count
+            if active_count >= MAX_SESSIONS:
+                raise SessionCapacityError(
+                    f"Server is at capacity ({active_count}/{MAX_SESSIONS} sessions). "
+                    "Please try again later.",
+                    error_type="global",
+                )
+            if user_id != "dev":
+                user_count = self._count_user_sessions(user_id)
+                if user_count >= MAX_SESSIONS_PER_USER:
+                    raise SessionCapacityError(
+                        f"You have reached the maximum of {MAX_SESSIONS_PER_USER} "
+                        "concurrent sessions. Please close an existing session first.",
+                        error_type="per_user",
+                    )
         session_id = str(uuid.uuid4())
         # Create queues for this session
         submission_queue: asyncio.Queue = asyncio.Queue()
         event_queue: asyncio.Queue = asyncio.Queue()
+        # Run blocking constructors in a thread to keep the event loop responsive.
+        # Without this, Session.__init__ → ContextManager → litellm.get_max_tokens()
+        # blocks all HTTP/WebSocket handling.
+        import time as _time
+        def _create_session_sync():
+            t0 = _time.monotonic()
+            tool_router = ToolRouter(self.config.mcpServers)
+            session = Session(event_queue, config=self.config, tool_router=tool_router)
+            t1 = _time.monotonic()
+            logger.info(f"Session initialized in {t1 - t0:.2f}s")
+            return tool_router, session
+        tool_router, session = await asyncio.to_thread(_create_session_sync)
+        # Store user's HF token on the session so tools can use it
+        session.hf_token = hf_token
         # Create wrapper
         agent_session = AgentSession(
             session=session,
             tool_router=tool_router,
             submission_queue=submission_queue,
+            user_id=user_id,
+            hf_token=hf_token,
         )
         async with self._lock:
         )
         agent_session.task = task
+        logger.info(f"Created session {session_id} for user {user_id}")
         return session_id
     async def _run_session(
         return True
+    def get_session_owner(self, session_id: str) -> str | None:
+        """Get the user_id that owns a session, or None if session doesn't exist."""
+        agent_session = self.sessions.get(session_id)
+        if not agent_session:
+            return None
+        return agent_session.user_id
+    def verify_session_access(self, session_id: str, user_id: str) -> bool:
+        """Check if a user has access to a session.
+        Returns True if the session exists AND any of the following holds:
+        - The user owns it
+        - The user_id is "dev" (dev mode bypass)
+        - The session's owner is "dev"
+        Returns False if the session does not exist.
+        NOTE(review): "dev" is an ordinary string sentinel. If an authenticated
+        account could ever resolve to user_id "dev", it would pass this check
+        for every session — confirm the auth dependency cannot produce it
+        when auth is enabled.
+        """
+        owner = self.get_session_owner(session_id)
+        if owner is None:
+            return False
+        # Either side being "dev" grants access, not only the caller.
+        if user_id == "dev" or owner == "dev":
+            return True
+        return owner == user_id
     def get_session_info(self, session_id: str) -> dict[str, Any] | None:
         """Get information about a session."""
         agent_session = self.sessions.get(session_id)
             "created_at": agent_session.created_at.isoformat(),
             "is_active": agent_session.is_active,
             "message_count": len(agent_session.session.context_manager.items),
+            "user_id": agent_session.user_id,
         }
+    def list_sessions(self, user_id: str | None = None) -> list[dict[str, Any]]:
+        """List sessions, optionally filtered by user.
+        Args:
+            user_id: If provided, only return sessions owned by this user.
+                     If "dev", return all sessions (dev mode).
+                     If None or empty, no filtering is applied either.
+        Returns:
+            One get_session_info() dict per matching session.
+        """
+        results = []
+        for sid in self.sessions:
+            info = self.get_session_info(sid)
+            # Skip ids for which get_session_info returns nothing.
+            if not info:
+                continue
+            if user_id and user_id != "dev" and info.get("user_id") != user_id:
+                continue
+            results.append(info)
+        return results
     @property
     def active_session_count(self) -> int:

backend/websocket.py CHANGED Viewed

@@ -1,6 +1,5 @@
 """WebSocket connection manager for real-time communication."""
-import asyncio
 import logging
 from typing import Any
@@ -15,23 +14,18 @@ class ConnectionManager:
     def __init__(self) -> None:
         # session_id -> WebSocket
         self.active_connections: dict[str, WebSocket] = {}
-        # session_id -> asyncio.Queue for outgoing messages
-        self.message_queues: dict[str, asyncio.Queue] = {}
     async def connect(self, websocket: WebSocket, session_id: str) -> None:
         """Accept a WebSocket connection and register it."""
         logger.info(f"Attempting to accept WebSocket for session {session_id}")
         await websocket.accept()
         self.active_connections[session_id] = websocket
-        self.message_queues[session_id] = asyncio.Queue()
         logger.info(f"WebSocket connected and registered for session {session_id}")
     def disconnect(self, session_id: str) -> None:
         """Remove a WebSocket connection."""
         if session_id in self.active_connections:
             del self.active_connections[session_id]
-        if session_id in self.message_queues:
-            del self.message_queues[session_id]
         logger.info(f"WebSocket disconnected for session {session_id}")
     async def send_event(
@@ -63,10 +57,6 @@ class ConnectionManager:
         """Check if a session has an active WebSocket connection."""
         return session_id in self.active_connections
-    def get_queue(self, session_id: str) -> asyncio.Queue | None:
-        """Get the message queue for a session."""
-        return self.message_queues.get(session_id)
 # Global connection manager instance
 manager = ConnectionManager()

 """WebSocket connection manager for real-time communication."""
 import logging
 from typing import Any
     def __init__(self) -> None:
         """Create a manager that starts with no registered connections."""
         # session_id -> WebSocket
         self.active_connections: dict[str, WebSocket] = {}
     async def connect(self, websocket: WebSocket, session_id: str) -> None:
         """Accept a WebSocket connection and register it.
         Args:
             websocket: The incoming connection to accept.
             session_id: Key under which the connection is stored.
         """
         logger.info(f"Attempting to accept WebSocket for session {session_id}")
         await websocket.accept()
         # One socket per session_id: registering again under the same id
         # replaces the previously stored socket.
         self.active_connections[session_id] = websocket
         logger.info(f"WebSocket connected and registered for session {session_id}")
     def disconnect(self, session_id: str) -> None:
         """Forget the WebSocket registered for a session, if there is one.
         Unknown session ids are ignored; the disconnect is logged either way.
         """
         # pop() with a default is a no-op for ids that were never registered.
         self.active_connections.pop(session_id, None)
         logger.info(f"WebSocket disconnected for session {session_id}")
     async def send_event(
         """Check if a session has an active WebSocket connection."""
         return session_id in self.active_connections
 # Global connection manager instance
 manager = ConnectionManager()

configs/main_agent_config.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "model_name": "anthropic/claude-opus-4-5-20251101",
   "save_sessions": true,
   "session_dataset_repo": "akseljoonas/hf-agent-sessions",
   "yolo_mode": false,
-  "confirm_cpu_jobs": false,
   "auto_file_upload": true,
   "mcpServers": {
     "hf-mcp-server": {

 {
+  "model_name": "huggingface/novita/moonshotai/kimi-k2.5",
   "save_sessions": true,
   "session_dataset_repo": "akseljoonas/hf-agent-sessions",
   "yolo_mode": false,
+  "confirm_cpu_jobs": true,
   "auto_file_upload": true,
   "mcpServers": {
     "hf-mcp-server": {

frontend/package-lock.json CHANGED Viewed

@@ -8,10 +8,12 @@
       "name": "hf-agent-frontend",
       "version": "1.0.0",
       "dependencies": {
         "@emotion/react": "^11.13.0",
         "@emotion/styled": "^11.13.0",
         "@mui/icons-material": "^6.1.0",
         "@mui/material": "^6.1.0",
         "react": "^18.3.1",
         "react-dom": "^18.3.1",
         "react-markdown": "^9.0.1",
@@ -34,6 +36,70 @@
         "vite": "^5.4.10"
       }
     },
     "node_modules/@babel/code-frame": {
       "version": "7.28.6",
       "resolved": "https://registry.npmjs.org/@babel/code-frame/-/code-frame-7.28.6.tgz",
@@ -1348,6 +1414,15 @@
         }
       }
     },
     "node_modules/@popperjs/core": {
       "version": "2.11.8",
       "resolved": "https://registry.npmjs.org/@popperjs/core/-/core-2.11.8.tgz",
@@ -1715,6 +1790,12 @@
         "win32"
       ]
     },
     "node_modules/@types/babel__core": {
       "version": "7.20.5",
       "resolved": "https://registry.npmjs.org/@types/babel__core/-/babel__core-7.20.5.tgz",
@@ -2155,6 +2236,15 @@
       "integrity": "sha512-WmoN8qaIAo7WTYWbAZuG8PYEhn5fkz7dZrqTBZ7dtt//lL2Gwms1IcnQ5yHqjDfX8Ft5j4YzDM23f87zBfDe9g==",
       "license": "ISC"
     },
     "node_modules/@vitejs/plugin-react": {
       "version": "4.7.0",
       "resolved": "https://registry.npmjs.org/@vitejs/plugin-react/-/plugin-react-4.7.0.tgz",
@@ -2200,6 +2290,24 @@
         "acorn": "^6.0.0 || ^7.0.0 || ^8.0.0"
       }
     },
     "node_modules/ajv": {
       "version": "6.12.6",
       "resolved": "https://registry.npmjs.org/ajv/-/ajv-6.12.6.tgz",
@@ -2848,6 +2956,15 @@
         "node": ">=0.10.0"
       }
     },
     "node_modules/extend": {
       "version": "3.0.2",
       "resolved": "https://registry.npmjs.org/extend/-/extend-3.0.2.tgz",
@@ -3356,6 +3473,12 @@
       "integrity": "sha512-xyFwyhro/JEof6Ghe2iz2NcXoj2sloNsWr/XsERDK/oiPCfaNhl5ONfp+jQdAZRQQ0IJWNzH9zIZF7li91kh2w==",
       "license": "MIT"
     },
     "node_modules/json-schema-traverse": {
       "version": "0.4.1",
       "resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-0.4.1.tgz",
@@ -5052,6 +5175,31 @@
         "url": "https://github.com/sponsors/ljharb"
       }
     },
     "node_modules/tinyglobby": {
       "version": "0.2.15",
       "resolved": "https://registry.npmjs.org/tinyglobby/-/tinyglobby-0.2.15.tgz",
@@ -5282,6 +5430,16 @@
         "punycode": "^2.1.0"
       }
     },
     "node_modules/vfile": {
       "version": "6.0.3",
       "resolved": "https://registry.npmjs.org/vfile/-/vfile-6.0.3.tgz",
@@ -5426,6 +5584,16 @@
         "url": "https://github.com/sponsors/sindresorhus"
       }
     },
     "node_modules/zustand": {
       "version": "5.0.10",
       "resolved": "https://registry.npmjs.org/zustand/-/zustand-5.0.10.tgz",

       "name": "hf-agent-frontend",
       "version": "1.0.0",
       "dependencies": {
+        "@ai-sdk/react": "^3.0.93",
         "@emotion/react": "^11.13.0",
         "@emotion/styled": "^11.13.0",
         "@mui/icons-material": "^6.1.0",
         "@mui/material": "^6.1.0",
+        "ai": "^6.0.91",
         "react": "^18.3.1",
         "react-dom": "^18.3.1",
         "react-markdown": "^9.0.1",
         "vite": "^5.4.10"
       }
     },
+    "node_modules/@ai-sdk/gateway": {
+      "version": "3.0.50",
+      "resolved": "https://registry.npmjs.org/@ai-sdk/gateway/-/gateway-3.0.50.tgz",
+      "integrity": "sha512-Jdd1a8VgbD7l7r+COj0h5SuaYRfPvOJ/AO6l0OrmTPEcI2MUQPr3C4JttfpNkcheEN+gOdy0CtZWuG17bW2fjw==",
+      "license": "Apache-2.0",
+      "dependencies": {
+        "@ai-sdk/provider": "3.0.8",
+        "@ai-sdk/provider-utils": "4.0.15",
+        "@vercel/oidc": "3.1.0"
+      },
+      "engines": {
+        "node": ">=18"
+      },
+      "peerDependencies": {
+        "zod": "^3.25.76 || ^4.1.8"
+      }
+    },
+    "node_modules/@ai-sdk/provider": {
+      "version": "3.0.8",
+      "resolved": "https://registry.npmjs.org/@ai-sdk/provider/-/provider-3.0.8.tgz",
+      "integrity": "sha512-oGMAgGoQdBXbZqNG0Ze56CHjDZ1IDYOwGYxYjO5KLSlz5HiNQ9udIXsPZ61VWaHGZ5XW/jyjmr6t2xz2jGVwbQ==",
+      "license": "Apache-2.0",
+      "dependencies": {
+        "json-schema": "^0.4.0"
+      },
+      "engines": {
+        "node": ">=18"
+      }
+    },
+    "node_modules/@ai-sdk/provider-utils": {
+      "version": "4.0.15",
+      "resolved": "https://registry.npmjs.org/@ai-sdk/provider-utils/-/provider-utils-4.0.15.tgz",
+      "integrity": "sha512-8XiKWbemmCbvNN0CLR9u3PQiet4gtEVIrX4zzLxnCj06AwsEDJwJVBbKrEI4t6qE8XRSIvU2irka0dcpziKW6w==",
+      "license": "Apache-2.0",
+      "dependencies": {
+        "@ai-sdk/provider": "3.0.8",
+        "@standard-schema/spec": "^1.1.0",
+        "eventsource-parser": "^3.0.6"
+      },
+      "engines": {
+        "node": ">=18"
+      },
+      "peerDependencies": {
+        "zod": "^3.25.76 || ^4.1.8"
+      }
+    },
+    "node_modules/@ai-sdk/react": {
+      "version": "3.0.93",
+      "resolved": "https://registry.npmjs.org/@ai-sdk/react/-/react-3.0.93.tgz",
+      "integrity": "sha512-FY1HmeAfCpiAGLhIZh2QR8QFzHFZfhjMmkA9D5KC/O3eGqPeY7CwBABLkzRH+5Gkf+MfxXnEm4VF0MpmvDMjpg==",
+      "license": "Apache-2.0",
+      "dependencies": {
+        "@ai-sdk/provider-utils": "4.0.15",
+        "ai": "6.0.91",
+        "swr": "^2.2.5",
+        "throttleit": "2.1.0"
+      },
+      "engines": {
+        "node": ">=18"
+      },
+      "peerDependencies": {
+        "react": "^18 || ~19.0.1 || ~19.1.2 || ^19.2.1"
+      }
+    },
     "node_modules/@babel/code-frame": {
       "version": "7.28.6",
       "resolved": "https://registry.npmjs.org/@babel/code-frame/-/code-frame-7.28.6.tgz",
         }
       }
     },
+    "node_modules/@opentelemetry/api": {
+      "version": "1.9.0",
+      "resolved": "https://registry.npmjs.org/@opentelemetry/api/-/api-1.9.0.tgz",
+      "integrity": "sha512-3giAOQvZiH5F9bMlMiv8+GSPMeqg0dbaeo58/0SlA9sxSqZhnUtxzX9/2FzyhS9sWQf5S0GJE0AKBrFqjpeYcg==",
+      "license": "Apache-2.0",
+      "engines": {
+        "node": ">=8.0.0"
+      }
+    },
     "node_modules/@popperjs/core": {
       "version": "2.11.8",
       "resolved": "https://registry.npmjs.org/@popperjs/core/-/core-2.11.8.tgz",
         "win32"
       ]
     },
+    "node_modules/@standard-schema/spec": {
+      "version": "1.1.0",
+      "resolved": "https://registry.npmjs.org/@standard-schema/spec/-/spec-1.1.0.tgz",
+      "integrity": "sha512-l2aFy5jALhniG5HgqrD6jXLi/rUWrKvqN/qJx6yoJsgKhblVd+iqqU4RCXavm/jPityDo5TCvKMnpjKnOriy0w==",
+      "license": "MIT"
+    },
     "node_modules/@types/babel__core": {
       "version": "7.20.5",
       "resolved": "https://registry.npmjs.org/@types/babel__core/-/babel__core-7.20.5.tgz",
       "integrity": "sha512-WmoN8qaIAo7WTYWbAZuG8PYEhn5fkz7dZrqTBZ7dtt//lL2Gwms1IcnQ5yHqjDfX8Ft5j4YzDM23f87zBfDe9g==",
       "license": "ISC"
     },
+    "node_modules/@vercel/oidc": {
+      "version": "3.1.0",
+      "resolved": "https://registry.npmjs.org/@vercel/oidc/-/oidc-3.1.0.tgz",
+      "integrity": "sha512-Fw28YZpRnA3cAHHDlkt7xQHiJ0fcL+NRcIqsocZQUSmbzeIKRpwttJjik5ZGanXP+vlA4SbTg+AbA3bP363l+w==",
+      "license": "Apache-2.0",
+      "engines": {
+        "node": ">= 20"
+      }
+    },
     "node_modules/@vitejs/plugin-react": {
       "version": "4.7.0",
       "resolved": "https://registry.npmjs.org/@vitejs/plugin-react/-/plugin-react-4.7.0.tgz",
         "acorn": "^6.0.0 || ^7.0.0 || ^8.0.0"
       }
     },
+    "node_modules/ai": {
+      "version": "6.0.91",
+      "resolved": "https://registry.npmjs.org/ai/-/ai-6.0.91.tgz",
+      "integrity": "sha512-k1/8BusZMhYVxxLZt0BUZzm9HVDCCh117nyWfWUx5xjR2+tWisJbXgysL7EBMq2lgyHwgpA1jDR3tVjWSdWZXw==",
+      "license": "Apache-2.0",
+      "dependencies": {
+        "@ai-sdk/gateway": "3.0.50",
+        "@ai-sdk/provider": "3.0.8",
+        "@ai-sdk/provider-utils": "4.0.15",
+        "@opentelemetry/api": "1.9.0"
+      },
+      "engines": {
+        "node": ">=18"
+      },
+      "peerDependencies": {
+        "zod": "^3.25.76 || ^4.1.8"
+      }
+    },
     "node_modules/ajv": {
       "version": "6.12.6",
       "resolved": "https://registry.npmjs.org/ajv/-/ajv-6.12.6.tgz",
         "node": ">=0.10.0"
       }
     },
+    "node_modules/eventsource-parser": {
+      "version": "3.0.6",
+      "resolved": "https://registry.npmjs.org/eventsource-parser/-/eventsource-parser-3.0.6.tgz",
+      "integrity": "sha512-Vo1ab+QXPzZ4tCa8SwIHJFaSzy4R6SHf7BY79rFBDf0idraZWAkYrDjDj8uWaSm3S2TK+hJ7/t1CEmZ7jXw+pg==",
+      "license": "MIT",
+      "engines": {
+        "node": ">=18.0.0"
+      }
+    },
     "node_modules/extend": {
       "version": "3.0.2",
       "resolved": "https://registry.npmjs.org/extend/-/extend-3.0.2.tgz",
       "integrity": "sha512-xyFwyhro/JEof6Ghe2iz2NcXoj2sloNsWr/XsERDK/oiPCfaNhl5ONfp+jQdAZRQQ0IJWNzH9zIZF7li91kh2w==",
       "license": "MIT"
     },
+    "node_modules/json-schema": {
+      "version": "0.4.0",
+      "resolved": "https://registry.npmjs.org/json-schema/-/json-schema-0.4.0.tgz",
+      "integrity": "sha512-es94M3nTIfsEPisRafak+HDLfHXnKBhV3vU5eqPcS3flIWqcxJWgXHXiey3YrpaNsanY5ei1VoYEbOzijuq9BA==",
+      "license": "(AFL-2.1 OR BSD-3-Clause)"
+    },
     "node_modules/json-schema-traverse": {
       "version": "0.4.1",
       "resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-0.4.1.tgz",
         "url": "https://github.com/sponsors/ljharb"
       }
     },
+    "node_modules/swr": {
+      "version": "2.4.0",
+      "resolved": "https://registry.npmjs.org/swr/-/swr-2.4.0.tgz",
+      "integrity": "sha512-sUlC20T8EOt1pHmDiqueUWMmRRX03W7w5YxovWX7VR2KHEPCTMly85x05vpkP5i6Bu4h44ePSMD9Tc+G2MItFw==",
+      "license": "MIT",
+      "dependencies": {
+        "dequal": "^2.0.3",
+        "use-sync-external-store": "^1.6.0"
+      },
+      "peerDependencies": {
+        "react": "^16.11.0 || ^17.0.0 || ^18.0.0 || ^19.0.0"
+      }
+    },
+    "node_modules/throttleit": {
+      "version": "2.1.0",
+      "resolved": "https://registry.npmjs.org/throttleit/-/throttleit-2.1.0.tgz",
+      "integrity": "sha512-nt6AMGKW1p/70DF/hGBdJB57B8Tspmbp5gfJ8ilhLnt7kkr2ye7hzD6NVG8GGErk2HWF34igrL2CXmNIkzKqKw==",
+      "license": "MIT",
+      "engines": {
+        "node": ">=18"
+      },
+      "funding": {
+        "url": "https://github.com/sponsors/sindresorhus"
+      }
+    },
     "node_modules/tinyglobby": {
       "version": "0.2.15",
       "resolved": "https://registry.npmjs.org/tinyglobby/-/tinyglobby-0.2.15.tgz",
         "punycode": "^2.1.0"
       }
     },
+    "node_modules/use-sync-external-store": {
+      "version": "1.6.0",
+      "resolved": "https://registry.npmjs.org/use-sync-external-store/-/use-sync-external-store-1.6.0.tgz",
+      "integrity": "sha512-Pp6GSwGP/NrPIrxVFAIkOQeyw8lFenOHijQWkUTrDvrF4ALqylP2C/KCkeS9dpUM3KvYRQhna5vt7IL95+ZQ9w==",
+      "license": "MIT",
+      "peer": true,
+      "peerDependencies": {
+        "react": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0"
+      }
+    },
     "node_modules/vfile": {
       "version": "6.0.3",
       "resolved": "https://registry.npmjs.org/vfile/-/vfile-6.0.3.tgz",
         "url": "https://github.com/sponsors/sindresorhus"
       }
     },
+    "node_modules/zod": {
+      "version": "4.3.6",
+      "resolved": "https://registry.npmjs.org/zod/-/zod-4.3.6.tgz",
+      "integrity": "sha512-rftlrkhHZOcjDwkGlnUtZZkvaPHCsDATp4pGpuOOMDaTdDDXF91wuVDJoWoPsKX/3YPQ5fHuF3STjcYyKr+Qhg==",
+      "license": "MIT",
+      "peer": true,
+      "funding": {
+        "url": "https://github.com/sponsors/colinhacks"
+      }
+    },
     "node_modules/zustand": {
       "version": "5.0.10",
       "resolved": "https://registry.npmjs.org/zustand/-/zustand-5.0.10.tgz",

frontend/package.json CHANGED Viewed

@@ -10,10 +10,12 @@
     "preview": "vite preview"
   },
   "dependencies": {
     "@emotion/react": "^11.13.0",
     "@emotion/styled": "^11.13.0",
     "@mui/icons-material": "^6.1.0",
     "@mui/material": "^6.1.0",
     "react": "^18.3.1",
     "react-dom": "^18.3.1",
     "react-markdown": "^9.0.1",

     "preview": "vite preview"
   },
   "dependencies": {
+    "@ai-sdk/react": "^3.0.93",
     "@emotion/react": "^11.13.0",
     "@emotion/styled": "^11.13.0",
     "@mui/icons-material": "^6.1.0",
     "@mui/material": "^6.1.0",
+    "ai": "^6.0.91",
     "react": "^18.3.1",
     "react-dom": "^18.3.1",
     "react-markdown": "^9.0.1",

frontend/src/App.tsx CHANGED Viewed

@@ -1,7 +1,12 @@
 import { Box } from '@mui/material';
 import AppLayout from '@/components/Layout/AppLayout';
 function App() {
   return (
     <Box sx={{ height: '100vh', display: 'flex' }}>
       <AppLayout />

 import { Box } from '@mui/material';
 import AppLayout from '@/components/Layout/AppLayout';
+import { useAuth } from '@/hooks/useAuth';
 function App() {
+  // Non-blocking auth check — fires in background, updates store when done.
+  // If auth fails later, apiFetch redirects to /auth/login.
+  useAuth();
   return (
     <Box sx={{ height: '100vh', display: 'flex' }}>
       <AppLayout />

frontend/src/components/ApprovalModal/ApprovalModal.tsx DELETED Viewed

@@ -1,208 +0,0 @@
-import { useState, useCallback } from 'react';
-import {
-  Dialog,
-  DialogTitle,
-  DialogContent,
-  DialogActions,
-  Button,
-  Box,
-  Typography,
-  Checkbox,
-  FormControlLabel,
-  Accordion,
-  AccordionSummary,
-  AccordionDetails,
-  TextField,
-  Chip,
-} from '@mui/material';
-import ExpandMoreIcon from '@mui/icons-material/ExpandMore';
-import WarningIcon from '@mui/icons-material/Warning';
-import { useAgentStore } from '@/store/agentStore';
-interface ApprovalModalProps {
-  sessionId: string | null;
-}
-interface ApprovalState {
-  [toolCallId: string]: {
-    approved: boolean;
-    feedback: string;
-  };
-}
-export default function ApprovalModal({ sessionId }: ApprovalModalProps) {
-  const { pendingApprovals, setPendingApprovals } = useAgentStore();
-  const [approvalState, setApprovalState] = useState<ApprovalState>({});
-  const isOpen = pendingApprovals !== null && pendingApprovals.tools.length > 0;
-  const handleApprovalChange = useCallback(
-    (toolCallId: string, approved: boolean) => {
-      setApprovalState((prev) => ({
-        ...prev,
-        [toolCallId]: {
-          ...prev[toolCallId],
-          approved,
-          feedback: prev[toolCallId]?.feedback || '',
-        },
-      }));
-    },
-    []
-  );
-  const handleFeedbackChange = useCallback(
-    (toolCallId: string, feedback: string) => {
-      setApprovalState((prev) => ({
-        ...prev,
-        [toolCallId]: {
-          ...prev[toolCallId],
-          feedback,
-        },
-      }));
-    },
-    []
-  );
-  const handleSubmit = useCallback(async () => {
-    if (!sessionId || !pendingApprovals) return;
-    const approvals = pendingApprovals.tools.map((tool) => ({
-      tool_call_id: tool.tool_call_id,
-      approved: approvalState[tool.tool_call_id]?.approved ?? false,
-      feedback: approvalState[tool.tool_call_id]?.feedback || null,
-    }));
-    try {
-      await fetch('/api/approve', {
-        method: 'POST',
-        headers: { 'Content-Type': 'application/json' },
-        body: JSON.stringify({
-          session_id: sessionId,
-          approvals,
-        }),
-      });
-      setPendingApprovals(null);
-      setApprovalState({});
-    } catch (e) {
-      console.error('Approval submission failed:', e);
-    }
-  }, [sessionId, pendingApprovals, approvalState, setPendingApprovals]);
-  const handleApproveAll = useCallback(() => {
-    if (!pendingApprovals) return;
-    const newState: ApprovalState = {};
-    pendingApprovals.tools.forEach((tool) => {
-      newState[tool.tool_call_id] = { approved: true, feedback: '' };
-    });
-    setApprovalState(newState);
-  }, [pendingApprovals]);
-  const handleRejectAll = useCallback(() => {
-    if (!pendingApprovals) return;
-    const newState: ApprovalState = {};
-    pendingApprovals.tools.forEach((tool) => {
-      newState[tool.tool_call_id] = { approved: false, feedback: '' };
-    });
-    setApprovalState(newState);
-  }, [pendingApprovals]);
-  if (!isOpen || !pendingApprovals) return null;
-  const approvedCount = Object.values(approvalState).filter((s) => s.approved).length;
-  return (
-    <Dialog
-      open={isOpen}
-      maxWidth="md"
-      fullWidth
-      PaperProps={{
-        sx: { bgcolor: 'background.paper' },
-      }}
-    >
-      <DialogTitle sx={{ display: 'flex', alignItems: 'center', gap: 1 }}>
-        <WarningIcon color="warning" />
-        Approval Required
-        <Chip
-          label={`${pendingApprovals.count} tool${pendingApprovals.count > 1 ? 's' : ''}`}
-          size="small"
-          sx={{ ml: 1 }}
-        />
-      </DialogTitle>
-      <DialogContent dividers>
-        <Typography variant="body2" color="text.secondary" sx={{ mb: 2 }}>
-          The following tool calls require your approval before execution:
-        </Typography>
-        {pendingApprovals.tools.map((tool, index) => (
-          <Accordion key={tool.tool_call_id} defaultExpanded={index === 0}>
-            <AccordionSummary expandIcon={<ExpandMoreIcon />}>
-              <Box sx={{ display: 'flex', alignItems: 'center', gap: 2, width: '100%' }}>
-                <FormControlLabel
-                  control={
-                    <Checkbox
-                      checked={approvalState[tool.tool_call_id]?.approved ?? false}
-                      onChange={(e) => {
-                        e.stopPropagation();
-                        handleApprovalChange(tool.tool_call_id, e.target.checked);
-                      }}
-                      onClick={(e) => e.stopPropagation()}
-                    />
-                  }
-                  label=""
-                  sx={{ m: 0 }}
-                />
-                <Chip label={tool.tool} size="small" color="primary" variant="outlined" />
-                <Typography variant="body2" color="text.secondary" sx={{ ml: 'auto' }}>
-                  {approvalState[tool.tool_call_id]?.approved ? 'Approved' : 'Pending'}
-                </Typography>
-              </Box>
-            </AccordionSummary>
-            <AccordionDetails>
-              <Typography variant="subtitle2" gutterBottom>
-                Arguments:
-              </Typography>
-              <Box
-                component="pre"
-                sx={{
-                  bgcolor: 'background.default',
-                  p: 1.5,
-                  borderRadius: 1,
-                  overflow: 'auto',
-                  fontSize: '0.8rem',
-                  maxHeight: 200,
-                }}
-              >
-                {JSON.stringify(tool.arguments, null, 2)}
-              </Box>
-              {!approvalState[tool.tool_call_id]?.approved && (
-                <TextField
-                  fullWidth
-                  size="small"
-                  label="Feedback (optional)"
-                  placeholder="Explain why you're rejecting this..."
-                  value={approvalState[tool.tool_call_id]?.feedback || ''}
-                  onChange={(e) => handleFeedbackChange(tool.tool_call_id, e.target.value)}
-                  sx={{ mt: 2 }}
-                />
-              )}
-            </AccordionDetails>
-          </Accordion>
-        ))}
-      </DialogContent>
-      <DialogActions sx={{ px: 3, py: 2 }}>
-        <Button onClick={handleRejectAll} color="error" variant="outlined">
-          Reject All
-        </Button>
-        <Button onClick={handleApproveAll} color="success" variant="outlined">
-          Approve All
-        </Button>
-        <Box sx={{ flex: 1 }} />
-        <Typography variant="body2" color="text.secondary" sx={{ mr: 2 }}>
-          {approvedCount} of {pendingApprovals.count} approved
-        </Typography>
-        <Button onClick={handleSubmit} variant="contained" color="primary">
-          Submit
-        </Button>
-      </DialogActions>
-    </Dialog>
-  );
-}

frontend/src/components/Chat/ActivityStatusBar.tsx ADDED Viewed

	@@ -0,0 +1,57 @@

+import { Box, Typography } from '@mui/material';
+import { keyframes } from '@mui/system';
+import { useAgentStore, type ActivityStatus } from '@/store/agentStore';
+// Keyframes that move the background position from -100% to 200% and back
+// to -100% within one cycle; applied to the status text below.
+const shimmer = keyframes`
+  0% { background-position: -100% center; }
+  50% { background-position: 200% center; }
+  100% { background-position: -100% center; }
+`;
+// Display labels keyed by tool name. statusLabel falls back to
+// `Running <toolName>` for any tool that has no entry here.
+const TOOL_LABELS: Record<string, string> = {
+  hf_jobs: 'Running job',
+  hf_repo_files: 'Uploading file',
+  hf_repo_git: 'Git operation',
+  hf_inspect_dataset: 'Inspecting dataset',
+  hf_search: 'Searching',
+  plan_tool: 'Planning',
+};
+/**
+ * Turn an activity status into the short text shown in the status bar.
+ * Tool statuses use the TOOL_LABELS entry for the tool name when one exists
+ * and `Running <toolName>` otherwise; status types not handled here give ''.
+ */
+function statusLabel(status: ActivityStatus): string {
+  if (status.type === 'thinking') return 'Thinking';
+  if (status.type === 'streaming') return 'Writing';
+  if (status.type === 'waiting-approval') return 'Waiting for approval';
+  if (status.type === 'tool') {
+    const known = TOOL_LABELS[status.toolName];
+    return known || `Running ${status.toolName}`;
+  }
+  return '';
+}
+/**
+ * Status line that shows what the agent is currently doing.
+ * Renders nothing while the store's activity status is 'idle'; otherwise
+ * shows the status label followed by an ellipsis, with a shimmer animation.
+ */
+export default function ActivityStatusBar() {
+  const current = useAgentStore((state) => state.activityStatus);
+  if (current.type === 'idle') {
+    return null;
+  }
+  return (
+    <Box
+      sx={{
+        px: 2,
+        py: 0.5,
+        minHeight: 28,
+        display: 'flex',
+        alignItems: 'center',
+      }}
+    >
+      <Typography
+        sx={{
+          fontFamily: 'monospace',
+          fontSize: '0.72rem',
+          fontWeight: 500,
+          letterSpacing: '0.02em',
+          // The gradient is clipped to the glyphs and the fill made transparent,
+          // so the moving background is what colors the text.
+          background: 'linear-gradient(90deg, var(--muted-text) 30%, var(--text) 50%, var(--muted-text) 70%)',
+          backgroundSize: '250% 100%',
+          backgroundClip: 'text',
+          WebkitBackgroundClip: 'text',
+          WebkitTextFillColor: 'transparent',
+          animation: `${shimmer} 4s ease-in-out infinite`,
+        }}
+      >
+        {statusLabel(current)}…
+      </Typography>
+    </Box>
+  );
+}

frontend/src/components/Chat/ApprovalFlow.tsx DELETED Viewed

@@ -1,515 +0,0 @@
-import { useState, useCallback, useEffect } from 'react';
-import { Box, Typography, Button, TextField, IconButton, Link } from '@mui/material';
-import SendIcon from '@mui/icons-material/Send';
-import OpenInNewIcon from '@mui/icons-material/OpenInNew';
-import CheckCircleIcon from '@mui/icons-material/CheckCircle';
-import CancelIcon from '@mui/icons-material/Cancel';
-import LaunchIcon from '@mui/icons-material/Launch';
-import { useAgentStore } from '@/store/agentStore';
-import { useLayoutStore } from '@/store/layoutStore';
-import { useSessionStore } from '@/store/sessionStore';
-import type { Message, ToolApproval } from '@/types/agent';
-interface ApprovalFlowProps {
-  message: Message;
-}
-export default function ApprovalFlow({ message }: ApprovalFlowProps) {
-  const { setPanelContent, setPanelTab, setActivePanelTab, clearPanelTabs, updateMessage } = useAgentStore();
-  const { setRightPanelOpen, setLeftSidebarOpen } = useLayoutStore();
-  const { activeSessionId } = useSessionStore();
-  const [currentIndex, setCurrentIndex] = useState(0);
-  const [feedback, setFeedback] = useState('');
-  const [decisions, setDecisions] = useState<ToolApproval[]>([]);
-  const approvalData = message.approval;
-  if (!approvalData) return null;
-  const { batch, status } = approvalData;
-  // Parse toolOutput to extract job info (URL, status, logs, errors)
-  let logsContent = '';
-  let showLogsButton = false;
-  let jobUrl = '';
-  let jobStatus = '';
-  let jobFailed = false;
-  let errorMessage = '';
-  if (message.toolOutput) {
-    const output = message.toolOutput;
-    // Extract job URL: **View at:** https://...
-    const urlMatch = output.match(/\*\*View at:\*\*\s*(https:\/\/[^\s\n]+)/);
-    if (urlMatch) {
-      jobUrl = urlMatch[1];
-    }
-    // Extract job status: **Final Status:** ...
-    const statusMatch = output.match(/\*\*Final Status:\*\*\s*([^\n]+)/);
-    if (statusMatch) {
-      jobStatus = statusMatch[1].trim();
-      jobFailed = jobStatus.toLowerCase().includes('error') || jobStatus.toLowerCase().includes('failed');
-    }
-    // Extract logs
-    if (output.includes('**Logs:**')) {
-      const parts = output.split('**Logs:**');
-      if (parts.length > 1) {
-        const logsPart = parts[1].trim();
-        const codeBlockMatch = logsPart.match(/```([\s\S]*?)```/);
-        if (codeBlockMatch) {
-          logsContent = codeBlockMatch[1].trim();
-          showLogsButton = true;
-        }
-      }
-    }
-    // Detect errors - if output exists but doesn't have the expected job completion format
-    // This catches early failures (validation errors, API errors, etc.)
-    const isExpectedFormat = output.includes('**Job ID:**') || output.includes('**View at:**');
-    const looksLikeError = output.toLowerCase().includes('error') ||
-                          output.toLowerCase().includes('failed') ||
-                          output.toLowerCase().includes('exception') ||
-                          output.includes('Traceback');
-    if (!isExpectedFormat || (looksLikeError && !logsContent)) {
-      // This is likely an error message - show it
-      errorMessage = output;
-      jobFailed = true;
-    }
-  }
-  // Sync right panel with current tool
-  useEffect(() => {
-    if (!batch || currentIndex >= batch.tools.length) return;
-    // Only auto-open panel if pending
-    if (status !== 'pending') return;
-    const tool = batch.tools[currentIndex];
-    const args = tool.arguments as any;
-    if (tool.tool === 'hf_jobs' && (args.operation === 'run' || args.operation === 'scheduled run') && args.script) {
-      setPanelContent({
-        title: 'Compute Job Script',
-        content: args.script,
-        language: 'python',
-        parameters: args
-      });
-      // Don't auto-open if already resolved
-    } else if (tool.tool === 'hf_repo_files' && args.operation === 'upload' && args.content) {
-      setPanelContent({
-        title: `File Upload: ${args.path || 'unnamed'}`,
-        content: args.content,
-        parameters: args
-      });
-    }
-  }, [currentIndex, batch, status, setPanelContent]);
-  const handleResolve = useCallback(async (approved: boolean) => {
-    if (!batch || !activeSessionId) return;
-    const currentTool = batch.tools[currentIndex];
-    const newDecisions = [
-      ...decisions,
-      {
-        tool_call_id: currentTool.tool_call_id,
-        approved,
-        feedback: approved ? null : feedback || 'Rejected by user',
-      },
-    ];
-    if (currentIndex < batch.tools.length - 1) {
-      setDecisions(newDecisions);
-      setCurrentIndex(currentIndex + 1);
-      setFeedback('');
-    } else {
-      // All tools in batch resolved
-      try {
-        await fetch('/api/approve', {
-          method: 'POST',
-          headers: { 'Content-Type': 'application/json' },
-          body: JSON.stringify({
-            session_id: activeSessionId,
-            approvals: newDecisions,
-          }),
-        });
-        // Update message status
-        updateMessage(activeSessionId, message.id, {
-            approval: {
-                ...approvalData!,
-                status: approved ? 'approved' : 'rejected',
-                decisions: newDecisions
-            }
-        });
-      } catch (e) {
-        console.error('Approval submission failed:', e);
-      }
-    }
-  }, [activeSessionId, message.id, batch, currentIndex, feedback, decisions, approvalData, updateMessage]);
-  if (!batch || currentIndex >= batch.tools.length) return null;
-  const currentTool = batch.tools[currentIndex];
-  // Check if script contains push_to_hub or upload_file
-  const args = currentTool.arguments as any;
-  const containsPushToHub = currentTool.tool === 'hf_jobs' && args.script && (args.script.includes('push_to_hub') || args.script.includes('upload_file'));
-  const getToolDescription = (toolName: string, args: any) => {
-    if (toolName === 'hf_jobs') {
-      return (
-        <Box sx={{ flex: 1 }}>
-          <Typography variant="body2" sx={{ color: 'var(--muted-text)' }}>
-            The agent wants to execute <Box component="span" sx={{ color: 'var(--accent-yellow)', fontWeight: 500 }}>hf_jobs</Box> on{' '}
-            <Box component="span" sx={{ fontWeight: 500, color: 'var(--text)' }}>{args.hardware_flavor || 'default'}</Box> with a timeout of{' '}
-            <Box component="span" sx={{ fontWeight: 500, color: 'var(--text)' }}>{args.timeout || '30m'}</Box>
-          </Typography>
-        </Box>
-      );
-    }
-    return (
-      <Typography variant="body2" sx={{ color: 'var(--muted-text)', flex: 1 }}>
-        The agent wants to execute <Box component="span" sx={{ color: 'var(--accent-yellow)', fontWeight: 500 }}>{toolName}</Box>
-      </Typography>
-    );
-  };
-  const showCode = () => {
-    const args = currentTool.arguments as any;
-    if (currentTool.tool === 'hf_jobs' && args.script) {
-      // Clear existing tabs and set up script tab (and logs if available)
-      clearPanelTabs();
-      setPanelTab({
-        id: 'script',
-        title: 'Script',
-        content: args.script,
-        language: 'python',
-        parameters: args
-      });
-      // If logs are available (job completed), also add logs tab
-      if (logsContent) {
-        setPanelTab({
-          id: 'logs',
-          title: 'Logs',
-          content: logsContent,
-          language: 'text'
-        });
-      }
-      setActivePanelTab('script');
-      setRightPanelOpen(true);
-      setLeftSidebarOpen(false);
-    } else {
-      setPanelContent({
-        title: `Tool: ${currentTool.tool}`,
-        content: JSON.stringify(args, null, 2),
-        language: 'json',
-        parameters: args
-      });
-      setRightPanelOpen(true);
-      setLeftSidebarOpen(false);
-    }
-  };
-  const handleViewLogs = (e: React.MouseEvent) => {
-    e.stopPropagation();
-    const args = currentTool.arguments as any;
-    // Set up both tabs so user can switch between script and logs
-    clearPanelTabs();
-    if (currentTool.tool === 'hf_jobs' && args.script) {
-      setPanelTab({
-        id: 'script',
-        title: 'Script',
-        content: args.script,
-        language: 'python',
-        parameters: args
-      });
-    }
-    setPanelTab({
-      id: 'logs',
-      title: 'Logs',
-      content: logsContent,
-      language: 'text'
-    });
-    setActivePanelTab('logs');
-    setRightPanelOpen(true);
-    setLeftSidebarOpen(false);
-  };
-  return (
-    <Box
-      className="action-card"
-      sx={{
-        width: '100%',
-        padding: '18px',
-        borderRadius: 'var(--radius-md)',
-        background: 'linear-gradient(180deg, rgba(255,255,255,0.015), transparent)',
-        border: '1px solid rgba(255,255,255,0.03)',
-        display: 'flex',
-        flexDirection: 'column',
-        gap: '12px',
-        opacity: status !== 'pending' && !showLogsButton ? 0.8 : 1
-      }}
-    >
-      <Box sx={{ display: 'flex', alignItems: 'center', gap: 1 }}>
-         <Typography variant="subtitle2" sx={{ fontWeight: 600, color: 'var(--text)' }}>
-            {status === 'pending' ? 'Approval Required' : status === 'approved' ? 'Approved' : 'Rejected'}
-         </Typography>
-         <Typography variant="caption" sx={{ color: 'var(--muted-text)' }}>
-            ({currentIndex + 1}/{batch.count})
-         </Typography>
-         {status === 'approved' && <CheckCircleIcon sx={{ fontSize: 18, color: 'var(--accent-green)' }} />}
-         {status === 'rejected' && <CancelIcon sx={{ fontSize: 18, color: 'var(--accent-red)' }} />}
-      </Box>
-      <Box
-        onClick={showCode}
-        sx={{
-            display: 'flex',
-            alignItems: 'center',
-            gap: 1,
-            cursor: 'pointer',
-            p: 1.5,
-            borderRadius: '8px',
-            bgcolor: 'rgba(0,0,0,0.2)',
-            border: '1px solid rgba(255,255,255,0.05)',
-            transition: 'all 0.2s',
-            '&:hover': {
-                bgcolor: 'rgba(255,255,255,0.03)',
-                borderColor: 'var(--accent-primary)',
-            }
-        }}
-      >
-        {getToolDescription(currentTool.tool, currentTool.arguments)}
-        <OpenInNewIcon sx={{ fontSize: 16, color: 'var(--muted-text)', opacity: 0.7 }} />
-      </Box>
-      {/* Script/Logs buttons for hf_jobs - always show when we have a script */}
-      {currentTool.tool === 'hf_jobs' && args.script && (
-        <Box sx={{ display: 'flex', flexDirection: 'column', gap: 1 }}>
-          <Box sx={{ display: 'flex', gap: 1, flexWrap: 'wrap' }}>
-            <Button
-              variant="outlined"
-              size="small"
-              onClick={showCode}
-              sx={{
-                textTransform: 'none',
-                borderColor: 'rgba(255,255,255,0.1)',
-                color: 'var(--muted-text)',
-                fontSize: '0.75rem',
-                py: 0.5,
-                '&:hover': {
-                  borderColor: 'var(--accent-primary)',
-                  color: 'var(--accent-primary)',
-                  bgcolor: 'rgba(255,255,255,0.03)'
-                }
-              }}
-            >
-              View Script
-            </Button>
-            <Button
-              variant="outlined"
-              size="small"
-              onClick={handleViewLogs}
-              disabled={!logsContent && status === 'pending'}
-              sx={{
-                textTransform: 'none',
-                borderColor: 'rgba(255,255,255,0.1)',
-                color: logsContent ? 'var(--accent-primary)' : 'var(--muted-text)',
-                fontSize: '0.75rem',
-                py: 0.5,
-                '&:hover': {
-                  borderColor: 'var(--accent-primary)',
-                  bgcolor: 'rgba(255,255,255,0.03)'
-                },
-                '&.Mui-disabled': {
-                  color: 'rgba(255,255,255,0.3)',
-                  borderColor: 'rgba(255,255,255,0.05)',
-                }
-              }}
-            >
-              {logsContent ? 'View Logs' : 'Logs (waiting for job...)'}
-            </Button>
-          </Box>
-          {/* Job URL - only show when we have a specific URL */}
-          {jobUrl && (
-            <Link
-              href={jobUrl}
-              target="_blank"
-              rel="noopener noreferrer"
-              sx={{
-                display: 'flex',
-                alignItems: 'center',
-                gap: 0.5,
-                color: 'var(--accent-primary)',
-                fontSize: '0.75rem',
-                textDecoration: 'none',
-                opacity: 0.9,
-                '&:hover': {
-                  opacity: 1,
-                  textDecoration: 'underline',
-                }
-              }}
-            >
-              <LaunchIcon sx={{ fontSize: 14 }} />
-              View Job on Hugging Face
-            </Link>
-          )}
-          {/* Show job status if available */}
-          {jobStatus && (
-            <Typography
-              variant="caption"
-              sx={{
-                color: jobFailed ? 'var(--accent-red)' : 'var(--accent-green)',
-                fontSize: '0.75rem',
-                fontWeight: 500,
-              }}
-            >
-              Status: {jobStatus}
-            </Typography>
-          )}
-        </Box>
-      )}
-      {containsPushToHub && (
-        <Typography variant="caption" sx={{ color: 'var(--accent-green)', fontSize: '0.75rem', opacity: 0.8, px: 0.5 }}>
-          We've detected the result will be pushed to hub.
-        </Typography>
-      )}
-      {/* Show error message if job failed */}
-      {errorMessage && status !== 'pending' && (
-        <Box
-          sx={{
-            p: 1.5,
-            borderRadius: '8px',
-            bgcolor: 'rgba(224, 90, 79, 0.1)',
-            border: '1px solid rgba(224, 90, 79, 0.3)',
-          }}
-        >
-          <Typography
-            variant="caption"
-            sx={{
-              color: 'var(--accent-red)',
-              fontWeight: 600,
-              display: 'block',
-              mb: 0.5,
-            }}
-          >
-            Error
-          </Typography>
-          <Typography
-            component="pre"
-            sx={{
-              color: 'var(--text)',
-              fontSize: '0.75rem',
-              fontFamily: 'ui-monospace, SFMono-Regular, Menlo, Monaco, monospace',
-              whiteSpace: 'pre-wrap',
-              wordBreak: 'break-word',
-              m: 0,
-              maxHeight: '150px',
-              overflow: 'auto',
-            }}
-          >
-            {errorMessage.length > 500 ? errorMessage.substring(0, 500) + '...' : errorMessage}
-          </Typography>
-        </Box>
-      )}
-      {status === 'pending' && (
-      <Box sx={{ display: 'flex', flexDirection: 'column', gap: 2 }}>
-        <Box sx={{ display: 'flex', gap: 1 }}>
-            <TextField
-                fullWidth
-                size="small"
-                placeholder="Feedback (optional)"
-                value={feedback}
-                onChange={(e) => setFeedback(e.target.value)}
-                variant="outlined"
-                sx={{
-                    '& .MuiOutlinedInput-root': {
-                        bgcolor: 'rgba(0,0,0,0.2)',
-                        fontFamily: 'inherit',
-                        fontSize: '0.9rem'
-                    }
-                }}
-            />
-            <IconButton
-                onClick={() => handleResolve(false)}
-                disabled={!feedback}
-                title="Reject with feedback"
-                sx={{
-                    color: 'var(--accent-red)',
-                    border: '1px solid rgba(255,255,255,0.05)',
-                    borderRadius: '8px',
-                    width: 40,
-                    height: 40,
-                    '&:hover': {
-                        bgcolor: 'rgba(224, 90, 79, 0.1)',
-                        borderColor: 'var(--accent-red)',
-                    },
-                    '&.Mui-disabled': {
-                        color: 'rgba(255,255,255,0.1)',
-                        borderColor: 'rgba(255,255,255,0.02)'
-                    }
-                }}
-            >
-                <SendIcon fontSize="small" />
-            </IconButton>
-        </Box>
-        <Box className="action-buttons" sx={{ display: 'flex', gap: '10px' }}>
-            <Button
-            className="btn-reject"
-            onClick={() => handleResolve(false)}
-            sx={{
-                flex: 1,
-                background: 'transparent',
-                border: '1px solid rgba(255,255,255,0.05)',
-                color: 'var(--accent-red)',
-                padding: '10px 14px',
-                borderRadius: '10px',
-                '&:hover': {
-                    bgcolor: 'rgba(224, 90, 79, 0.05)',
-                    borderColor: 'var(--accent-red)',
-                }
-            }}
-            >
-            Reject
-            </Button>
-            <Button
-            className="btn-approve"
-            onClick={() => handleResolve(true)}
-            sx={{
-                flex: 1,
-                background: 'transparent',
-                border: '1px solid rgba(255,255,255,0.05)',
-                color: 'var(--accent-green)',
-                padding: '10px 14px',
-                borderRadius: '10px',
-                '&:hover': {
-                    bgcolor: 'rgba(47, 204, 113, 0.05)',
-                    borderColor: 'var(--accent-green)',
-                }
-            }}
-            >
-            Approve
-            </Button>
-        </Box>
-      </Box>
-      )}
-      {status === 'rejected' && decisions.some(d => d.feedback) && (
-        <Typography variant="body2" sx={{ color: 'var(--accent-red)', mt: 1 }}>
-            Feedback: {decisions.find(d => d.feedback)?.feedback}
-        </Typography>
-      )}
-    </Box>
-  );
-}

frontend/src/components/Chat/AssistantMessage.tsx ADDED Viewed

	@@ -0,0 +1,119 @@

+import { useMemo } from 'react';
+import { Box, Stack, Typography } from '@mui/material';
+import MarkdownContent from './MarkdownContent';
+import ToolCallGroup from './ToolCallGroup';
+import type { UIMessage } from 'ai';
+import type { MessageMeta } from '@/types/agent';
+/** Props accepted by the AssistantMessage component. */
+interface AssistantMessageProps {
+  // Message whose `parts` are grouped and rendered.
+  message: UIMessage;
+  // Forwarded as `isStreaming` to the last text group's MarkdownContent only.
+  isStreaming?: boolean;
+  // Passed through unchanged to every ToolCallGroup.
+  approveTools: (approvals: Array<{ tool_call_id: string; approved: boolean; feedback?: string | null }>) => Promise<boolean>;
+}
+/** A message part whose `type` is 'dynamic-tool'. */
+type DynamicToolPart = Extract<UIMessage['parts'][number], { type: 'dynamic-tool' }>;
+/** One renderable unit: a text part, or a run of adjacent tool parts. */
+type PartGroup =
+  | { kind: 'text'; text: string; idx: number }
+  | { kind: 'tools'; tools: DynamicToolPart[]; idx: number };
+/**
+ * Collapse a message's parts into render groups.
+ *
+ * Every text part becomes its own group. A 'dynamic-tool' part is appended to
+ * the preceding group when that group is already a tools group, otherwise it
+ * starts a new one. Parts of any other type are dropped. `idx` is the index of
+ * the part that opened the group.
+ */
+function groupParts(parts: UIMessage['parts']) {
+  const groups: PartGroup[] = [];
+  parts.forEach((part, position) => {
+    switch (part.type) {
+      case 'text':
+        groups.push({ kind: 'text', text: part.text, idx: position });
+        return;
+      case 'dynamic-tool': {
+        const toolPart = part as DynamicToolPart;
+        const tail = groups[groups.length - 1];
+        if (tail?.kind === 'tools') {
+          tail.tools.push(toolPart);
+        } else {
+          groups.push({ kind: 'tools', tools: [toolPart], idx: position });
+        }
+        return;
+      }
+      default:
+        // step-start, step-end, etc. have no visual representation
+        return;
+    }
+  });
+  return groups;
+}
+/**
+ * Render one assistant message as a header plus a bubble holding its text and
+ * tool-call groups.
+ *
+ * Returns null when the message has nothing visible to show: no groups at
+ * all, or only empty text parts. This avoids an empty bubble under the
+ * "Assistant" header. The timestamp is shown only when `metadata.createdAt`
+ * parses to a valid date.
+ */
+export default function AssistantMessage({ message, isStreaming = false, approveTools }: AssistantMessageProps) {
+  const groups = useMemo(() => groupParts(message.parts), [message.parts]);
+  // Find the last text group index for streaming cursor
+  let lastTextIdx = -1;
+  for (let i = groups.length - 1; i >= 0; i--) {
+    if (groups[i].kind === 'text') { lastTextIdx = i; break; }
+  }
+  const meta = message.metadata as MessageMeta | undefined;
+  const createdAt = meta?.createdAt ? new Date(meta.createdAt) : null;
+  // An unparsable value produces an Invalid Date; hide the time rather than
+  // display the literal string "Invalid Date".
+  const timeStr = createdAt && !Number.isNaN(createdAt.getTime())
+    ? createdAt.toLocaleTimeString([], { hour: '2-digit', minute: '2-digit' })
+    : null;
+  // Mirrors the render conditions below: a text group needs non-empty text and
+  // a tools group needs at least one tool. Also covers the no-groups case.
+  const hasVisibleContent = groups.some(group =>
+    group.kind === 'text' ? Boolean(group.text) : group.tools.length > 0
+  );
+  if (!hasVisibleContent) return null;
+  return (
+    <Box sx={{ minWidth: 0 }}>
+      <Stack direction="row" alignItems="baseline" spacing={1} sx={{ mb: 0.5 }}>
+        <Typography
+          variant="caption"
+          sx={{
+            fontWeight: 700,
+            fontSize: '0.72rem',
+            color: 'var(--muted-text)',
+            textTransform: 'uppercase',
+            letterSpacing: '0.04em',
+          }}
+        >
+          Assistant
+        </Typography>
+        {timeStr && (
+          <Typography variant="caption" sx={{ color: 'var(--muted-text)', fontSize: '0.7rem' }}>
+            {timeStr}
+          </Typography>
+        )}
+      </Stack>
+      <Box
+        sx={{
+          maxWidth: { xs: '95%', md: '85%' },
+          bgcolor: 'var(--surface)',
+          borderRadius: 1.5,
+          borderTopLeftRadius: 4,
+          px: { xs: 1.5, md: 2.5 },
+          py: 1.5,
+          border: '1px solid var(--border)',
+        }}
+      >
+        {groups.map((group, i) => {
+          if (group.kind === 'text' && group.text) {
+            return (
+              <MarkdownContent
+                key={group.idx}
+                content={group.text}
+                isStreaming={isStreaming && i === lastTextIdx}
+              />
+            );
+          }
+          if (group.kind === 'tools' && group.tools.length > 0) {
+            return (
+              <ToolCallGroup
+                key={group.idx}
+                tools={group.tools}
+                approveTools={approveTools}
+              />
+            );
+          }
+          return null;
+        })}
+      </Box>
+    </Box>
+  );
+}

frontend/src/components/Chat/ChatInput.tsx CHANGED Viewed

@@ -1,14 +1,103 @@
-import { useState, useCallback, KeyboardEvent } from 'react';
-import { Box, TextField, IconButton, CircularProgress, Typography } from '@mui/material';
 import ArrowUpwardIcon from '@mui/icons-material/ArrowUpward';
 interface ChatInputProps {
   onSend: (text: string) => void;
   disabled?: boolean;
 }
-export default function ChatInput({ onSend, disabled = false }: ChatInputProps) {
   const [input, setInput] = useState('');
   const handleSend = useCallback(() => {
     if (input.trim() && !disabled) {
@@ -27,26 +116,48 @@ export default function ChatInput({ onSend, disabled = false }: ChatInputProps)
     [handleSend]
   );
   return (
     <Box
       sx={{
-        pb: 4,
-        pt: 2,
         position: 'relative',
         zIndex: 10,
       }}
     >
-      <Box sx={{ maxWidth: '880px', mx: 'auto', width: '100%', px: 2 }}>
         <Box
           className="composer"
           sx={{
             display: 'flex',
             gap: '10px',
             alignItems: 'flex-start',
-            bgcolor: 'rgba(255,255,255,0.01)',
             borderRadius: 'var(--radius-md)',
             p: '12px',
-            border: '1px solid rgba(255,255,255,0.03)',
             transition: 'box-shadow 0.2s ease, border-color 0.2s ease',
             '&:focus-within': {
                 borderColor: 'var(--accent-yellow)',
@@ -61,9 +172,10 @@ export default function ChatInput({ onSend, disabled = false }: ChatInputProps)
             value={input}
             onChange={(e) => setInput(e.target.value)}
             onKeyDown={handleKeyDown}
-            placeholder="Ask anything..."
             disabled={disabled}
             variant="standard"
             InputProps={{
                 disableUnderline: true,
                 sx: {
@@ -72,7 +184,7 @@ export default function ChatInput({ onSend, disabled = false }: ChatInputProps)
                     fontFamily: 'inherit',
                     padding: 0,
                     lineHeight: 1.5,
-                    minHeight: '56px',
                     alignItems: 'flex-start',
                 }
             }}
@@ -99,7 +211,7 @@ export default function ChatInput({ onSend, disabled = false }: ChatInputProps)
               transition: 'all 0.2s',
               '&:hover': {
                 color: 'var(--accent-yellow)',
-                bgcolor: 'rgba(255,255,255,0.05)',
               },
               '&.Mui-disabled': {
                 opacity: 0.3,
@@ -109,17 +221,108 @@ export default function ChatInput({ onSend, disabled = false }: ChatInputProps)
             {disabled ? <CircularProgress size={20} color="inherit" /> : <ArrowUpwardIcon fontSize="small" />}
           </IconButton>
         </Box>
         {/* Powered By Badge */}
-        <Box sx={{ display: 'flex', alignItems: 'center', justifyContent: 'center', mt: 1.5, gap: 0.8, opacity: 0.5 }}>
           <Typography variant="caption" sx={{ fontSize: '10px', color: 'var(--muted-text)', textTransform: 'uppercase', letterSpacing: '0.05em', fontWeight: 500 }}>
             powered by
           </Typography>
-          <img src="/claude-logo.png" alt="Claude" style={{ height: '12px', objectFit: 'contain' }} />
           <Typography variant="caption" sx={{ fontSize: '10px', color: 'var(--text)', fontWeight: 600, letterSpacing: '0.02em' }}>
-            claude-opus-4-5-20251101
           </Typography>
         </Box>
       </Box>
     </Box>
   );

+import { useState, useCallback, useEffect, useRef, KeyboardEvent } from 'react';
+import { Box, TextField, IconButton, CircularProgress, Typography, Menu, MenuItem, ListItemIcon, ListItemText, Chip } from '@mui/material';
 import ArrowUpwardIcon from '@mui/icons-material/ArrowUpward';
+import ArrowDropDownIcon from '@mui/icons-material/ArrowDropDown';
+import { apiFetch } from '@/utils/api';
+// Model configuration
+/** One selectable model in the "powered by" model picker. */
+interface ModelOption {
+  // Short key: persisted in localStorage under 'hf-agent-model' and matched as
+  // a substring of the backend path by findModelByPath.
+  id: string;
+  // Display name shown in the badge and in the menu.
+  name: string;
+  // Short provider note (e.g. 'Via Novita'); NOTE(review): where it is
+  // rendered is not visible in this part of the diff.
+  description: string;
+  // Value sent as `model` when POSTing to /api/config/model and compared with
+  // the backend's `current` value.
+  modelPath: string;
+  // Image URL used as the `src` of the model's avatar.
+  avatarUrl: string;
+  // When true, the menu entry shows a "Recommended" chip.
+  recommended?: boolean;
+}
+/**
+ * Build the Hugging Face avatar URL for the organisation part of a model id,
+ * i.e. everything before the first '/'.
+ */
+const getHfAvatarUrl = (modelId: string) => {
+  const [org] = modelId.split('/');
+  return 'https://huggingface.co/api/avatars/' + org;
+};
+// Models offered in the picker, in menu order. The first entry is the default
+// when localStorage holds no valid id, and the fallback when the selected id
+// is not found in this list.
+const MODEL_OPTIONS: ModelOption[] = [
+  {
+    id: 'minimax-m2.1',
+    name: 'MiniMax M2.1',
+    description: 'Via Novita',
+    modelPath: 'huggingface/novita/minimax/minimax-m2.1',
+    avatarUrl: getHfAvatarUrl('MiniMaxAI/MiniMax-M2.1'),
+    recommended: true,
+  },
+  {
+    id: 'claude-opus',
+    name: 'Claude Opus 4.5',
+    description: 'Anthropic',
+    modelPath: 'anthropic/claude-opus-4-5-20251101',
+    // Hard-coded rather than derived from a model id via getHfAvatarUrl.
+    avatarUrl: 'https://huggingface.co/api/avatars/Anthropic',
+    recommended: true,
+  },
+  {
+    id: 'kimi-k2.5',
+    name: 'Kimi K2.5',
+    description: 'Via Novita',
+    modelPath: 'huggingface/novita/moonshotai/kimi-k2.5',
+    avatarUrl: getHfAvatarUrl('moonshotai/Kimi-K2.5'),
+  },
+  {
+    id: 'glm-5',
+    name: 'GLM 5',
+    description: 'Via Novita',
+    modelPath: 'huggingface/novita/zai-org/glm-5',
+    avatarUrl: getHfAvatarUrl('zai-org/GLM-5'),
+  },
+];
+/**
+ * Resolve a model path reported by the backend to one of MODEL_OPTIONS.
+ *
+ * An exact `modelPath` match always wins. Only when there is none does the
+ * lookup fall back to the first option whose `id` occurs inside the path, so
+ * a fuzzy hit on an earlier entry can no longer shadow an exact hit on a
+ * later one.
+ *
+ * @param path Model path from the backend; may be missing because it comes
+ *   from untyped JSON.
+ * @returns The matching option, or undefined when the path is empty or
+ *   matches nothing.
+ */
+const findModelByPath = (path: string | null | undefined): ModelOption | undefined => {
+  if (!path) return undefined;
+  // Bind the narrowed value so the callbacks below see a plain string.
+  const target: string = path;
+  return (
+    MODEL_OPTIONS.find(m => m.modelPath === target) ??
+    MODEL_OPTIONS.find(m => target.includes(m.id))
+  );
+};
 interface ChatInputProps {
   onSend: (text: string) => void;
   disabled?: boolean;
+  placeholder?: string;
 }
+export default function ChatInput({ onSend, disabled = false, placeholder = 'Ask anything...' }: ChatInputProps) {
   const [input, setInput] = useState('');
+  const inputRef = useRef<HTMLTextAreaElement>(null);
+  const [selectedModelId, setSelectedModelId] = useState<string>(() => {
+    try {
+      const stored = localStorage.getItem('hf-agent-model');
+      if (stored && MODEL_OPTIONS.some(m => m.id === stored)) return stored;
+    } catch { /* localStorage unavailable */ }
+    return MODEL_OPTIONS[0].id;
+  });
+  const [modelAnchorEl, setModelAnchorEl] = useState<null | HTMLElement>(null);
+  // Sync with backend on mount (backend is source of truth, localStorage is just a cache)
+  useEffect(() => {
+    fetch('/api/config/model')
+      .then((res) => (res.ok ? res.json() : null))
+      .then((data) => {
+        if (data?.current) {
+          const model = findModelByPath(data.current);
+          if (model) {
+            setSelectedModelId(model.id);
+            try { localStorage.setItem('hf-agent-model', model.id); } catch { /* ignore */ }
+          }
+        }
+      })
+      .catch(() => { /* ignore */ });
+  }, []);
+  const selectedModel = MODEL_OPTIONS.find(m => m.id === selectedModelId) || MODEL_OPTIONS[0];
+  // Auto-focus the textarea when the session becomes ready (disabled -> false)
+  useEffect(() => {
+    if (!disabled && inputRef.current) {
+      inputRef.current.focus();
+    }
+  }, [disabled]);
   const handleSend = useCallback(() => {
     if (input.trim() && !disabled) {
     [handleSend]
   );
+  const handleModelClick = (event: React.MouseEvent<HTMLElement>) => {
+    setModelAnchorEl(event.currentTarget);
+  };
+  const handleModelClose = () => {
+    setModelAnchorEl(null);
+  };
+  const handleSelectModel = async (model: ModelOption) => {
+    handleModelClose();
+    try {
+      const res = await apiFetch('/api/config/model', {
+        method: 'POST',
+        body: JSON.stringify({ model: model.modelPath }),
+      });
+      if (res.ok) {
+        setSelectedModelId(model.id);
+        try { localStorage.setItem('hf-agent-model', model.id); } catch { /* ignore */ }
+      }
+    } catch { /* ignore */ }
+  };
   return (
     <Box
       sx={{
+        pb: { xs: 2, md: 4 },
+        pt: { xs: 1, md: 2 },
         position: 'relative',
         zIndex: 10,
       }}
     >
+      <Box sx={{ maxWidth: '880px', mx: 'auto', width: '100%', px: { xs: 0, sm: 1, md: 2 } }}>
         <Box
           className="composer"
           sx={{
             display: 'flex',
             gap: '10px',
             alignItems: 'flex-start',
+            bgcolor: 'var(--composer-bg)',
             borderRadius: 'var(--radius-md)',
             p: '12px',
+            border: '1px solid var(--border)',
             transition: 'box-shadow 0.2s ease, border-color 0.2s ease',
             '&:focus-within': {
                 borderColor: 'var(--accent-yellow)',
             value={input}
             onChange={(e) => setInput(e.target.value)}
             onKeyDown={handleKeyDown}
+            placeholder={placeholder}
             disabled={disabled}
             variant="standard"
+            inputRef={inputRef}
             InputProps={{
                 disableUnderline: true,
                 sx: {
                     fontFamily: 'inherit',
                     padding: 0,
                     lineHeight: 1.5,
+                    minHeight: { xs: '44px', md: '56px' },
                     alignItems: 'flex-start',
                 }
             }}
               transition: 'all 0.2s',
               '&:hover': {
                 color: 'var(--accent-yellow)',
+                bgcolor: 'var(--hover-bg)',
               },
               '&.Mui-disabled': {
                 opacity: 0.3,
             {disabled ? <CircularProgress size={20} color="inherit" /> : <ArrowUpwardIcon fontSize="small" />}
           </IconButton>
         </Box>
         {/* Powered By Badge */}
+        <Box
+          onClick={handleModelClick}
+          sx={{
+            display: 'flex',
+            alignItems: 'center',
+            justifyContent: 'center',
+            mt: 1.5,
+            gap: 0.8,
+            opacity: 0.6,
+            cursor: 'pointer',
+            transition: 'opacity 0.2s',
+            '&:hover': {
+              opacity: 1
+            }
+          }}
+        >
           <Typography variant="caption" sx={{ fontSize: '10px', color: 'var(--muted-text)', textTransform: 'uppercase', letterSpacing: '0.05em', fontWeight: 500 }}>
             powered by
           </Typography>
+          <img
+            src={selectedModel.avatarUrl}
+            alt={selectedModel.name}
+            style={{ height: '14px', width: '14px', objectFit: 'contain', borderRadius: '2px' }}
+          />
           <Typography variant="caption" sx={{ fontSize: '10px', color: 'var(--text)', fontWeight: 600, letterSpacing: '0.02em' }}>
+            {selectedModel.name}
           </Typography>
+          <ArrowDropDownIcon sx={{ fontSize: '14px', color: 'var(--muted-text)' }} />
         </Box>
+        {/* Model Selection Menu */}
+        <Menu
+          anchorEl={modelAnchorEl}
+          open={Boolean(modelAnchorEl)}
+          onClose={handleModelClose}
+          anchorOrigin={{
+            vertical: 'top',
+            horizontal: 'center',
+          }}
+          transformOrigin={{
+            vertical: 'bottom',
+            horizontal: 'center',
+          }}
+          slotProps={{
+            paper: {
+              sx: {
+                bgcolor: 'var(--panel)',
+                border: '1px solid var(--divider)',
+                mb: 1,
+                maxHeight: '400px',
+              }
+            }
+          }}
+        >
+          {MODEL_OPTIONS.map((model) => (
+            <MenuItem
+              key={model.id}
+              onClick={() => handleSelectModel(model)}
+              selected={selectedModelId === model.id}
+              sx={{
+                py: 1.5,
+                '&.Mui-selected': {
+                  bgcolor: 'rgba(255,255,255,0.05)',
+                }
+              }}
+            >
+              <ListItemIcon>
+                <img
+                  src={model.avatarUrl}
+                  alt={model.name}
+                  style={{ width: 24, height: 24, borderRadius: '4px', objectFit: 'cover' }}
+                />
+              </ListItemIcon>
+              <ListItemText
+                primary={
+                  <Box sx={{ display: 'flex', alignItems: 'center', gap: 1 }}>
+                    {model.name}
+                    {model.recommended && (
+                      <Chip
+                        label="Recommended"
+                        size="small"
+                        sx={{
+                          height: '18px',
+                          fontSize: '10px',
+                          bgcolor: 'var(--accent-yellow)',
+                          color: '#000',
+                          fontWeight: 600,
+                        }}
+                      />
+                    )}
+                  </Box>
+                }
+                secondary={model.description}
+                secondaryTypographyProps={{
+                  sx: { fontSize: '12px', color: 'var(--muted-text)' }
+                }}
+              />
+            </MenuItem>
+          ))}
+        </Menu>
       </Box>
     </Box>
   );

frontend/src/components/Chat/MarkdownContent.tsx ADDED Viewed

	@@ -0,0 +1,160 @@

+import { useMemo, useRef, useState, useEffect } from 'react';
+import { Box } from '@mui/material';
+import ReactMarkdown from 'react-markdown';
+import remarkGfm from 'remark-gfm';
+import type { SxProps, Theme } from '@mui/material/styles';
+/** Props accepted by the MarkdownContent component. */
+interface MarkdownContentProps {
+  /** Markdown source text to render. */
+  content: string;
+  /** Optional style overrides; merged after the shared markdown styles. */
+  sx?: SxProps<Theme>;
+  /**
+   * When true, re-renders of the markdown are throttled.
+   * NOTE(review): no blinking cursor is rendered by this file — confirm
+   * whether one is expected to be drawn elsewhere.
+   */
+  isStreaming?: boolean;
+}
+/**
+ * Shared markdown styles — adapts to light/dark via CSS variables.
+ * Keys starting with `&` are nested selectors that target the elements
+ * rendered inside the wrapping Box.
+ */
+const markdownSx: SxProps<Theme> = {
+  fontSize: '0.925rem',
+  lineHeight: 1.7,
+  color: 'var(--text)',
+  wordBreak: 'break-word',
+  // Paragraphs: no top margin; the last paragraph drops its bottom margin too.
+  '& p': { m: 0, mb: 1.5, '&:last-child': { mb: 0 } },
+  // Headings share spacing/weight; only the font size differs per level.
+  '& h1, & h2, & h3, & h4': { mt: 2.5, mb: 1, fontWeight: 600, lineHeight: 1.3 },
+  '& h1': { fontSize: '1.35rem' },
+  '& h2': { fontSize: '1.15rem' },
+  '& h3': { fontSize: '1.05rem' },
+  // Fenced code blocks.
+  '& pre': {
+    bgcolor: 'var(--code-bg)',
+    p: 2,
+    borderRadius: 2,
+    overflow: 'auto',
+    fontSize: '0.82rem',
+    lineHeight: 1.6,
+    border: '1px solid var(--tool-border)',
+    my: 2,
+  },
+  // Inline code; the `& pre code` rule below resets background and padding
+  // for code that sits inside a fenced block.
+  '& code': {
+    bgcolor: 'var(--hover-bg)',
+    px: 0.75,
+    py: 0.25,
+    borderRadius: 0.5,
+    fontSize: '0.84rem',
+    fontFamily: '"JetBrains Mono", ui-monospace, SFMono-Regular, Menlo, Monaco, monospace',
+  },
+  '& pre code': { bgcolor: 'transparent', p: 0 },
+  // Links are underlined only on hover.
+  '& a': {
+    color: 'var(--accent-yellow)',
+    textDecoration: 'none',
+    fontWeight: 500,
+    '&:hover': { textDecoration: 'underline' },
+  },
+  // Lists.
+  '& ul, & ol': { pl: 3, my: 1 },
+  '& li': { mb: 0.5 },
+  '& li::marker': { color: 'var(--muted-text)' },
+  '& blockquote': {
+    borderLeft: '3px solid var(--accent-yellow)',
+    pl: 2,
+    ml: 0,
+    my: 1.5,
+    color: 'var(--muted-text)',
+    fontStyle: 'italic',
+  },
+  // Tables: header cells get a heavier bottom border than body cells.
+  '& table': {
+    borderCollapse: 'collapse',
+    width: '100%',
+    my: 2,
+    fontSize: '0.85rem',
+  },
+  '& th': {
+    borderBottom: '2px solid var(--border-hover)',
+    textAlign: 'left',
+    p: 1,
+    fontWeight: 600,
+  },
+  '& td': {
+    borderBottom: '1px solid var(--tool-border)',
+    p: 1,
+  },
+  // Horizontal rule drawn as a single top border.
+  '& hr': {
+    border: 'none',
+    borderTop: '1px solid var(--border)',
+    my: 2,
+  },
+  // Images never overflow the message width.
+  '& img': {
+    maxWidth: '100%',
+    borderRadius: 2,
+  },
+};
+/**
+ * Rate-limits a rapidly changing string while it is being streamed.
+ *
+ * The full text is always rendered as markdown (never split into raw text);
+ * this hook only limits how often the consumer sees a new value so the
+ * markdown is re-parsed roughly every `intervalMs` instead of on every
+ * incoming chunk, which avoids layout thrashing.
+ *
+ * Behaviour:
+ * - Not streaming: `value` is returned directly, so the caller never renders
+ *   a stale string (the internal state is still synced for the next stream).
+ * - Streaming: the returned string is refreshed immediately if at least
+ *   `intervalMs` has passed since the last refresh, otherwise a timer is
+ *   scheduled for the remaining time and delivers the newest value.
+ *
+ * @param value       Latest full text.
+ * @param isStreaming Whether updates should be throttled.
+ * @param intervalMs  Minimum gap between throttled updates in ms (default 80).
+ * @returns The text that should currently be displayed.
+ */
+function useThrottledValue(value: string, isStreaming: boolean, intervalMs = 80): string {
+  const [throttled, setThrottled] = useState(value);
+  const lastUpdate = useRef(0);
+  const pending = useRef<ReturnType<typeof setTimeout> | null>(null);
+  // Always points at the newest value so a delayed timer never publishes
+  // the value captured when it was scheduled.
+  const latestValue = useRef(value);
+  latestValue.current = value;
+  useEffect(() => {
+    if (!isStreaming) {
+      // Keep state in sync so the next streaming phase starts from the
+      // current text; this also covers the "streaming just ended" flush.
+      setThrottled(value);
+      return;
+    }
+    const now = Date.now();
+    const elapsed = now - lastUpdate.current;
+    if (elapsed >= intervalMs) {
+      // Enough time passed — publish immediately, no timer needed.
+      setThrottled(value);
+      lastUpdate.current = now;
+      return;
+    }
+    // Too soon — publish once the remainder of the interval has elapsed.
+    pending.current = setTimeout(() => {
+      pending.current = null;
+      setThrottled(latestValue.current);
+      lastUpdate.current = Date.now();
+    }, intervalMs - elapsed);
+    return () => {
+      // Runs before the next effect pass and on unmount: cancel the timer
+      // and drop the handle so no stale id lingers in the ref.
+      if (pending.current) {
+        clearTimeout(pending.current);
+        pending.current = null;
+      }
+    };
+  }, [value, isStreaming, intervalMs]);
+  // Outside of streaming there is nothing to throttle: hand back the live
+  // value instead of state that only catches up after an extra render.
+  return isStreaming ? throttled : value;
+}
+/**
+ * Renders `content` as GitHub-flavoured markdown inside a Box that carries
+ * the shared markdown styles, followed by any caller-supplied `sx`.
+ * While `isStreaming` is true the text handed to the parser is throttled
+ * (default interval of the throttle hook: 80ms, i.e. ~12 updates/second).
+ */
+export default function MarkdownContent(props: MarkdownContentProps) {
+  const { content, sx, isStreaming = false } = props;
+  const throttledMarkdown = useThrottledValue(content, isStreaming);
+  // Stable array identity so ReactMarkdown does not see new plugins each render.
+  const plugins = useMemo(() => [remarkGfm], []);
+  return (
+    <Box sx={[markdownSx, ...(Array.isArray(sx) ? sx : sx ? [sx] : [])]}>
+      <ReactMarkdown remarkPlugins={plugins}>{throttledMarkdown}</ReactMarkdown>
+    </Box>
+  );
+}

frontend/src/components/Chat/MessageBubble.tsx CHANGED Viewed

@@ -1,215 +1,44 @@
-import { Box, Paper, Typography } from '@mui/material';
-import ReactMarkdown from 'react-markdown';
-import remarkGfm from 'remark-gfm';
-import ApprovalFlow from './ApprovalFlow';
-import type { Message, TraceLog } from '@/types/agent';
-import { useAgentStore } from '@/store/agentStore';
-import { useLayoutStore } from '@/store/layoutStore';
 interface MessageBubbleProps {
-  message: Message;
 }
-// Render a tools segment with clickable tool calls
-function ToolsSegment({ tools }: { tools: TraceLog[] }) {
-  const { showToolOutput } = useAgentStore();
-  const { setRightPanelOpen } = useLayoutStore();
-  const handleToolClick = (log: TraceLog) => {
-    if (log.completed && log.output) {
-      showToolOutput(log);
-      setRightPanelOpen(true);
-    }
-  };
-  return (
-    <Box
-      sx={{
-        bgcolor: 'rgba(0,0,0,0.3)',
-        borderRadius: 1,
-        p: 1.5,
-        border: 1,
-        borderColor: 'rgba(255,255,255,0.05)',
-        my: 1.5,
-      }}
-    >
-      <Box sx={{ display: 'flex', flexDirection: 'column', gap: 0.5 }}>
-        {tools.map((log) => {
-          const isClickable = log.completed && log.output;
-          return (
-            <Typography
-              key={log.id}
-              variant="caption"
-              component="div"
-              onClick={() => handleToolClick(log)}
-              sx={{
-                color: 'var(--muted-text)',
-                fontFamily: 'ui-monospace, SFMono-Regular, Menlo, Monaco, monospace',
-                fontSize: '0.75rem',
-                display: 'flex',
-                alignItems: 'center',
-                gap: 0.5,
-                cursor: isClickable ? 'pointer' : 'default',
-                borderRadius: 0.5,
-                px: 0.5,
-                mx: -0.5,
-                transition: 'background-color 0.15s ease',
-                '&:hover': isClickable ? {
-                  bgcolor: 'rgba(255,255,255,0.05)',
-                } : {},
-              }}
-            >
-              <span style={{
-                color: log.completed
-                  ? (log.success === false ? '#F87171' : '#FDB022')
-                  : 'inherit',
-                fontSize: '0.85rem',
-              }}>
-                {log.completed ? (log.success === false ? '✗' : '✓') : '•'}
-              </span>
-              <span style={{
-                fontWeight: 600,
-                color: isClickable ? 'rgba(255, 255, 255, 0.9)' : 'inherit',
-                textDecoration: isClickable ? 'underline' : 'none',
-                textDecorationColor: 'rgba(255,255,255,0.3)',
-                textUnderlineOffset: '2px',
-              }}>
-                {log.tool}
-              </span>
-              {!log.completed && <span style={{ opacity: 0.6 }}>...</span>}
-              {isClickable && (
-                <span style={{
-                  opacity: 0.4,
-                  fontSize: '0.65rem',
-                  marginLeft: 'auto',
-                }}>
-                  click to view
-                </span>
-              )}
-            </Typography>
-          );
-        })}
-      </Box>
-    </Box>
-  );
-}
-// Markdown styles
-const markdownStyles = {
-  '& p': { m: 0, mb: 1, '&:last-child': { mb: 0 } },
-  '& pre': {
-    bgcolor: 'rgba(0,0,0,0.5)',
-    p: 1.5,
-    borderRadius: 1,
-    overflow: 'auto',
-    fontSize: '0.85rem',
-    border: '1px solid rgba(255,255,255,0.05)',
-  },
-  '& code': {
-    bgcolor: 'rgba(255,255,255,0.05)',
-    px: 0.5,
-    py: 0.25,
-    borderRadius: 0.5,
-    fontSize: '0.85rem',
-    fontFamily: '"JetBrains Mono", monospace',
-  },
-  '& pre code': { bgcolor: 'transparent', p: 0 },
-  '& a': {
-    color: 'var(--accent-yellow)',
-    textDecoration: 'none',
-    '&:hover': { textDecoration: 'underline' },
-  },
-  '& ul, & ol': { pl: 2, my: 1 },
-  '& table': {
-    borderCollapse: 'collapse',
-    width: '100%',
-    my: 2,
-    fontSize: '0.875rem',
-  },
-  '& th': {
-    borderBottom: '1px solid rgba(255,255,255,0.1)',
-    textAlign: 'left',
-    p: 1,
-    bgcolor: 'rgba(255,255,255,0.02)',
-  },
-  '& td': {
-    borderBottom: '1px solid rgba(255,255,255,0.05)',
-    p: 1,
-  },
-};
-export default function MessageBubble({ message }: MessageBubbleProps) {
-  const isUser = message.role === 'user';
-  const isAssistant = message.role === 'assistant';
-  if (message.approval) {
     return (
-      <Box sx={{ width: '100%', maxWidth: '880px', mx: 'auto', my: 2 }}>
-        <ApprovalFlow message={message} />
-      </Box>
     );
   }
-  // Render segments chronologically if available, otherwise fall back to content
-  const renderContent = () => {
-    if (message.segments && message.segments.length > 0) {
-      return message.segments.map((segment, idx) => {
-        if (segment.type === 'text' && segment.content) {
-          return (
-            <Box key={idx} sx={markdownStyles}>
-              <ReactMarkdown remarkPlugins={[remarkGfm]}>{segment.content}</ReactMarkdown>
-            </Box>
-          );
-        }
-        if (segment.type === 'tools' && segment.tools && segment.tools.length > 0) {
-          return <ToolsSegment key={idx} tools={segment.tools} />;
-        }
-        return null;
-      });
-    }
-    // Fallback: just render content
     return (
-      <Box sx={markdownStyles}>
-        <ReactMarkdown remarkPlugins={[remarkGfm]}>{message.content}</ReactMarkdown>
-      </Box>
     );
-  };
-  return (
-    <Box
-      sx={{
-        display: 'flex',
-        justifyContent: isUser ? 'flex-end' : 'flex-start',
-        width: '100%',
-        maxWidth: '880px',
-        mx: 'auto',
-      }}
-    >
-      <Paper
-        elevation={0}
-        className={`message ${isUser ? 'user' : isAssistant ? 'assistant' : ''}`}
-        sx={{
-          p: '14px 18px',
-          margin: '10px 0',
-          maxWidth: '100%',
-          borderRadius: 'var(--radius-lg)',
-          borderTopLeftRadius: isAssistant ? '6px' : undefined,
-          lineHeight: 1.45,
-          boxShadow: 'var(--shadow-1)',
-          border: '1px solid rgba(255,255,255,0.03)',
-          background: 'linear-gradient(180deg, rgba(255,255,255,0.015), transparent)',
-        }}
-      >
-        {renderContent()}
-        <Typography
-          className="meta"
-          variant="caption"
-          sx={{ display: 'block', textAlign: 'right', mt: 1, fontSize: '11px', opacity: 0.5 }}
-        >
-          {new Date(message.timestamp).toLocaleTimeString()}
-        </Typography>
-      </Paper>
-    </Box>
-  );
 }

+import UserMessage from './UserMessage';
+import AssistantMessage from './AssistantMessage';
+import type { UIMessage } from 'ai';
+/** Props for MessageBubble, which dispatches to the user/assistant renderers. */
 interface MessageBubbleProps {
+  /** The chat message to render; its `role` selects the renderer. */
+  message: UIMessage;
+  /** Forwarded to UserMessage only. */
+  isLastTurn?: boolean;
+  /** Forwarded to UserMessage only. */
+  onUndoTurn?: () => void;
+  /** Forwarded to UserMessage only. */
+  isProcessing?: boolean;
+  /** Forwarded to AssistantMessage only. */
+  isStreaming?: boolean;
+  /** Forwarded to AssistantMessage only; resolves a batch of tool approvals. */
+  approveTools: (approvals: Array<{ tool_call_id: string; approved: boolean; feedback?: string | null }>) => Promise<boolean>;
 }
+/**
+ * Thin router over a single chat message: renders UserMessage for
+ * `role === 'user'`, AssistantMessage for `role === 'assistant'`, and
+ * nothing for any other role.
+ */
+export default function MessageBubble({
+  message,
+  isLastTurn = false,
+  onUndoTurn,
+  isProcessing = false,
+  isStreaming = false,
+  approveTools,
+}: MessageBubbleProps) {
+  // User turns receive the undo-related props.
+  if (message.role === 'user') {
     return (
+      <UserMessage
+        message={message}
+        isLastTurn={isLastTurn}
+        onUndoTurn={onUndoTurn}
+        isProcessing={isProcessing}
+      />
     );
   }
+  // Assistant turns receive the streaming flag and the approval callback.
+  if (message.role === 'assistant') {
     return (
+      <AssistantMessage
+        message={message}
+        isStreaming={isStreaming}
+        approveTools={approveTools}
+      />
     );
+  }
+  // Any other role is not rendered.
+  return null;
 }

frontend/src/components/Chat/MessageList.tsx CHANGED Viewed

@@ -1,100 +1,151 @@
-import { useEffect, useRef } from 'react';
-import { Box, Typography } from '@mui/material';
-import { useSessionStore } from '@/store/sessionStore';
 import MessageBubble from './MessageBubble';
-import type { Message } from '@/types/agent';
 interface MessageListProps {
-  messages: Message[];
   isProcessing: boolean;
 }
-const TechnicalIndicator = () => (
-  <Box
-    component="span"
-    sx={{
-      color: 'primary.main',
-      fontFamily: 'monospace',
-      fontWeight: 'bold',
-      fontSize: '1.2rem',
-      lineHeight: 0,
-      display: 'inline-block',
-      verticalAlign: 'middle',
-      width: '1em',
-      letterSpacing: '-3px',
-      transform: 'scale(0.6) translateY(-2px)',
-      '&::after': {
-        content: '""',
-        animation: 'dots 2s steps(4, end) infinite',
-      },
-      '@keyframes dots': {
-        '0%': { content: '""' },
-        '25%': { content: '"."' },
-        '50%': { content: '".."' },
-        '75%, 100%': { content: '"..."' },
-      },
-    }}
-  />
-);
-export default function MessageList({ messages, isProcessing }: MessageListProps) {
-  const bottomRef = useRef<HTMLDivElement>(null);
-  const { activeSessionId } = useSessionStore();
-  // Auto-scroll to bottom when new messages arrive
   useEffect(() => {
-    bottomRef.current?.scrollIntoView({ behavior: 'smooth' });
-  }, [messages, isProcessing]);
   return (
     <Box
       sx={{
         flex: 1,
         overflow: 'auto',
-        p: 2,
         display: 'flex',
         flexDirection: 'column',
       }}
     >
-      <Box sx={{ maxWidth: 'md', mx: 'auto', width: '100%', display: 'flex', flexDirection: 'column', gap: 2 }}>
         {messages.length === 0 && !isProcessing ? (
-          <Box
-            sx={{
-              flex: 1,
-              display: 'flex',
-              alignItems: 'center',
-              justifyContent: 'center',
-              py: 8,
-            }}
-          >
-            <Typography color="text.secondary" sx={{ fontFamily: 'monospace' }}>
-              Awaiting input…
-            </Typography>
-          </Box>
         ) : (
-          messages.map((message) => (
-            <MessageBubble key={message.id} message={message} />
           ))
         )}
-        {isProcessing && (
-          <Box sx={{ width: '100%', mb: 2 }}>
-            <Box sx={{ display: 'flex', alignItems: 'center', gap: 1, mb: 1, px: 0.5 }}>
-              <Typography variant="caption" color="text.secondary" sx={{ fontFamily: 'monospace', fontWeight: 600 }}>
-                Thinking
-              </Typography>
-              <TechnicalIndicator />
-            </Box>
-          </Box>
-        )}
-        {activeSessionId && (
-          // ApprovalFlow is now handled within messages
-          null
-        )}
-        <div ref={bottomRef} />
-      </Box>
     </Box>
   );
-}

+import { useCallback, useEffect, useRef, useMemo } from 'react';
+import { Box, Stack, Typography } from '@mui/material';
 import MessageBubble from './MessageBubble';
+import ActivityStatusBar from './ActivityStatusBar';
+import { useAgentStore } from '@/store/agentStore';
+import type { UIMessage } from 'ai';
+/** Props for the scrollable chat transcript. */
 interface MessageListProps {
+  /** Messages to render, in display order. */
+  messages: UIMessage[];
+  /** Passed to every bubble; also marks the last assistant message as streaming. */
   isProcessing: boolean;
+  /** Passed through to every MessageBubble. */
+  approveTools: (approvals: Array<{ tool_call_id: string; approved: boolean; feedback?: string | null }>) => Promise<boolean>;
+  /** Passed to every bubble as `onUndoTurn`. */
+  onUndoLastTurn: () => void | Promise<void>;
 }
+/**
+ * Returns a greeting word for the current local hour:
+ * before 12 → 'Morning', 12–16 → 'Afternoon', otherwise 'Evening'.
+ */
+function getGreeting(): string {
+  const hour = new Date().getHours();
+  if (hour >= 17) {
+    return 'Evening';
+  }
+  return hour < 12 ? 'Morning' : 'Afternoon';
+}
+/**
+ * Empty-state panel: a time-of-day greeting, personalised with the first
+ * word of the user's name (or the username when no name is set), plus a
+ * short tagline underneath.
+ */
+function WelcomeGreeting() {
+  const { user } = useAgentStore();
+  const timeOfDay = getGreeting();
+  // First word of the display name wins; fall back to the username.
+  const displayName = user?.name?.split(' ')[0] || user?.username;
+  const headline = displayName ? `${timeOfDay}, ${displayName}` : timeOfDay;
+  return (
+    <Box
+      sx={{
+        flex: 1,
+        display: 'flex',
+        flexDirection: 'column',
+        alignItems: 'center',
+        justifyContent: 'center',
+        py: 8,
+        gap: 1.5,
+      }}
+    >
+      <Typography
+        sx={{
+          fontFamily: 'monospace',
+          fontSize: '1.6rem',
+          color: 'var(--text)',
+          fontWeight: 600,
+        }}
+      >
+        {headline}
+      </Typography>
+      <Typography
+        color="text.secondary"
+        sx={{ fontFamily: 'monospace', fontSize: '0.9rem' }}
+      >
+        Let's build something impressive?
+      </Typography>
+    </Box>
+  );
+}
+/**
+ * Scrollable chat transcript. Shows WelcomeGreeting when there are no
+ * messages and nothing is processing; otherwise renders one MessageBubble
+ * per message. The view follows new content only while the user is
+ * already near the bottom of the scroll container.
+ */
+export default function MessageList({ messages, isProcessing, approveTools, onUndoLastTurn }: MessageListProps) {
+  const scrollContainerRef = useRef<HTMLDivElement>(null);
+  // True while the viewport is within 80px of the bottom (see onScroll below).
+  const stickToBottom = useRef(true);
+  const scrollToBottom = useCallback(() => {
+    const el = scrollContainerRef.current;
+    if (el) el.scrollTop = el.scrollHeight;
+  }, []);
+  // Track whether the user has scrolled away from the bottom.
+  useEffect(() => {
+    const el = scrollContainerRef.current;
+    if (!el) return;
+    const onScroll = () => {
+      const distFromBottom = el.scrollHeight - el.scrollTop - el.clientHeight;
+      stickToBottom.current = distFromBottom < 80;
+    };
+    el.addEventListener('scroll', onScroll, { passive: true });
+    return () => el.removeEventListener('scroll', onScroll);
+  }, []);
+  // Follow the bottom when the message list or processing flag changes.
+  useEffect(() => {
+    if (stickToBottom.current) scrollToBottom();
+  }, [messages, isProcessing, scrollToBottom]);
+  // Also follow the bottom on any DOM mutation inside the container
+  // (child list, subtree and text changes are all observed).
   useEffect(() => {
+    const el = scrollContainerRef.current;
+    if (!el) return;
+    const observer = new MutationObserver(() => {
+      if (stickToBottom.current) el.scrollTop = el.scrollHeight;
+    });
+    observer.observe(el, { childList: true, subtree: true, characterData: true });
+    return () => observer.disconnect();
+  }, []);
+  // Id of the most recent user message; drives the `isLastTurn` prop.
+  const lastUserMsgId = useMemo(() => {
+    for (let i = messages.length - 1; i >= 0; i--) {
+      if (messages[i].role === 'user') return messages[i].id;
+    }
+    return null;
+  }, [messages]);
+  // The last assistant message is "streaming" when we're processing
+  const lastAssistantId = useMemo(() => {
+    for (let i = messages.length - 1; i >= 0; i--) {
+      if (messages[i].role === 'assistant') return messages[i].id;
+    }
+    return null;
+  }, [messages]);
   return (
     <Box
+      ref={scrollContainerRef}
       sx={{
         flex: 1,
         overflow: 'auto',
+        px: { xs: 0.5, sm: 1, md: 2 },
+        py: { xs: 2, md: 3 },
         display: 'flex',
         flexDirection: 'column',
       }}
     >
+      <Stack
+        spacing={3}
+        sx={{
+          maxWidth: 880,
+          mx: 'auto',
+          width: '100%',
+          flex: messages.length === 0 && !isProcessing ? 1 : undefined,
+        }}
+      >
         {messages.length === 0 && !isProcessing ? (
+          <WelcomeGreeting />
         ) : (
+          messages.map((msg) => (
+            <MessageBubble
+              key={msg.id}
+              message={msg}
+              isLastTurn={msg.id === lastUserMsgId}
+              onUndoTurn={onUndoLastTurn}
+              isProcessing={isProcessing}
+              isStreaming={isProcessing && msg.id === lastAssistantId}
+              approveTools={approveTools}
+            />
           ))
         )}
+        <ActivityStatusBar />
+        <div />
+      </Stack>
     </Box>
   );
+}

frontend/src/components/Chat/ThinkingIndicator.tsx ADDED Viewed

	@@ -0,0 +1,48 @@

+import { Box, Typography } from '@mui/material';
+/**
+ * "Thinking" caption followed by three small dots that pulse in sequence
+ * (each dot's animation starts 0.2s after the previous one).
+ */
+export default function ThinkingIndicator() {
+  // Three identical dots; styling and stagger come from the parent's sx.
+  const dots = [0, 1, 2].map((dotIndex) => <span key={dotIndex} />);
+  return (
+    <Box sx={{ pt: 0.75 }}>
+      <Typography
+        variant="caption"
+        sx={{
+          fontWeight: 700,
+          fontSize: '0.72rem',
+          color: 'var(--muted-text)',
+          textTransform: 'uppercase',
+          letterSpacing: '0.04em',
+          display: 'flex',
+          alignItems: 'center',
+          gap: 0.75,
+        }}
+      >
+        Thinking
+        <Box
+          component="span"
+          sx={{
+            display: 'inline-flex',
+            gap: '3px',
+            '& span': {
+              width: 4,
+              height: 4,
+              borderRadius: '50%',
+              bgcolor: 'primary.main',
+              animation: 'dotPulse 1.4s ease-in-out infinite',
+            },
+            '& span:nth-of-type(2)': { animationDelay: '0.2s' },
+            '& span:nth-of-type(3)': { animationDelay: '0.4s' },
+            '@keyframes dotPulse': {
+              '0%, 80%, 100%': { opacity: 0.25, transform: 'scale(0.8)' },
+              '40%': { opacity: 1, transform: 'scale(1)' },
+            },
+          }}
+        >
+          {dots}
+        </Box>
+      </Typography>
+    </Box>
+  );
+}

frontend/src/components/Chat/ToolCallGroup.tsx ADDED Viewed

	@@ -0,0 +1,655 @@

+import { useCallback, useMemo, useRef, useState } from 'react';
+import { Box, Stack, Typography, Chip, Button, TextField, IconButton, Link, CircularProgress } from '@mui/material';
+import CheckCircleOutlineIcon from '@mui/icons-material/CheckCircleOutline';
+import ErrorOutlineIcon from '@mui/icons-material/ErrorOutline';
+import OpenInNewIcon from '@mui/icons-material/OpenInNew';
+import HourglassEmptyIcon from '@mui/icons-material/HourglassEmpty';
+import LaunchIcon from '@mui/icons-material/Launch';
+import SendIcon from '@mui/icons-material/Send';
+import BlockIcon from '@mui/icons-material/Block';
+import { useAgentStore } from '@/store/agentStore';
+import { useLayoutStore } from '@/store/layoutStore';
+import { logger } from '@/utils/logger';
+import type { UIMessage } from 'ai';
+// ---------------------------------------------------------------------------
+// Type helpers — extract the dynamic-tool part type from UIMessage
+// ---------------------------------------------------------------------------
+/** The member of UIMessage's parts union whose `type` is 'dynamic-tool'. */
+type DynamicToolPart = Extract<UIMessage['parts'][number], { type: 'dynamic-tool' }>;
+/** Lifecycle state of a dynamic tool part (type of its `state` field). */
+type ToolPartState = DynamicToolPart['state'];
+/** Props for ToolCallGroup. */
+interface ToolCallGroupProps {
+  /** Tool-call parts to display as one group. */
+  tools: DynamicToolPart[];
+  /**
+   * Submits approval decisions; each entry may carry optional feedback and
+   * an optional edited script.
+   * NOTE(review): meaning of the resolved boolean is not visible here — confirm.
+   */
+  approveTools: (approvals: Array<{ tool_call_id: string; approved: boolean; feedback?: string | null; edited_script?: string | null }>) => Promise<boolean>;
+}
+// ---------------------------------------------------------------------------
+// Visual helpers
+// ---------------------------------------------------------------------------
+/**
+ * Small glyph for a tool call's state: hourglass while awaiting approval,
+ * check on success, error/block icons for failed or denied calls, and a
+ * spinner for every other state (including input-streaming/input-available).
+ */
+function StatusIcon({ state }: { state: ToolPartState }) {
+  if (state === 'approval-requested') {
+    return <HourglassEmptyIcon sx={{ fontSize: 16, color: 'var(--accent-yellow)' }} />;
+  }
+  if (state === 'output-available') {
+    return <CheckCircleOutlineIcon sx={{ fontSize: 16, color: 'success.main' }} />;
+  }
+  if (state === 'output-error') {
+    return <ErrorOutlineIcon sx={{ fontSize: 16, color: 'error.main' }} />;
+  }
+  if (state === 'output-denied') {
+    return <BlockIcon sx={{ fontSize: 16, color: 'var(--muted-text)' }} />;
+  }
+  // Still running (or an unrecognised state): show progress.
+  return <CircularProgress size={14} thickness={5} sx={{ color: 'var(--accent-yellow)' }} />;
+}
+/**
+ * Short text label for a tool state, or null when no label applies
+ * (e.g. 'output-available').
+ */
+function statusLabel(state: ToolPartState): string | null {
+  if (state === 'approval-requested') return 'awaiting approval';
+  if (state === 'input-streaming' || state === 'input-available') return 'running';
+  if (state === 'output-denied') return 'denied';
+  if (state === 'output-error') return 'error';
+  return null;
+}
+/**
+ * CSS colour variable for a tool state. Success, error and denied have
+ * their own colours; every other state (including 'approval-requested')
+ * uses the yellow accent.
+ */
+function statusColor(state: ToolPartState): string {
+  if (state === 'output-available') return 'var(--accent-green)';
+  if (state === 'output-error') return 'var(--accent-red)';
+  if (state === 'output-denied') return 'var(--muted-text)';
+  return 'var(--accent-yellow)';
+}
+// ---------------------------------------------------------------------------
+// Inline approval UI (per-tool)
+// ---------------------------------------------------------------------------
+/**
+ * Approval controls for a single tool call awaiting a decision.
+ *
+ * For `hf_jobs` calls it summarises the job (hardware flavor, optional
+ * timeout) and shows a three-line script preview that opens the script in
+ * the right-hand panel. Below that: an optional feedback field, a
+ * "reject with feedback" icon button (enabled only when feedback is
+ * entered), and Reject / Approve buttons.
+ *
+ * @param toolCallId  Id of the tool call being decided.
+ * @param toolName    Tool name; `'hf_jobs'` enables the job summary/preview.
+ * @param input       Raw tool input, read as a string-keyed record.
+ * @param scriptLabel Title used for the script panel and the summary line.
+ * @param onResolve   Called with the decision; rejections pass the feedback
+ *                    text, or 'Rejected by user' when none was entered.
+ */
+function InlineApproval({
+  toolCallId,
+  toolName,
+  input,
+  scriptLabel,
+  onResolve,
+}: {
+  toolCallId: string;
+  toolName: string;
+  input: unknown;
+  scriptLabel: string;
+  onResolve: (toolCallId: string, approved: boolean, feedback?: string) => void;
+}) {
+  const [feedback, setFeedback] = useState('');
+  const args = input as Record<string, unknown> | undefined;
+  const { setPanel, getEditedScript } = useAgentStore();
+  const { setRightPanelOpen, setLeftSidebarOpen } = useLayoutStore();
+  const hasEditedScript = !!getEditedScript(toolCallId);
+  // Opens the script (the edited version if one exists) in the right panel.
+  const handleScriptClick = useCallback(() => {
+    if (toolName === 'hf_jobs' && args?.script) {
+      const scriptContent = getEditedScript(toolCallId) || String(args.script);
+      setPanel(
+        { title: scriptLabel, script: { content: scriptContent, language: 'python' }, parameters: { tool_call_id: toolCallId } },
+        'script',
+        true,
+      );
+      setRightPanelOpen(true);
+      setLeftSidebarOpen(false);
+    }
+  }, [toolCallId, toolName, args, scriptLabel, setPanel, getEditedScript, setRightPanelOpen, setLeftSidebarOpen]);
+  // Shared by both reject controls so they always send the same reason.
+  const handleReject = () => onResolve(toolCallId, false, feedback || 'Rejected by user');
+  return (
+    <Box sx={{ px: 1.5, py: 1.5, borderTop: '1px solid var(--tool-border)' }}>
+      {toolName === 'hf_jobs' && args && (
+        <Box sx={{ mb: 1.5 }}>
+          <Typography variant="body2" sx={{ color: 'var(--muted-text)', fontSize: '0.75rem', mb: 1 }}>
+            Execute <Box component="span" sx={{ color: 'var(--accent-yellow)', fontWeight: 500 }}>{scriptLabel.replace('Script', 'Job')}</Box> on{' '}
+            <Box component="span" sx={{ fontWeight: 500, color: 'var(--text)' }}>
+              {String(args.hardware_flavor || 'default')}
+            </Box>
+            {!!args.timeout && (
+              <> with timeout <Box component="span" sx={{ fontWeight: 500, color: 'var(--text)' }}>
+                {String(args.timeout)}
+              </Box></>
+            )}
+          </Typography>
+          {typeof args.script === 'string' && args.script && (
+            <Box
+              onClick={handleScriptClick}
+              role="button"
+              tabIndex={0}
+              onKeyDown={(e) => {
+                // Mirror native button behaviour for keyboard users.
+                if (e.key === 'Enter' || e.key === ' ') {
+                  e.preventDefault();
+                  handleScriptClick();
+                }
+              }}
+              sx={{
+                mt: 0.5,
+                p: 1.5,
+                bgcolor: 'var(--code-panel-bg)',
+                border: '1px solid var(--tool-border)',
+                borderRadius: '8px',
+                cursor: 'pointer',
+                transition: 'border-color 0.15s ease',
+                '&:hover': { borderColor: 'var(--accent-yellow)' },
+              }}
+            >
+              <Box
+                component="pre"
+                sx={{
+                  m: 0,
+                  fontFamily: '"JetBrains Mono", ui-monospace, SFMono-Regular, monospace',
+                  fontSize: '0.7rem',
+                  lineHeight: 1.5,
+                  color: 'var(--text)',
+                  overflow: 'hidden',
+                  display: '-webkit-box',
+                  WebkitLineClamp: 3,
+                  WebkitBoxOrient: 'vertical',
+                  whiteSpace: 'pre-wrap',
+                  wordBreak: 'break-all',
+                }}
+              >
+                {String(args.script).trim()}
+              </Box>
+              <Typography
+                variant="caption"
+                sx={{
+                  display: 'flex',
+                  alignItems: 'center',
+                  gap: 0.5,
+                  mt: 1,
+                  fontSize: '0.65rem',
+                  color: 'var(--muted-text)',
+                  '&:hover': { color: 'var(--accent-yellow)' },
+                }}
+              >
+                Click to view & edit
+              </Typography>
+            </Box>
+          )}
+        </Box>
+      )}
+      <Box sx={{ display: 'flex', gap: 1, mb: 1 }}>
+        <TextField
+          fullWidth
+          size="small"
+          placeholder="Feedback (optional)"
+          value={feedback}
+          onChange={(e) => setFeedback(e.target.value)}
+          variant="outlined"
+          sx={{
+            '& .MuiOutlinedInput-root': {
+              bgcolor: 'var(--hover-bg)',
+              fontFamily: 'inherit',
+              fontSize: '0.8rem',
+              '& fieldset': { borderColor: 'var(--tool-border)' },
+              '&:hover fieldset': { borderColor: 'var(--border-hover)' },
+              '&.Mui-focused fieldset': { borderColor: 'var(--accent-yellow)' },
+            },
+            '& .MuiOutlinedInput-input': {
+              color: 'var(--text)',
+              '&::placeholder': { color: 'var(--muted-text)', opacity: 0.7 },
+            },
+          }}
+        />
+        <IconButton
+          aria-label="Reject with feedback"
+          onClick={handleReject}
+          disabled={!feedback}
+          size="small"
+          sx={{
+            color: 'var(--accent-red)',
+            border: '1px solid var(--tool-border)',
+            borderRadius: '6px',
+            '&:hover': { bgcolor: 'rgba(224,90,79,0.1)', borderColor: 'var(--accent-red)' },
+            '&.Mui-disabled': { color: 'var(--muted-text)', opacity: 0.3 },
+          }}
+        >
+          <SendIcon sx={{ fontSize: 14 }} />
+        </IconButton>
+      </Box>
+      <Box sx={{ display: 'flex', gap: 1 }}>
+        <Button
+          size="small"
+          onClick={handleReject}
+          sx={{
+            flex: 1,
+            textTransform: 'none',
+            border: '1px solid rgba(255,255,255,0.05)',
+            color: 'var(--accent-red)',
+            fontSize: '0.75rem',
+            py: 0.75,
+            borderRadius: '8px',
+            '&:hover': { bgcolor: 'rgba(224,90,79,0.05)', borderColor: 'var(--accent-red)' },
+          }}
+        >
+          Reject
+        </Button>
+        <Button
+          size="small"
+          onClick={() => onResolve(toolCallId, true)}
+          sx={{
+            flex: 1,
+            textTransform: 'none',
+            border: hasEditedScript ? '1px solid var(--accent-green)' : '1px solid rgba(255,255,255,0.05)',
+            color: 'var(--accent-green)',
+            fontSize: '0.75rem',
+            py: 0.75,
+            borderRadius: '8px',
+            bgcolor: hasEditedScript ? 'rgba(47,204,113,0.08)' : 'transparent',
+            '&:hover': { bgcolor: 'rgba(47,204,113,0.05)', borderColor: 'var(--accent-green)' },
+          }}
+        >
+          {hasEditedScript ? 'Approve (edited)' : 'Approve'}
+        </Button>
+      </Box>
+    </Box>
+  );
+}
+// ---------------------------------------------------------------------------
+// Main component: renders a bordered list of tool calls; tools in the 'approval-requested' state get batch and per-tool approve/reject controls
+// ---------------------------------------------------------------------------
+export default function ToolCallGroup({ tools, approveTools }: ToolCallGroupProps) {
+  const { setPanel, lockPanel, getJobUrl, getEditedScript } = useAgentStore();
+  const { setRightPanelOpen, setLeftSidebarOpen } = useLayoutStore();
+  // ── Batch approval state (pending tools + the user's local, not-yet-sent decisions) ──
+  const pendingTools = useMemo(
+    () => tools.filter(t => t.state === 'approval-requested'),
+    [tools],
+  );
+  const [decisions, setDecisions] = useState<Record<string, { approved: boolean; feedback?: string }>>({});
+  const [isSubmitting, setIsSubmitting] = useState(false); // drives rendering: approval controls are hidden while true
+  const submittingRef = useRef(false); // synchronous re-entrancy guard checked at the top of sendBatch
+  const { scriptLabelMap, toolDisplayMap } = useMemo(() => { // labels for hf_jobs calls that carry a script; numbered only when there is more than one
+    const hfJobs = tools.filter(t => t.toolName === 'hf_jobs' && (t.input as Record<string, unknown>)?.script);
+    const scriptMap: Record<string, string> = {};
+    const displayMap: Record<string, string> = {};
+    for (let i = 0; i < hfJobs.length; i++) {
+      const id = hfJobs[i].toolCallId;
+      if (hfJobs.length > 1) {
+        scriptMap[id] = `Script ${i + 1}`;
+        displayMap[id] = `hf_jobs #${i + 1}`;
+      } else {
+        scriptMap[id] = 'Script';
+        displayMap[id] = 'hf_jobs';
+      }
+    }
+    return { scriptLabelMap: scriptMap, toolDisplayMap: displayMap };
+  }, [tools]);
+  // ── Send all decisions as a single batch (one approveTools call for the whole map) ──
+  const sendBatch = useCallback(
+    async (batch: Record<string, { approved: boolean; feedback?: string }>) => {
+      if (submittingRef.current) return; // a batch is already in flight (or was sent successfully): ignore
+      submittingRef.current = true;
+      setIsSubmitting(true);
+      const approvals = Object.entries(batch).map(([toolCallId, d]) => {
+        const editedScript = d.approved ? (getEditedScript(toolCallId) ?? null) : null; // edited scripts are only looked up for approved tools
+        if (editedScript) {
+          logger.log(`Sending edited script for ${toolCallId} (${editedScript.length} chars)`);
+        }
+        return {
+          tool_call_id: toolCallId,
+          approved: d.approved,
+          feedback: d.approved ? null : (d.feedback || 'Rejected by user'), // rejections always carry a feedback string; approvals never do
+          edited_script: editedScript,
+        };
+      });
+      const ok = await approveTools(approvals);
+      if (ok) { // on success the submitting flags stay set; nothing in this component resets them
+        lockPanel();
+      } else {
+        logger.error('Batch approval failed');
+        submittingRef.current = false; // failure: release the guard so a batch can be sent again
+        setIsSubmitting(false);
+      }
+    },
+    [approveTools, lockPanel, getEditedScript],
+  );
+  const handleApproveAll = useCallback(() => { // builds the batch from pendingTools only; local `decisions` are not consulted
+    const batch: Record<string, { approved: boolean }> = {};
+    for (const t of pendingTools) batch[t.toolCallId] = { approved: true };
+    sendBatch(batch);
+  }, [pendingTools, sendBatch]);
+  const handleRejectAll = useCallback(() => { // no feedback is set here, so sendBatch falls back to 'Rejected by user'
+    const batch: Record<string, { approved: boolean }> = {};
+    for (const t of pendingTools) batch[t.toolCallId] = { approved: false };
+    sendBatch(batch);
+  }, [pendingTools, sendBatch]);
+  const handleIndividualDecision = useCallback( // records one tool's decision locally; submits once every pending tool has one
+    (toolCallId: string, approved: boolean, feedback?: string) => {
+      setDecisions(prev => {
+        const next = { ...prev, [toolCallId]: { approved, feedback } };
+        if (pendingTools.every(t => next[t.toolCallId])) {
+          queueMicrotask(() => sendBatch(next)); // NOTE(review): side effect scheduled from inside a state updater; if React invokes the updater more than once (e.g. StrictMode in development) this queues twice, and the second call is dropped by sendBatch's ref guard
+        }
+        return next;
+      });
+    },
+    [pendingTools, sendBatch],
+  );
+  const undoDecision = useCallback((toolCallId: string) => { // removes the local decision so the tool shows its approval controls again
+    setDecisions(prev => {
+      const next = { ...prev };
+      delete next[toolCallId];
+      return next;
+    });
+  }, []);
+  // ── Panel click handler: shows the tool's script, output, or arguments in the right panel ──
+  const handleClick = useCallback(
+    (tool: DynamicToolPart) => {
+      const args = tool.input as Record<string, unknown> | undefined;
+      const displayName = toolDisplayMap[tool.toolCallId] || tool.toolName;
+      if (tool.toolName === 'hf_jobs' && args?.script) {
+        const hasOutput = (tool.state === 'output-available' || tool.state === 'output-error') && tool.output;
+        const scriptContent = getEditedScript(tool.toolCallId) || String(args.script); // prefer the user's edited script over the original argument
+        setPanel(
+          {
+            title: displayName,
+            script: { content: scriptContent, language: 'python' },
+            ...(hasOutput ? { output: { content: String(tool.output), language: 'markdown' } } : {}),
+            parameters: { tool_call_id: tool.toolCallId },
+          },
+          hasOutput ? 'output' : 'script',
+        );
+        setRightPanelOpen(true);
+        setLeftSidebarOpen(false); // only this hf_jobs script branch also closes the left sidebar
+        return;
+      }
+      if ((tool.state === 'output-available' || tool.state === 'output-error') && tool.output) {
+        let language = 'text';
+        const content = String(tool.output);
+        if (content.trim().startsWith('{') || content.trim().startsWith('[')) language = 'json'; // cheap sniffing: leading brace/bracket → json, any code fence → markdown
+        else if (content.includes('```')) language = 'markdown';
+        setPanel({ title: displayName, output: { content, language } }, 'output');
+        setRightPanelOpen(true);
+      } else if (args) { // no output yet: show the call's arguments as pretty-printed JSON
+        const content = JSON.stringify(args, null, 2);
+        setPanel({ title: displayName, output: { content, language: 'json' } }, 'output');
+        setRightPanelOpen(true);
+      }
+    },
+    [toolDisplayMap, setPanel, getEditedScript, setRightPanelOpen, setLeftSidebarOpen],
+  );
+  // ── Parse hf_jobs metadata from output: looks for '**View at:** <https url>' and '**Final Status:** <text>' markers ──
+  function parseJobMeta(output: unknown): { jobUrl?: string; jobStatus?: string } {
+    if (typeof output !== 'string') return {}; // non-string output yields no metadata
+    const urlMatch = output.match(/\*\*View at:\*\*\s*(https:\/\/[^\s\n]+)/);
+    const statusMatch = output.match(/\*\*Final Status:\*\*\s*([^\n]+)/);
+    return {
+      jobUrl: urlMatch?.[1],
+      jobStatus: statusMatch?.[1]?.trim(),
+    };
+  }
+  // ── Render ────────────────────────────────────────────────────────
+  const decidedCount = pendingTools.filter(t => decisions[t.toolCallId]).length; // counts only decisions for tools that are still pending
+  return (
+    <Box
+      sx={{
+        borderRadius: 2,
+        border: '1px solid var(--tool-border)',
+        bgcolor: 'var(--tool-bg)',
+        overflow: 'hidden',
+        my: 1,
+      }}
+    >
+      {/* Batch approval header — hidden once user starts deciding individually */}
+      {pendingTools.length > 1 && !isSubmitting && decidedCount === 0 && (
+        <Box
+          sx={{
+            display: 'flex',
+            alignItems: 'center',
+            gap: 1,
+            px: 1.5,
+            py: 1,
+            borderBottom: '1px solid var(--tool-border)',
+          }}
+        >
+          <Typography
+            variant="body2"
+            sx={{ fontSize: '0.72rem', color: 'var(--muted-text)', mr: 'auto', whiteSpace: 'nowrap' }}
+          >
+            {`${pendingTools.length} tool${pendingTools.length > 1 ? 's' : ''} pending`}
+          </Typography>
+          <Button
+            size="small"
+            onClick={handleRejectAll}
+            sx={{
+              textTransform: 'none',
+              color: 'var(--accent-red)',
+              border: '1px solid rgba(255,255,255,0.05)',
+              fontSize: '0.72rem',
+              py: 0.5,
+              px: 1.5,
+              borderRadius: '8px',
+              '&:hover': { bgcolor: 'rgba(224,90,79,0.05)', borderColor: 'var(--accent-red)' },
+            }}
+          >
+            Reject all
+          </Button>
+          <Button
+            size="small"
+            onClick={handleApproveAll}
+            sx={{
+              textTransform: 'none',
+              color: 'var(--accent-green)',
+              border: '1px solid var(--accent-green)',
+              fontSize: '0.72rem',
+              fontWeight: 600,
+              py: 0.5,
+              px: 1.5,
+              borderRadius: '8px',
+              '&:hover': { bgcolor: 'rgba(47,204,113,0.1)' },
+            }}
+          >
+            Approve all{pendingTools.length > 1 ? ` (${pendingTools.length})` : ''}
+          </Button>
+        </Box>
+      )}
+      {/* Tool list */}
+      <Stack divider={<Box sx={{ borderBottom: '1px solid var(--tool-border)' }} />}>
+        {tools.map((tool) => {
+          const state = tool.state;
+          const isPending = state === 'approval-requested';
+          const clickable =
+            state === 'output-available' ||
+            state === 'output-error' ||
+            !!tool.input;
+          const localDecision = decisions[tool.toolCallId];
+          const displayState = isPending && localDecision // while still pending, a local decision is previewed as 'input-available' (approved) or 'output-denied' (rejected)
+            ? (localDecision.approved ? 'input-available' : 'output-denied')
+            : state;
+          const label = statusLabel(displayState as ToolPartState);
+          // Parse job metadata from hf_jobs output and store
+          const jobUrlFromStore = tool.toolName === 'hf_jobs' ? getJobUrl(tool.toolCallId) : undefined;
+          const jobMetaFromOutput = tool.toolName === 'hf_jobs' && tool.state === 'output-available'
+            ? parseJobMeta(tool.output)
+            : {};
+          // Combine job URL from store (available immediately) with output metadata (available at completion)
+          const jobMeta = {
+            jobUrl: jobUrlFromStore || jobMetaFromOutput.jobUrl,
+            jobStatus: jobMetaFromOutput.jobStatus,
+          };
+          return (
+            <Box key={tool.toolCallId}>
+              {/* Main tool row */}
+              <Stack
+                direction="row"
+                alignItems="center"
+                spacing={1}
+                onClick={() => !isPending && handleClick(tool)}
+                sx={{
+                  px: 1.5,
+                  py: 1,
+                  cursor: isPending ? 'default' : clickable ? 'pointer' : 'default',
+                  transition: 'background-color 0.15s',
+                  '&:hover': clickable && !isPending ? { bgcolor: 'var(--hover-bg)' } : {},
+                }}
+              >
+                <StatusIcon state={
+                  (tool.toolName === 'hf_jobs' && jobMeta.jobStatus && ['ERROR', 'FAILED', 'CANCELLED'].includes(jobMeta.jobStatus) && displayState === 'output-available')
+                    ? 'output-error'
+                    : displayState as ToolPartState
+                } />
+                <Typography
+                  variant="body2"
+                  sx={{
+                    fontFamily: '"JetBrains Mono", ui-monospace, SFMono-Regular, monospace',
+                    fontWeight: 600,
+                    fontSize: '0.78rem',
+                    color: 'var(--text)',
+                    flex: 1,
+                    minWidth: 0,
+                    overflow: 'hidden',
+                    textOverflow: 'ellipsis',
+                    whiteSpace: 'nowrap',
+                  }}
+                >
+                  {toolDisplayMap[tool.toolCallId] || tool.toolName}
+                </Typography>
+                {/* Status chip (non hf_jobs, or hf_jobs without final status) */}
+                {label && !(tool.toolName === 'hf_jobs' && jobMeta.jobStatus) && (
+                  <Chip
+                    label={label}
+                    size="small"
+                    sx={{
+                      height: 20,
+                      fontSize: '0.65rem',
+                      fontWeight: 600,
+                      bgcolor: displayState === 'output-error' ? 'rgba(224,90,79,0.12)'
+                        : displayState === 'output-denied' ? 'rgba(255,255,255,0.05)'
+                        : 'var(--accent-yellow-weak)',
+                      color: statusColor(displayState as ToolPartState),
+                      letterSpacing: '0.03em',
+                    }}
+                  />
+                )}
+                {/* HF Jobs: final status chip from job metadata */}
+                {tool.toolName === 'hf_jobs' && jobMeta.jobStatus && (
+                  <Chip
+                    label={jobMeta.jobStatus}
+                    size="small"
+                    sx={{
+                      height: 20,
+                      fontSize: '0.65rem',
+                      fontWeight: 600,
+                      bgcolor: jobMeta.jobStatus === 'COMPLETED'
+                        ? 'rgba(47,204,113,0.12)'
+                        : ['ERROR', 'FAILED', 'CANCELLED'].includes(jobMeta.jobStatus!)
+                          ? 'rgba(224,90,79,0.12)'
+                          : 'rgba(255,193,59,0.12)',
+                      color: jobMeta.jobStatus === 'COMPLETED'
+                        ? 'var(--accent-green)'
+                        : ['ERROR', 'FAILED', 'CANCELLED'].includes(jobMeta.jobStatus!)
+                          ? 'var(--accent-red)'
+                          : 'var(--accent-yellow)',
+                      letterSpacing: '0.03em',
+                    }}
+                  />
+                )}
+                {/* View on HF link — single place, shown whenever URL is available */}
+                {tool.toolName === 'hf_jobs' && jobMeta.jobUrl && (
+                  <Link
+                    href={jobMeta.jobUrl}
+                    target="_blank"
+                    rel="noopener noreferrer"
+                    onClick={(e) => e.stopPropagation()}
+                    sx={{
+                      display: 'inline-flex',
+                      alignItems: 'center',
+                      gap: 0.5,
+                      color: 'var(--accent-yellow)',
+                      fontSize: '0.68rem',
+                      textDecoration: 'none',
+                      ml: 0.5,
+                      '&:hover': { textDecoration: 'underline' },
+                    }}
+                  >
+                    <LaunchIcon sx={{ fontSize: 12 }} />
+                    View on HF
+                  </Link>
+                )}
+                {clickable && !isPending && (
+                  <OpenInNewIcon sx={{ fontSize: 14, color: 'var(--muted-text)', opacity: 0.6 }} />
+                )}
+              </Stack>
+              {/* Per-tool approval: undecided */}
+              {isPending && !localDecision && !isSubmitting && (
+                <InlineApproval
+                  toolCallId={tool.toolCallId}
+                  toolName={tool.toolName}
+                  input={tool.input}
+                  scriptLabel={scriptLabelMap[tool.toolCallId] || 'Script'}
+                  onResolve={handleIndividualDecision}
+                />
+              )}
+              {/* Per-tool approval: locally decided (undo available) */}
+              {isPending && localDecision && !isSubmitting && (
+                <Box
+                  sx={{
+                    display: 'flex',
+                    alignItems: 'center',
+                    justifyContent: 'space-between',
+                    px: 1.5,
+                    py: 0.75,
+                    borderTop: '1px solid var(--tool-border)',
+                  }}
+                >
+                  <Typography variant="body2" sx={{ fontSize: '0.72rem', color: 'var(--muted-text)' }}>
+                    {localDecision.approved
+                      ? 'Marked for approval'
+                      : `Marked for rejection${localDecision.feedback ? `: ${localDecision.feedback}` : ''}`}
+                  </Typography>
+                  <Button
+                    size="small"
+                    onClick={() => undoDecision(tool.toolCallId)}
+                    sx={{
+                      textTransform: 'none',
+                      fontSize: '0.7rem',
+                      color: 'var(--muted-text)',
+                      minWidth: 'auto',
+                      px: 1,
+                      '&:hover': { color: 'var(--text)' },
+                    }}
+                  >
+                    Undo
+                  </Button>
+                </Box>
+              )}
+            </Box>
+          );
+        })}
+      </Stack>
+    </Box>
+  );
+}

frontend/src/components/Chat/UserMessage.tsx ADDED Viewed

	@@ -0,0 +1,105 @@

+import { Box, Stack, Typography, IconButton, Tooltip } from '@mui/material';
+import CloseIcon from '@mui/icons-material/Close';
+import type { UIMessage } from 'ai';
+import type { MessageMeta } from '@/types/agent';
+interface UserMessageProps { // props accepted by the UserMessage component
+  message: UIMessage; // message to render; only its `text` parts and `metadata` are read
+  isLastTurn?: boolean; // the undo button is only offered when this is true
+  onUndoTurn?: () => void; // click handler for the undo button; when omitted no undo button is rendered
+  isProcessing?: boolean; // while true the undo button is not rendered
+}
+/**
+ * Concatenate the text of every `text` part of a message, in order.
+ * Parts of any other type are ignored; no separator is inserted.
+ */
+function extractText(message: UIMessage): string {
+  let combined = '';
+  for (const part of message.parts) {
+    // The `type` discriminant narrows the part so `.text` is available.
+    if (part.type === 'text') {
+      combined += part.text;
+    }
+  }
+  return combined;
+}
+/**
+ * Right-aligned chat bubble for a user message.
+ *
+ * Renders the concatenated text parts of `message` and, when the message
+ * metadata carries a valid `createdAt`, a short localized time below it.
+ * An undo ("Remove this turn") button is shown next to the bubble only when
+ * this is the last turn, nothing is processing, and `onUndoTurn` is provided.
+ *
+ * @param message       Message to display.
+ * @param isLastTurn    Whether this message belongs to the last turn (default false).
+ * @param onUndoTurn    Called when the undo button is clicked.
+ * @param isProcessing  Whether the agent is currently processing (default false).
+ */
+export default function UserMessage({
+  message,
+  isLastTurn = false,
+  onUndoTurn,
+  isProcessing = false,
+}: UserMessageProps) {
+  const showUndo = isLastTurn && !isProcessing && !!onUndoTurn;
+  const text = extractText(message);
+  const meta = message.metadata as MessageMeta | undefined;
+  // Validate the timestamp before formatting: an unparseable value would
+  // otherwise be rendered as the literal text "Invalid Date".
+  const createdAt = meta?.createdAt ? new Date(meta.createdAt) : null;
+  const timeStr = createdAt && !Number.isNaN(createdAt.getTime())
+    ? createdAt.toLocaleTimeString([], { hour: '2-digit', minute: '2-digit' })
+    : null;
+  return (
+    <Stack
+      direction="row"
+      spacing={1.5}
+      justifyContent="flex-end"
+      alignItems="flex-start"
+      sx={{
+        '& .undo-btn': {
+          opacity: 0,
+          transition: 'opacity 0.15s ease',
+        },
+        '&:hover .undo-btn': {
+          opacity: 1,
+        },
+        // Keyboard users never trigger :hover; reveal the button when it
+        // (or anything else in the row) holds focus so it is not invisible.
+        '&:focus-within .undo-btn': {
+          opacity: 1,
+        },
+      }}
+    >
+      {showUndo && (
+        <Box className="undo-btn" sx={{ display: 'flex', alignItems: 'center', mt: 0.75 }}>
+          <Tooltip title="Remove this turn" placement="left">
+            <IconButton
+              onClick={onUndoTurn}
+              size="small"
+              sx={{
+                width: 24,
+                height: 24,
+                color: 'var(--muted-text)',
+                '&:hover': {
+                  color: 'var(--accent-red)',
+                  bgcolor: 'rgba(244,67,54,0.08)',
+                },
+              }}
+            >
+              <CloseIcon sx={{ fontSize: 14 }} />
+            </IconButton>
+          </Tooltip>
+        </Box>
+      )}
+      <Box
+        sx={{
+          maxWidth: { xs: '88%', md: '72%' },
+          bgcolor: 'var(--surface)',
+          borderRadius: 1.5,
+          borderTopRightRadius: 4,
+          px: { xs: 1.5, md: 2.5 },
+          py: 1.5,
+          border: '1px solid var(--border)',
+        }}
+      >
+        <Typography
+          variant="body1"
+          sx={{
+            fontSize: '0.925rem',
+            lineHeight: 1.65,
+            color: 'var(--text)',
+            whiteSpace: 'pre-wrap',
+            wordBreak: 'break-word',
+          }}
+        >
+          {text}
+        </Typography>
+        {timeStr && (
+          <Typography
+            variant="caption"
+            sx={{ color: 'var(--muted-text)', mt: 0.5, display: 'block', textAlign: 'right', fontSize: '0.7rem' }}
+          >
+            {timeStr}
+          </Typography>
+        )}
+      </Box>
+    </Stack>
+  );
+}

frontend/src/components/CodePanel/CodePanel.tsx CHANGED Viewed

@@ -1,138 +1,463 @@
-import { useRef, useEffect, useMemo } from 'react';
-import { Box, Typography, IconButton } from '@mui/material';
 import CloseIcon from '@mui/icons-material/Close';
 import RadioButtonUncheckedIcon from '@mui/icons-material/RadioButtonUnchecked';
 import CheckCircleIcon from '@mui/icons-material/CheckCircle';
 import PlayCircleOutlineIcon from '@mui/icons-material/PlayCircleOutline';
 import CodeIcon from '@mui/icons-material/Code';
-import TerminalIcon from '@mui/icons-material/Terminal';
 import ArticleIcon from '@mui/icons-material/Article';
 import { Prism as SyntaxHighlighter } from 'react-syntax-highlighter';
-import { vscDarkPlus } from 'react-syntax-highlighter/dist/esm/styles/prism';
 import ReactMarkdown from 'react-markdown';
 import remarkGfm from 'remark-gfm';
 import { useAgentStore } from '@/store/agentStore';
 import { useLayoutStore } from '@/store/layoutStore';
 import { processLogs } from '@/utils/logProcessor';
 export default function CodePanel() {
-  const { panelContent, panelTabs, activePanelTab, setActivePanelTab, removePanelTab, plan } = useAgentStore();
-  const { setRightPanelOpen } = useLayoutStore();
   const scrollRef = useRef<HTMLDivElement>(null);
-  // Get the active tab content, or fall back to panelContent for backwards compatibility
-  const activeTab = panelTabs.find(t => t.id === activePanelTab);
-  const currentContent = activeTab || panelContent;
   const displayContent = useMemo(() => {
-    if (!currentContent?.content) return '';
-    // Apply log processing only for text/logs, not for code/json
-    if (!currentContent.language || currentContent.language === 'text') {
-      return processLogs(currentContent.content);
     }
-    return currentContent.content;
-  }, [currentContent?.content, currentContent?.language]);
   useEffect(() => {
-    // Auto-scroll only for logs tab
-    if (scrollRef.current && activePanelTab === 'logs') {
       scrollRef.current.scrollTop = scrollRef.current.scrollHeight;
     }
-  }, [displayContent, activePanelTab]);
-  const hasTabs = panelTabs.length > 0;
   return (
     <Box sx={{ height: '100%', display: 'flex', flexDirection: 'column', bgcolor: 'var(--panel)' }}>
-      {/* Header - Fixed 60px to align */}
-      <Box sx={{
-        height: '60px',
-        display: 'flex',
-        alignItems: 'center',
-        justifyContent: 'space-between',
-        px: 2,
-        borderBottom: '1px solid rgba(255,255,255,0.03)'
-      }}>
-        {hasTabs ? (
-          <Box sx={{ display: 'flex', alignItems: 'center', gap: 0.5, flexWrap: 'wrap' }}>
-            {panelTabs.map((tab) => {
-              const isActive = activePanelTab === tab.id;
-              // Choose icon based on tab type
-              let icon = <TerminalIcon sx={{ fontSize: 14 }} />;
-              if (tab.id === 'script' || tab.language === 'python') {
-                icon = <CodeIcon sx={{ fontSize: 14 }} />;
-              } else if (tab.id === 'tool_output' || tab.language === 'markdown' || tab.language === 'json') {
-                icon = <ArticleIcon sx={{ fontSize: 14 }} />;
-              }
-              return (
-                <Box
-                  key={tab.id}
-                  onClick={() => setActivePanelTab(tab.id)}
-                  sx={{
-                    display: 'flex',
-                    alignItems: 'center',
-                    gap: 0.5,
-                    px: 1.5,
-                    py: 0.75,
-                    borderRadius: 1,
-                    cursor: 'pointer',
-                    fontSize: '0.7rem',
-                    fontWeight: 600,
-                    textTransform: 'uppercase',
-                    letterSpacing: '0.05em',
-                    color: isActive ? 'var(--text)' : 'var(--muted-text)',
-                    bgcolor: isActive ? 'rgba(255,255,255,0.08)' : 'transparent',
-                    border: '1px solid',
-                    borderColor: isActive ? 'rgba(255,255,255,0.1)' : 'transparent',
-                    transition: 'all 0.15s ease',
-                    '&:hover': {
-                      bgcolor: 'rgba(255,255,255,0.05)',
-                    },
-                  }}
-                >
-                  {icon}
-                  <span>{tab.title}</span>
-                  <Box
-                    component="span"
-                    onClick={(e) => {
-                      e.stopPropagation();
-                      removePanelTab(tab.id);
-                    }}
-                    sx={{
-                      display: 'flex',
-                      alignItems: 'center',
-                      justifyContent: 'center',
-                      ml: 0.5,
-                      width: 16,
-                      height: 16,
-                      borderRadius: '50%',
-                      fontSize: '0.65rem',
-                      opacity: 0.5,
-                      '&:hover': {
-                        opacity: 1,
-                        bgcolor: 'rgba(255,255,255,0.1)',
-                      },
-                    }}
-                  >
-                    ✕
-                  </Box>
                 </Box>
-              );
-            })}
-          </Box>
-        ) : (
-          <Typography variant="caption" sx={{ fontWeight: 600, color: 'var(--muted-text)', textTransform: 'uppercase', letterSpacing: '0.05em' }}>
-            {currentContent?.title || 'Code Panel'}
-          </Typography>
-        )}
-        <IconButton size="small" onClick={() => setRightPanelOpen(false)} sx={{ color: 'var(--muted-text)' }}>
-          <CloseIcon fontSize="small" />
-        </IconButton>
       </Box>
-      {/* Main Content Area */}
       <Box sx={{ flex: 1, overflow: 'hidden', display: 'flex', flexDirection: 'column' }}>
-        {!currentContent ? (
           <Box sx={{ flex: 1, display: 'flex', alignItems: 'center', justifyContent: 'center', p: 4 }}>
             <Typography variant="body2" color="text.secondary" sx={{ opacity: 0.5 }}>
               NO DATA LOADED
@@ -144,174 +469,72 @@ export default function CodePanel() {
               ref={scrollRef}
               className="code-panel"
               sx={{
-                background: '#0A0B0C',
                 borderRadius: 'var(--radius-md)',
-                padding: '18px',
-                border: '1px solid rgba(255,255,255,0.03)',
-                fontFamily: 'ui-monospace, SFMono-Regular, Menlo, Monaco, "Roboto Mono", monospace',
                 fontSize: '13px',
                 lineHeight: 1.55,
                 height: '100%',
                 overflow: 'auto',
               }}
             >
-              {currentContent.content ? (
-                currentContent.language === 'python' ? (
-                  <SyntaxHighlighter
-                    language="python"
-                    style={vscDarkPlus}
-                    customStyle={{
-                      margin: 0,
-                      padding: 0,
-                      background: 'transparent',
-                      fontSize: '13px',
-                      fontFamily: 'inherit',
-                    }}
-                    wrapLines={true}
-                    wrapLongLines={true}
-                  >
-                    {displayContent}
-                  </SyntaxHighlighter>
-                ) : currentContent.language === 'json' ? (
-                  <SyntaxHighlighter
-                    language="json"
-                    style={vscDarkPlus}
-                    customStyle={{
-                      margin: 0,
-                      padding: 0,
-                      background: 'transparent',
-                      fontSize: '13px',
-                      fontFamily: 'inherit',
-                    }}
-                    wrapLines={true}
-                    wrapLongLines={true}
-                  >
-                    {displayContent}
-                  </SyntaxHighlighter>
-                ) : currentContent.language === 'markdown' ? (
-                  <Box sx={{
-                    color: 'var(--text)',
-                    fontSize: '13px',
-                    lineHeight: 1.6,
-                    '& p': { m: 0, mb: 1.5, '&:last-child': { mb: 0 } },
-                    '& pre': {
-                      bgcolor: 'rgba(0,0,0,0.4)',
-                      p: 1.5,
-                      borderRadius: 1,
-                      overflow: 'auto',
-                      fontSize: '12px',
-                      border: '1px solid rgba(255,255,255,0.05)',
-                    },
-                    '& code': {
-                      bgcolor: 'rgba(255,255,255,0.05)',
-                      px: 0.5,
-                      py: 0.25,
-                      borderRadius: 0.5,
-                      fontSize: '12px',
-                      fontFamily: 'ui-monospace, SFMono-Regular, Menlo, Monaco, monospace',
-                    },
-                    '& pre code': { bgcolor: 'transparent', p: 0 },
-                    '& a': {
-                      color: 'var(--accent-yellow)',
-                      textDecoration: 'none',
-                      '&:hover': { textDecoration: 'underline' },
-                    },
-                    '& ul, & ol': { pl: 2.5, my: 1 },
-                    '& li': { mb: 0.5 },
-                    '& table': {
-                      borderCollapse: 'collapse',
-                      width: '100%',
-                      my: 2,
-                      fontSize: '12px',
-                      fontFamily: 'ui-monospace, SFMono-Regular, Menlo, Monaco, monospace',
-                    },
-                    '& th': {
-                      borderBottom: '2px solid rgba(255,255,255,0.15)',
-                      textAlign: 'left',
-                      p: 1,
-                      fontWeight: 600,
-                    },
-                    '& td': {
-                      borderBottom: '1px solid rgba(255,255,255,0.05)',
-                      p: 1,
-                    },
-                    '& h1, & h2, & h3, & h4': {
-                      mt: 2,
-                      mb: 1,
-                      fontWeight: 600,
-                    },
-                    '& h1': { fontSize: '1.25rem' },
-                    '& h2': { fontSize: '1.1rem' },
-                    '& h3': { fontSize: '1rem' },
-                    '& blockquote': {
-                      borderLeft: '3px solid rgba(255,255,255,0.2)',
-                      pl: 2,
-                      ml: 0,
-                      color: 'var(--muted-text)',
-                    },
-                  }}>
-                    <ReactMarkdown remarkPlugins={[remarkGfm]}>{displayContent}</ReactMarkdown>
-                  </Box>
-                ) : (
-                  <Box component="pre" sx={{
-                    m: 0,
-                    fontFamily: 'inherit',
-                    color: 'var(--text)',
-                    whiteSpace: 'pre-wrap',
-                    wordBreak: 'break-all'
-                  }}>
-                    <code>{displayContent}</code>
-                  </Box>
-                )
-              ) : (
-                <Box sx={{ display: 'flex', alignItems: 'center', justifyContent: 'center', height: '100%', opacity: 0.5 }}>
-                  <Typography variant="caption">
-                    NO CONTENT TO DISPLAY
-                  </Typography>
-                </Box>
-              )}
             </Box>
           </Box>
         )}
       </Box>
-      {/* Plan Display at Bottom */}
       {plan && plan.length > 0 && (
-        <Box sx={{
-            borderTop: '1px solid rgba(255,255,255,0.03)',
-            bgcolor: 'rgba(0,0,0,0.2)',
             maxHeight: '30%',
             display: 'flex',
-            flexDirection: 'column'
-        }}>
-            <Box sx={{ p: 1.5, borderBottom: '1px solid rgba(255,255,255,0.03)', display: 'flex', alignItems: 'center', gap: 1 }}>
-                <Typography variant="caption" sx={{ fontWeight: 600, color: 'var(--muted-text)', textTransform: 'uppercase', letterSpacing: '0.05em' }}>
-                    CURRENT PLAN
                 </Typography>
-            </Box>
-            <Box sx={{ p: 2, overflow: 'auto', display: 'flex', flexDirection: 'column', gap: 1 }}>
-                {plan.map((item) => (
-                    <Box key={item.id} sx={{ display: 'flex', alignItems: 'flex-start', gap: 1.5 }}>
-                        <Box sx={{ mt: 0.2 }}>
-                            {item.status === 'completed' && <CheckCircleIcon sx={{ fontSize: 16, color: 'var(--accent-green)' }} />}
-                            {item.status === 'in_progress' && <PlayCircleOutlineIcon sx={{ fontSize: 16, color: 'var(--accent-yellow)' }} />}
-                            {item.status === 'pending' && <RadioButtonUncheckedIcon sx={{ fontSize: 16, color: 'var(--muted-text)', opacity: 0.5 }} />}
-                        </Box>
-                        <Typography
-                            variant="body2"
-                            sx={{
-                                fontSize: '13px',
-                                fontFamily: 'ui-monospace, SFMono-Regular, Menlo, Monaco, monospace',
-                                color: item.status === 'completed' ? 'var(--muted-text)' : 'var(--text)',
-                                textDecoration: item.status === 'completed' ? 'line-through' : 'none',
-                                opacity: item.status === 'pending' ? 0.7 : 1
-                            }}
-                        >
-                            {item.content}
-                        </Typography>
-                    </Box>
-                ))}
-            </Box>
         </Box>
       )}
     </Box>

+import { useRef, useEffect, useMemo, useState, useCallback } from 'react';
+import { Box, Stack, Typography, IconButton, Button, Tooltip } from '@mui/material';
 import CloseIcon from '@mui/icons-material/Close';
 import RadioButtonUncheckedIcon from '@mui/icons-material/RadioButtonUnchecked';
 import CheckCircleIcon from '@mui/icons-material/CheckCircle';
 import PlayCircleOutlineIcon from '@mui/icons-material/PlayCircleOutline';
 import CodeIcon from '@mui/icons-material/Code';
 import ArticleIcon from '@mui/icons-material/Article';
+import EditIcon from '@mui/icons-material/Edit';
+import UndoIcon from '@mui/icons-material/Undo';
+import ContentCopyIcon from '@mui/icons-material/ContentCopy';
+import CheckIcon from '@mui/icons-material/Check';
 import { Prism as SyntaxHighlighter } from 'react-syntax-highlighter';
+import { vscDarkPlus, vs } from 'react-syntax-highlighter/dist/esm/styles/prism';
 import ReactMarkdown from 'react-markdown';
 import remarkGfm from 'remark-gfm';
 import { useAgentStore } from '@/store/agentStore';
 import { useLayoutStore } from '@/store/layoutStore';
 import { processLogs } from '@/utils/logProcessor';
+import type { PanelView } from '@/store/agentStore';
+// ── Helpers ──────────────────────────────────────────────────────
+function PlanStatusIcon({ status }: { status: string }) { // Maps a plan item's status string to a 16px status icon.
+  if (status === 'completed') return <CheckCircleIcon sx={{ fontSize: 16, color: 'var(--accent-green)' }} />;
+  if (status === 'in_progress') return <PlayCircleOutlineIcon sx={{ fontSize: 16, color: 'var(--accent-yellow)' }} />;
+  return <RadioButtonUncheckedIcon sx={{ fontSize: 16, color: 'var(--muted-text)', opacity: 0.5 }} />; // fallback: any other status (e.g. 'pending') gets the dimmed unchecked circle
+}
+// ── Markdown styles (adapts via CSS vars) ────────────────────────
+const markdownSx = { // sx style map for the Box that wraps the rendered ReactMarkdown output
+  color: 'var(--text)',
+  fontSize: '13px',
+  lineHeight: 1.6,
+  '& p': { m: 0, mb: 1.5, '&:last-child': { mb: 0 } }, // no trailing gap after the final paragraph
+  '& pre': {
+    bgcolor: 'var(--code-bg)',
+    p: 1.5,
+    borderRadius: 1,
+    overflow: 'auto',
+    fontSize: '12px',
+    border: '1px solid var(--tool-border)',
+  },
+  '& code': {
+    bgcolor: 'var(--hover-bg)',
+    px: 0.5,
+    py: 0.25,
+    borderRadius: 0.5,
+    fontSize: '12px',
+    fontFamily: 'ui-monospace, SFMono-Regular, Menlo, Monaco, monospace',
+  },
+  '& pre code': { bgcolor: 'transparent', p: 0 }, // code nested in a pre block: clear the inline-code background and padding set above
+  '& a': {
+    color: 'var(--accent-yellow)',
+    textDecoration: 'none',
+    '&:hover': { textDecoration: 'underline' },
+  },
+  '& ul, & ol': { pl: 2.5, my: 1 },
+  '& li': { mb: 0.5 },
+  '& table': {
+    borderCollapse: 'collapse',
+    width: '100%',
+    my: 2,
+    fontSize: '12px',
+    fontFamily: 'ui-monospace, SFMono-Regular, Menlo, Monaco, monospace',
+  },
+  '& th': {
+    borderBottom: '2px solid var(--border-hover)',
+    textAlign: 'left',
+    p: 1,
+    fontWeight: 600,
+  },
+  '& td': {
+    borderBottom: '1px solid var(--tool-border)',
+    p: 1,
+  },
+  '& h1, & h2, & h3, & h4': { mt: 2, mb: 1, fontWeight: 600 }, // shared heading spacing; per-level font sizes follow
+  '& h1': { fontSize: '1.25rem' },
+  '& h2': { fontSize: '1.1rem' },
+  '& h3': { fontSize: '1rem' },
+  '& blockquote': {
+    borderLeft: '3px solid var(--accent-yellow)',
+    pl: 2,
+    ml: 0,
+    color: 'var(--muted-text)',
+  },
+} as const;
+// ── View toggle button ──────────────────────────────────────────
+function ViewToggle({ view, icon, label, isActive, onClick }: { // Clickable pill used in the panel header to switch between panel views
+  view: PanelView; // value handed to onClick when this toggle is clicked
+  icon: React.ReactNode; // rendered before the label
+  label: string;
+  isActive: boolean; // selects the highlighted text, background and border colors
+  onClick: (v: PanelView) => void;
+}) {
+  return (
+    <Box
+      onClick={() => onClick(view)}
+      sx={{
+        display: 'flex',
+        alignItems: 'center',
+        gap: 0.5,
+        px: 1.5,
+        py: 0.75,
+        borderRadius: 1,
+        cursor: 'pointer',
+        fontSize: '0.7rem',
+        fontWeight: 600,
+        textTransform: 'uppercase',
+        letterSpacing: '0.05em',
+        whiteSpace: 'nowrap',
+        color: isActive ? 'var(--text)' : 'var(--muted-text)',
+        bgcolor: isActive ? 'var(--tab-active-bg)' : 'transparent',
+        border: '1px solid',
+        borderColor: isActive ? 'var(--tab-active-border)' : 'transparent', // border is always 1px; only its color changes with isActive
+        transition: 'all 0.15s ease',
+        '&:hover': { bgcolor: 'var(--tab-hover-bg)' },
+      }}
+    >
+      {icon}
+      <span>{label}</span>
+    </Box>
+  );
+}
+// ── Component ────────────────────────────────────────────────────
 export default function CodePanel() {
+  const { panelData, panelView, panelEditable, setPanelView, updatePanelScript, setEditedScript, plan } =
+    useAgentStore();
+  const { setRightPanelOpen, themeMode } = useLayoutStore();
   const scrollRef = useRef<HTMLDivElement>(null);
+  const textareaRef = useRef<HTMLTextAreaElement>(null);
+  const [isEditing, setIsEditing] = useState(false);
+  const [editedContent, setEditedContent] = useState('');
+  const [originalContent, setOriginalContent] = useState('');
+  const [copied, setCopied] = useState(false);
+  const isDark = themeMode === 'dark';
+  const syntaxTheme = isDark ? vscDarkPlus : vs;
+  const activeSection = panelView === 'script' ? panelData?.script : panelData?.output;
+  const hasScript = !!panelData?.script;
+  const hasOutput = !!panelData?.output;
+  const hasBothViews = hasScript && hasOutput;
+  const isEditableScript = panelView === 'script' && panelEditable;
+  const hasUnsavedChanges = isEditing && editedContent !== originalContent;
+  // Sync edited content when panel data changes
+  useEffect(() => { // Runs only for an editable script view with non-empty script content
+    if (panelData?.script?.content && panelView === 'script' && panelEditable) {
+      setOriginalContent(panelData.script.content); // the baseline always follows the store's script content
+      if (!isEditing) { // leave the edit buffer alone while the user is editing
+        setEditedContent(panelData.script.content);
+      }
+    }
+  }, [panelData?.script?.content, panelView, panelEditable, isEditing]);
+  // Exit editing when switching away from script view or losing editable
+  useEffect(() => { // isEditableScript is false whenever the view is not 'script' or panelEditable is falsy
+    if (!isEditableScript && isEditing) {
+      setIsEditing(false); // only the editing flag is reset; the edit buffer is left as-is
+    }
+  }, [isEditableScript, isEditing]);
+  const handleStartEdit = useCallback(() => { // Enter edit mode, seeding both buffers from the current script content
+    if (panelData?.script?.content) { // no-op when the script content is missing or empty
+      setEditedContent(panelData.script.content);
+      setOriginalContent(panelData.script.content);
+      setIsEditing(true);
+      setTimeout(() => textareaRef.current?.focus(), 0); // deferred focus; presumably so the textarea is mounted first — the optional chain tolerates a null ref
+    }
+  }, [panelData?.script?.content]);
+  const handleCancelEdit = useCallback(() => { // Discard in-progress edits and leave edit mode
+    setEditedContent(originalContent); // roll the buffer back to the saved baseline
+    setIsEditing(false);
+  }, [originalContent]);
+  const handleSaveEdit = useCallback(() => { // Commit the edit buffer (only if it changed) and leave edit mode
+    if (editedContent !== originalContent) {
+      updatePanelScript(editedContent); // store action from useAgentStore; presumably replaces the panel's script content — confirm in the store
+      const toolCallId = panelData?.parameters?.tool_call_id as string | undefined;
+      if (toolCallId) {
+        setEditedScript(toolCallId, editedContent); // also record the edit under its tool call id when one is present
+      }
+      setOriginalContent(editedContent); // new baseline, so the buffer no longer counts as changed
+    }
+    setIsEditing(false); // edit mode ends even when nothing changed
+  }, [panelData?.parameters?.tool_call_id, editedContent, originalContent, updatePanelScript, setEditedScript]);
+  const handleCopy = useCallback(async () => { // Copy the visible content (the edit buffer while editing) to the clipboard
+    const contentToCopy = isEditing ? editedContent : (activeSection?.content || '');
+    if (contentToCopy) { // nothing is copied when the content is empty
+      try {
+        await navigator.clipboard.writeText(contentToCopy);
+        setCopied(true);
+        setTimeout(() => setCopied(false), 2000); // clear the copied flag again after 2 seconds
+      } catch (err) {
+        console.error('Failed to copy:', err); // failure is only logged; the copied flag is not set
+      }
+    }
+  }, [isEditing, editedContent, activeSection?.content]);
   const displayContent = useMemo(() => { // Text shown in read-only mode, recomputed when the active section's content or language changes
+    if (!activeSection?.content) return '';
+    if (!activeSection.language || activeSection.language === 'text') { // untyped or plain-text sections go through processLogs first
+      return processLogs(activeSection.content);
     }
+    return activeSection.content; // any other language is passed through unchanged
+  }, [activeSection?.content, activeSection?.language]);
   useEffect(() => { // Keep the output view scrolled to the bottom as its content changes
+    if (scrollRef.current && panelView === 'output') { // the script view is never auto-scrolled
       scrollRef.current.scrollTop = scrollRef.current.scrollHeight;
     }
+  }, [displayContent, panelView]);
+  // ── Syntax-highlighted code block (DRY) ────────────────────────
+  const renderSyntaxBlock = (language: string) => ( // Renders displayContent highlighted for the given language, using the current theme's style
+    <SyntaxHighlighter
+      language={language}
+      style={syntaxTheme}
+      customStyle={{
+        margin: 0,
+        padding: 0,
+        background: 'transparent', // no background of its own, so whatever is behind the block shows through
+        fontSize: '13px',
+        fontFamily: 'inherit',
+      }}
+      wrapLines
+      wrapLongLines
+    >
+      {displayContent}
+    </SyntaxHighlighter>
+  );
+  // ── Content renderer ───────────────────────────────────────────
+  const renderContent = () => { // Picks the body for the panel: empty placeholder, editor overlay, or a read-only view chosen by language
+    if (!activeSection?.content) { // 1) nothing to show for the active view
+      return (
+        <Box sx={{ display: 'flex', alignItems: 'center', justifyContent: 'center', height: '100%', opacity: 0.5 }}>
+          <Typography variant="caption">NO CONTENT TO DISPLAY</Typography>
+        </Box>
+      );
+    }
+    if (isEditing && isEditableScript) { // 2) edit mode: a transparent textarea stacked on top of a highlighted copy of the buffer
+      return (
+        <Box sx={{ position: 'relative', width: '100%', height: '100%' }}>
+          <SyntaxHighlighter
+            language={activeSection?.language === 'python' ? 'python' : activeSection?.language === 'json' ? 'json' : 'text'}
+            style={syntaxTheme}
+            customStyle={{
+              margin: 0,
+              padding: 0,
+              background: 'transparent',
+              fontSize: '13px',
+              fontFamily: '"JetBrains Mono", ui-monospace, SFMono-Regular, Menlo, Monaco, monospace',
+              lineHeight: 1.55,
+              pointerEvents: 'none', // the highlighted layer never takes pointer input; the textarea does
+            }}
+            wrapLines
+            wrapLongLines
+          >
+            {editedContent || ' '}
+          </SyntaxHighlighter>
+          <textarea
+            ref={textareaRef}
+            value={editedContent}
+            onChange={(e) => setEditedContent(e.target.value)}
+            spellCheck={false}
+            style={{
+              position: 'absolute', // pinned to the wrapper's top-left and sized to cover it
+              top: 0,
+              left: 0,
+              width: '100%',
+              height: '100%',
+              background: 'transparent',
+              border: 'none',
+              outline: 'none',
+              resize: 'none',
+              color: 'transparent', // typed text is invisible; the highlighted copy underneath is what the user sees
+              caretColor: 'var(--text)', // only the caret is drawn
+              fontFamily: '"JetBrains Mono", ui-monospace, SFMono-Regular, Menlo, Monaco, monospace', // font, size and line height repeat the highlighter's values above, presumably so both layers line up
+              fontSize: '13px',
+              lineHeight: 1.55,
+              overflow: 'hidden',
+            }}
+          />
+        </Box>
+      );
+    }
+    const lang = activeSection.language; // 3) read-only rendering, dispatched on the section's language
+    if (lang === 'python') return renderSyntaxBlock('python');
+    if (lang === 'json') return renderSyntaxBlock('json');
+    if (lang === 'markdown') {
+      return (
+        <Box sx={markdownSx}>
+          <ReactMarkdown remarkPlugins={[remarkGfm]}>{displayContent}</ReactMarkdown>
+        </Box>
+      );
+    }
+    return ( // any other (or missing) language: plain pre-formatted text
+      <Box
+        component="pre"
+        sx={{ m: 0, fontFamily: 'inherit', color: 'var(--text)', whiteSpace: 'pre-wrap', wordBreak: 'break-all' }}
+      >
+        <code>{displayContent}</code>
+      </Box>
+    );
+  };
   return (
     <Box sx={{ height: '100%', display: 'flex', flexDirection: 'column', bgcolor: 'var(--panel)' }}>
+      {/* ── Header ─────────────────────────────────────────────── */}
+      <Box
+        sx={{
+          height: 60,
+          display: 'flex',
+          alignItems: 'center',
+          justifyContent: 'space-between',
+          px: 2,
+          borderBottom: '1px solid var(--border)',
+          flexShrink: 0,
+        }}
+      >
+        <Box sx={{ display: 'flex', alignItems: 'center', gap: 1, flex: 1, minWidth: 0 }}>
+          {panelData ? (
+            <>
+              <Typography
+                variant="caption"
+                sx={{
+                  fontWeight: 600,
+                  color: 'var(--muted-text)',
+                  textTransform: 'uppercase',
+                  letterSpacing: '0.05em',
+                  fontSize: '0.7rem',
+                  flexShrink: 0,
+                }}
+              >
+                {panelData.title}
+              </Typography>
+              {hasBothViews && (
+                <Box sx={{ display: 'flex', gap: 0.5, ml: 1 }}>
+                  <ViewToggle
+                    view="script"
+                    icon={<CodeIcon sx={{ fontSize: 14 }} />}
+                    label="Script"
+                    isActive={panelView === 'script'}
+                    onClick={setPanelView}
+                  />
+                  <ViewToggle
+                    view="output"
+                    icon={<ArticleIcon sx={{ fontSize: 14 }} />}
+                    label="Result"
+                    isActive={panelView === 'output'}
+                    onClick={setPanelView}
+                  />
                 </Box>
+              )}
+            </>
+          ) : (
+            <Typography
+              variant="caption"
+              sx={{ fontWeight: 600, color: 'var(--muted-text)', textTransform: 'uppercase', letterSpacing: '0.05em' }}
+            >
+              Code Panel
+            </Typography>
+          )}
+        </Box>
+        <Box sx={{ display: 'flex', alignItems: 'center', gap: 1 }}>
+          {activeSection?.content && (
+            <Tooltip title={copied ? 'Copied!' : 'Copy'} placement="top">
+              <IconButton
+                size="small"
+                onClick={handleCopy}
+                sx={{
+                  color: copied ? 'var(--accent-green)' : 'var(--muted-text)',
+                  '&:hover': { color: 'var(--accent-yellow)', bgcolor: 'var(--hover-bg)' },
+                }}
+              >
+                {copied ? <CheckIcon sx={{ fontSize: 18 }} /> : <ContentCopyIcon sx={{ fontSize: 18 }} />}
+              </IconButton>
+            </Tooltip>
+          )}
+          {isEditableScript && !isEditing && (
+            <Button
+              size="small"
+              startIcon={<EditIcon sx={{ fontSize: 14 }} />}
+              onClick={handleStartEdit}
+              sx={{
+                textTransform: 'none',
+                color: 'var(--muted-text)',
+                fontSize: '0.75rem',
+                py: 0.5,
+                '&:hover': { color: 'var(--accent-yellow)', bgcolor: 'var(--hover-bg)' },
+              }}
+            >
+              Edit
+            </Button>
+          )}
+          {isEditing && (
+            <>
+              <Button
+                size="small"
+                startIcon={<UndoIcon sx={{ fontSize: 14 }} />}
+                onClick={handleCancelEdit}
+                sx={{
+                  textTransform: 'none',
+                  color: 'var(--muted-text)',
+                  fontSize: '0.75rem',
+                  py: 0.5,
+                  '&:hover': { color: 'var(--accent-red)', bgcolor: 'var(--hover-bg)' },
+                }}
+              >
+                Cancel
+              </Button>
+              <Button
+                size="small"
+                variant="contained"
+                onClick={handleSaveEdit}
+                disabled={!hasUnsavedChanges}
+                sx={{
+                  textTransform: 'none',
+                  fontSize: '0.75rem',
+                  py: 0.5,
+                  bgcolor: hasUnsavedChanges ? 'var(--accent-yellow)' : 'var(--hover-bg)',
+                  color: hasUnsavedChanges ? '#000' : 'var(--muted-text)',
+                  '&:hover': {
+                    bgcolor: hasUnsavedChanges ? 'var(--accent-yellow)' : 'var(--hover-bg)',
+                    opacity: 0.9,
+                  },
+                  '&.Mui-disabled': {
+                    bgcolor: 'var(--hover-bg)',
+                    color: 'var(--muted-text)',
+                    opacity: 0.5,
+                  },
+                }}
+              >
+                Save
+              </Button>
+            </>
+          )}
+          <IconButton size="small" onClick={() => setRightPanelOpen(false)} sx={{ color: 'var(--muted-text)' }}>
+            <CloseIcon fontSize="small" />
+          </IconButton>
+        </Box>
       </Box>
+      {/* ── Main content area ─────────────────────────────────── */}
       <Box sx={{ flex: 1, overflow: 'hidden', display: 'flex', flexDirection: 'column' }}>
+        {!panelData ? (
           <Box sx={{ flex: 1, display: 'flex', alignItems: 'center', justifyContent: 'center', p: 4 }}>
             <Typography variant="body2" color="text.secondary" sx={{ opacity: 0.5 }}>
               NO DATA LOADED
               ref={scrollRef}
               className="code-panel"
               sx={{
+                bgcolor: 'var(--code-panel-bg)',
                 borderRadius: 'var(--radius-md)',
+                p: '18px',
+                border: '1px solid var(--border)',
+                fontFamily: '"JetBrains Mono", ui-monospace, SFMono-Regular, Menlo, Monaco, monospace',
                 fontSize: '13px',
                 lineHeight: 1.55,
                 height: '100%',
                 overflow: 'auto',
               }}
             >
+              {renderContent()}
             </Box>
           </Box>
         )}
       </Box>
+      {/* ── Plan display (bottom) ─────────────────────────────── */}
       {plan && plan.length > 0 && (
+        <Box
+          sx={{
+            borderTop: '1px solid var(--border)',
+            bgcolor: 'var(--plan-bg)',
             maxHeight: '30%',
             display: 'flex',
+            flexDirection: 'column',
+          }}
+        >
+          <Box
+            sx={{
+              p: 1.5,
+              borderBottom: '1px solid var(--border)',
+              display: 'flex',
+              alignItems: 'center',
+              gap: 1,
+            }}
+          >
+            <Typography
+              variant="caption"
+              sx={{ fontWeight: 600, color: 'var(--muted-text)', textTransform: 'uppercase', letterSpacing: '0.05em' }}
+            >
+              CURRENT PLAN
+            </Typography>
+          </Box>
+          <Stack spacing={1} sx={{ p: 2, overflow: 'auto' }}>
+            {plan.map((item) => (
+              <Stack key={item.id} direction="row" alignItems="flex-start" spacing={1.5}>
+                <Box sx={{ mt: 0.2 }}>
+                  <PlanStatusIcon status={item.status} />
+                </Box>
+                <Typography
+                  variant="body2"
+                  sx={{
+                    fontSize: '13px',
+                    fontFamily: 'ui-monospace, SFMono-Regular, Menlo, Monaco, monospace',
+                    color: item.status === 'completed' ? 'var(--muted-text)' : 'var(--text)',
+                    textDecoration: item.status === 'completed' ? 'line-through' : 'none',
+                    opacity: item.status === 'pending' ? 0.7 : 1,
+                  }}
+                >
+                  {item.content}
                 </Typography>
+              </Stack>
+            ))}
+          </Stack>
         </Box>
       )}
     </Box>

frontend/src/components/Layout/AppLayout.tsx CHANGED Viewed

@@ -1,65 +1,83 @@
-import { useCallback, useRef, useEffect } from 'react';
 import {
   Box,
   Drawer,
   Typography,
   IconButton,
 } from '@mui/material';
 import MenuIcon from '@mui/icons-material/Menu';
 import ChevronLeftIcon from '@mui/icons-material/ChevronLeft';
 import DragIndicatorIcon from '@mui/icons-material/DragIndicator';
 import { useSessionStore } from '@/store/sessionStore';
 import { useAgentStore } from '@/store/agentStore';
 import { useLayoutStore } from '@/store/layoutStore';
-import { useAgentWebSocket } from '@/hooks/useAgentWebSocket';
 import SessionSidebar from '@/components/SessionSidebar/SessionSidebar';
 import CodePanel from '@/components/CodePanel/CodePanel';
 import ChatInput from '@/components/Chat/ChatInput';
 import MessageList from '@/components/Chat/MessageList';
-import type { Message } from '@/types/agent';
 const DRAWER_WIDTH = 260;
 export default function AppLayout() {
-  const { activeSessionId } = useSessionStore();
-  const { isConnected, isProcessing, getMessages, addMessage } = useAgentStore();
   const {
     isLeftSidebarOpen,
     isRightPanelOpen,
     rightPanelWidth,
     setRightPanelWidth,
     toggleLeftSidebar,
-    toggleRightPanel
   } = useLayoutStore();
-  const isResizing = useRef(false);
-  const startResizing = useCallback((e: React.MouseEvent) => {
-    e.preventDefault();
-    isResizing.current = true;
-    document.addEventListener('mousemove', handleMouseMove);
-    document.addEventListener('mouseup', stopResizing);
-    document.body.style.cursor = 'col-resize';
-  }, []);
-  const stopResizing = useCallback(() => {
-    isResizing.current = false;
-    document.removeEventListener('mousemove', handleMouseMove);
-    document.removeEventListener('mouseup', stopResizing);
-    document.body.style.cursor = 'default';
-  }, []);
   const handleMouseMove = useCallback((e: MouseEvent) => {
     if (!isResizing.current) return;
     const newWidth = window.innerWidth - e.clientX;
-    const maxWidth = window.innerWidth * 0.8;
     const minWidth = 300;
     if (newWidth > minWidth && newWidth < maxWidth) {
       setRightPanelWidth(newWidth);
     }
   }, [setRightPanelWidth]);
   useEffect(() => {
     return () => {
       document.removeEventListener('mousemove', handleMouseMove);
@@ -67,75 +85,157 @@ export default function AppLayout() {
     };
   }, [handleMouseMove, stopResizing]);
-  const messages = activeSessionId ? getMessages(activeSessionId) : [];
-  useAgentWebSocket({
     sessionId: activeSessionId,
-    onReady: () => console.log('Agent ready'),
-    onError: (error) => console.error('Agent error:', error),
   });
   const handleSendMessage = useCallback(
     async (text: string) => {
-      if (!activeSessionId || !text.trim()) return;
-      const userMsg: Message = {
-        id: `user_${Date.now()}`,
-        role: 'user',
-        content: text.trim(),
-        timestamp: new Date().toISOString(),
-      };
-      addMessage(activeSessionId, userMsg);
-      try {
-        await fetch('/api/submit', {
           method: 'POST',
-          headers: { 'Content-Type': 'application/json' },
-          body: JSON.stringify({
-            session_id: activeSessionId,
-            text: text.trim(),
-          }),
-        });
-      } catch (e) {
-        console.error('Send failed:', e);
       }
     },
-    [activeSessionId, addMessage]
   );
   return (
     <Box sx={{ display: 'flex', width: '100%', height: '100%' }}>
-      {/* Left Sidebar Drawer */}
-      <Box
-        component="nav"
-        sx={{
-          width: { md: isLeftSidebarOpen ? DRAWER_WIDTH : 0 },
-          flexShrink: { md: 0 },
-          transition: isResizing.current ? 'none' : 'width 0.2s',
-          overflow: 'hidden',
-        }}
-      >
-        <Drawer
-          variant="persistent"
           sx={{
-            display: { xs: 'none', md: 'block' },
-            '& .MuiDrawer-paper': {
-              boxSizing: 'border-box',
-              width: DRAWER_WIDTH,
-              borderRight: '1px solid',
-              borderColor: 'divider',
-              top: 0,
-              height: '100%',
-              bgcolor: 'var(--panel)', // Ensure correct background matches sidebar
-            },
           }}
-          open={isLeftSidebarOpen}
         >
-          <SessionSidebar />
-        </Drawer>
-      </Box>
-      {/* Main Content Area */}
       <Box
         sx={{
           flexGrow: 1,
@@ -143,142 +243,226 @@ export default function AppLayout() {
           display: 'flex',
           flexDirection: 'column',
           transition: isResizing.current ? 'none' : 'width 0.2s',
-          position: 'relative',
           overflow: 'hidden',
         }}
       >
-        {/* Top Header Bar (Fixed) */}
         <Box sx={{
-          height: '60px',
-          px: 1,
           display: 'flex',
           alignItems: 'center',
           borderBottom: 1,
           borderColor: 'divider',
           bgcolor: 'background.default',
           zIndex: 1200,
         }}>
           <IconButton onClick={toggleLeftSidebar} size="small">
-            {isLeftSidebarOpen ? <ChevronLeftIcon /> : <MenuIcon />}
           </IconButton>
-          <Box sx={{ flex: 1, display: 'flex', justifyContent: 'center' }}>
-            <img
-              src="/hf-logo-white.png"
-              alt="Hugging Face"
-              style={{ height: '40px', objectFit: 'contain' }}
             />
           </Box>
-          <IconButton
-            onClick={toggleRightPanel}
-            size="small"
-            sx={{ visibility: isRightPanelOpen ? 'hidden' : 'visible' }}
-          >
-            <MenuIcon />
-          </IconButton>
         </Box>
         <Box
-          component="main"
-          className="chat-pane"
           sx={{
             flexGrow: 1,
             display: 'flex',
-            flexDirection: 'column',
             overflow: 'hidden',
-            background: 'linear-gradient(180deg, var(--bg), var(--panel))',
-            padding: '24px',
           }}
         >
-          {activeSessionId ? (
             <>
-              <MessageList messages={messages} isProcessing={isProcessing} />
-              <ChatInput
-                onSend={handleSendMessage}
-                disabled={isProcessing || !isConnected}
-              />
             </>
-          ) : (
-            <Box
-              sx={{
-                flex: 1,
-                display: 'flex',
-                alignItems: 'center',
-                justifyContent: 'center',
-                flexDirection: 'column',
-                gap: 2,
-              }}
-            >
-              <Typography variant="h5" color="text.secondary" sx={{ fontFamily: 'monospace' }}>
-                NO SESSION SELECTED
-              </Typography>
-              <Typography variant="body2" color="text.secondary" sx={{ fontFamily: 'monospace' }}>
-                Initialize a session via the sidebar
-              </Typography>
-            </Box>
           )}
         </Box>
       </Box>
-      {/* Resize Handle */}
-      {isRightPanelOpen && (
-        <Box
-          onMouseDown={startResizing}
-          sx={{
-            width: '4px',
-            cursor: 'col-resize',
-            bgcolor: 'divider',
-            display: 'flex',
-            alignItems: 'center',
-            justifyContent: 'center',
-            transition: 'background-color 0.2s',
-            zIndex: 1300,
-            overflow: 'hidden',
-            '&:hover': {
-              bgcolor: 'primary.main',
-            },
-          }}
-        >
-          <DragIndicatorIcon
-            sx={{
-              fontSize: '0.8rem',
-              color: 'text.secondary',
-              pointerEvents: 'none',
-            }}
-          />
-        </Box>
-      )}
-      {/* Right Panel Drawer */}
-      <Box
-        component="nav"
-        sx={{
-          width: { md: isRightPanelOpen ? rightPanelWidth : 0 },
-          flexShrink: { md: 0 },
-          transition: isResizing.current ? 'none' : 'width 0.2s',
-          overflow: 'hidden',
-        }}
-      >
         <Drawer
-          anchor="right"
-          variant="persistent"
           sx={{
-            display: { xs: 'none', md: 'block' },
             '& .MuiDrawer-paper': {
-              boxSizing: 'border-box',
-              width: rightPanelWidth,
-              borderLeft: 'none',
-              top: 0,
-              height: '100%',
               bgcolor: 'var(--panel)',
             },
           }}
-          open={isRightPanelOpen}
         >
           <CodePanel />
         </Drawer>
-      </Box>
     </Box>
   );
 }

+import { useCallback, useRef, useEffect, useState } from 'react';
 import {
+  Avatar,
   Box,
   Drawer,
   Typography,
   IconButton,
+  Alert,
+  AlertTitle,
+  Snackbar,
+  useMediaQuery,
+  useTheme,
 } from '@mui/material';
 import MenuIcon from '@mui/icons-material/Menu';
 import ChevronLeftIcon from '@mui/icons-material/ChevronLeft';
 import DragIndicatorIcon from '@mui/icons-material/DragIndicator';
+import DarkModeOutlinedIcon from '@mui/icons-material/DarkModeOutlined';
+import LightModeOutlinedIcon from '@mui/icons-material/LightModeOutlined';
+import { logger } from '@/utils/logger';
 import { useSessionStore } from '@/store/sessionStore';
 import { useAgentStore } from '@/store/agentStore';
 import { useLayoutStore } from '@/store/layoutStore';
+import { useAgentChat } from '@/hooks/useAgentChat';
 import SessionSidebar from '@/components/SessionSidebar/SessionSidebar';
 import CodePanel from '@/components/CodePanel/CodePanel';
 import ChatInput from '@/components/Chat/ChatInput';
 import MessageList from '@/components/Chat/MessageList';
+import WelcomeScreen from '@/components/WelcomeScreen/WelcomeScreen';
+import { apiFetch } from '@/utils/api';
 const DRAWER_WIDTH = 260;
 export default function AppLayout() {
+  const { sessions, activeSessionId, deleteSession, updateSessionTitle } = useSessionStore();
+  const { isConnected, isProcessing, setProcessing, activityStatus, llmHealthError, setLlmHealthError, user } = useAgentStore();
   const {
     isLeftSidebarOpen,
     isRightPanelOpen,
     rightPanelWidth,
+    themeMode,
     setRightPanelWidth,
+    setLeftSidebarOpen,
     toggleLeftSidebar,
+    toggleTheme,
   } = useLayoutStore();
+  const theme = useTheme();
+  const isMobile = useMediaQuery(theme.breakpoints.down('md'));
+  const [showExpiredToast, setShowExpiredToast] = useState(false);
+  const disconnectTimer = useRef<ReturnType<typeof setTimeout> | null>(null);
+  const isResizing = useRef(false);
   // Drag handler for the code-panel resize handle: while a drag is active,
   // derive the panel width from the pointer position and store it.
   const handleMouseMove = useCallback((e: MouseEvent) => {
     // Ignore mousemove events when no drag is in progress.
     if (!isResizing.current) return;
     // Width is the distance from the pointer to the right edge of the window.
     const newWidth = window.innerWidth - e.clientX;
+    const maxWidth = window.innerWidth * 0.6;
     const minWidth = 300;
     // Only accept widths strictly between 300px and 60% of the window width.
     if (newWidth > minWidth && newWidth < maxWidth) {
       setRightPanelWidth(newWidth);
     }
   }, [setRightPanelWidth]);
+  const stopResizing = useCallback(() => {
     // Ends a resize drag: clears the drag flag, detaches the document-level
     // mousemove/mouseup listeners and sets the body cursor back to 'default'.
+    isResizing.current = false;
+    document.removeEventListener('mousemove', handleMouseMove);
+    document.removeEventListener('mouseup', stopResizing);
+    document.body.style.cursor = 'default';
+  }, [handleMouseMove]);
+  const startResizing = useCallback((e: React.MouseEvent) => {
     // Begins a resize drag (wired to the resize handle's onMouseDown): sets the
     // drag flag, listens on the whole document so the drag keeps tracking when
     // the pointer leaves the 4px handle, and shows the col-resize cursor.
     // preventDefault presumably stops the browser from starting a text
     // selection during the drag; confirm.
+    e.preventDefault();
+    isResizing.current = true;
+    document.addEventListener('mousemove', handleMouseMove);
+    document.addEventListener('mouseup', stopResizing);
+    document.body.style.cursor = 'col-resize';
+  }, [handleMouseMove, stopResizing]);
   // Safety net for teardown while a drag may still be active (unmount, or the
   // handlers being re-created): detach BOTH document listeners registered by
   // startResizing, and undo the drag state so the page is not left with a
   // stuck col-resize cursor.
   useEffect(() => {
     return () => {
       document.removeEventListener('mousemove', handleMouseMove);
       document.removeEventListener('mouseup', stopResizing);
       // Only touch the cursor if a drag was actually interrupted.
       if (isResizing.current) {
         isResizing.current = false;
         document.body.style.cursor = 'default';
       }
     };
   }, [handleMouseMove, stopResizing]);
+  // ── LLM health check on mount ───────────────────────────────────
+  useEffect(() => {
+    let cancelled = false;
+    (async () => {
+      try {
+        const res = await apiFetch('/api/health/llm');
+        const data = await res.json();
+        if (!cancelled && data.status === 'error') {
+          setLlmHealthError({
+            error: data.error || 'Unknown LLM error',
+            errorType: data.error_type || 'unknown',
+            model: data.model,
+          });
+        } else if (!cancelled) {
+          setLlmHealthError(null);
+        }
+      } catch {
+        // Backend unreachable — not an LLM issue, ignore
+      }
+    })();
+    return () => { cancelled = true; };
+  }, []); // eslint-disable-line react-hooks/exhaustive-deps
+  const hasAnySessions = sessions.length > 0;
+  const { messages, sendMessage, undoLastTurn, approveTools } = useAgentChat({
     sessionId: activeSessionId,
+    onReady: () => logger.log('Agent ready'),
+    onError: (error) => logger.error('Agent error:', error),
+    onSessionDead: (deadSessionId) => {
+      logger.log('Removing dead session:', deadSessionId);
+      deleteSession(deadSessionId);
+    },
   });
+  // Debounced "session expired" toast — only fires after 2s of sustained disconnect
+  useEffect(() => {
+    if (!isConnected && messages.length > 0 && activeSessionId) {
+      disconnectTimer.current = setTimeout(() => setShowExpiredToast(true), 2000);
+    } else {
+      if (disconnectTimer.current) clearTimeout(disconnectTimer.current);
+      disconnectTimer.current = null;
+      setShowExpiredToast(false);
+    }
+    return () => {
+      if (disconnectTimer.current) clearTimeout(disconnectTimer.current);
+    };
+  }, [isConnected, messages.length, activeSessionId]);
   // Sends a user message for the active session and, for the first message,
   // asks the backend for a session title without blocking the send.
   const handleSendMessage = useCallback(
     async (text: string) => {
       // No-op without an active session, for blank input, or while a turn is
       // already being processed.
+      if (!activeSessionId || !text.trim() || isProcessing) return;
+      setProcessing(true);
+      sendMessage({ text: text.trim(), metadata: { createdAt: new Date().toISOString() } });
+      // Auto-title the session from the first user message (async, non-blocking)
       // NOTE(review): `messages` is the snapshot captured by this callback.
       // Presumably `<= 1` allows for the just-sent message already being in
       // the list; if it is not, the second message also re-titles the
       // session. Confirm against useAgentChat.
+      const isFirstMessage = messages.filter((m) => m.role === 'user').length <= 1;
+      if (isFirstMessage) {
         // Capture the id so a late response still targets this session.
+        const sessionId = activeSessionId;
+        apiFetch('/api/title', {
           method: 'POST',
+          body: JSON.stringify({ session_id: sessionId, text: text.trim() }),
+        })
+          .then((res) => res.json())
+          .then((data) => {
             // A response without a title leaves the current title unchanged.
+            if (data.title) updateSessionTitle(sessionId, data.title);
+          })
+          .catch(() => {
             // Fallback on request/parse failure: use the message text itself,
             // truncated to 40 characters plus an ellipsis.
+            const raw = text.trim();
+            updateSessionTitle(sessionId, raw.length > 40 ? raw.slice(0, 40) + '…' : raw);
+          });
       }
     },
+    [activeSessionId, sendMessage, messages, updateSessionTitle, isProcessing, setProcessing],
   );
+  // Close sidebar on mobile after selecting a session
+  const handleSidebarClose = useCallback(() => {
+    if (isMobile) setLeftSidebarOpen(false);
+  }, [isMobile, setLeftSidebarOpen]);
+  // ── LLM error toast helper ──────────────────────────────────────────
+  const llmErrorTitle = llmHealthError
+    ? llmHealthError.errorType === 'credits'
+      ? 'API Credits Exhausted'
+      : llmHealthError.errorType === 'auth'
+      ? 'Invalid API Key'
+      : llmHealthError.errorType === 'rate_limit'
+      ? 'Rate Limited'
+      : llmHealthError.errorType === 'network'
+      ? 'LLM Provider Unreachable'
+      : 'LLM Error'
+    : '';
+  // ── Welcome screen: no sessions at all ────────────────────────────
+  if (!hasAnySessions) {
+    return (
+      <Box sx={{ width: '100%', height: '100%', display: 'flex', flexDirection: 'column' }}>
+        <WelcomeScreen />
+      </Box>
+    );
+  }
+  // ── Sidebar drawer ────────────────────────────────────────────────
+  const sidebarDrawer = (
+    <Drawer
+      variant={isMobile ? 'temporary' : 'persistent'}
+      anchor="left"
+      open={isLeftSidebarOpen}
+      onClose={() => setLeftSidebarOpen(false)}
+      ModalProps={{ keepMounted: true }} // Better mobile perf
+      sx={{
+        '& .MuiDrawer-paper': {
+          boxSizing: 'border-box',
+          width: DRAWER_WIDTH,
+          borderRight: '1px solid',
+          borderColor: 'divider',
+          top: 0,
+          height: '100%',
+          bgcolor: 'var(--panel)',
+        },
+      }}
+    >
+      <SessionSidebar onClose={handleSidebarClose} />
+    </Drawer>
+  );
+  // ── Main chat interface ───────────────────────────────────────────
   return (
     <Box sx={{ display: 'flex', width: '100%', height: '100%' }}>
+      {/* ── Left Sidebar ─────────────────────────────────────────── */}
+      {isMobile ? (
+        // Mobile: temporary overlay drawer (no reserved width)
+        sidebarDrawer
+      ) : (
+        // Desktop: persistent drawer with reserved width
+        <Box
+          component="nav"
           sx={{
+            width: isLeftSidebarOpen ? DRAWER_WIDTH : 0,
+            flexShrink: 0,
+            transition: isResizing.current ? 'none' : 'width 0.2s',
+            overflow: 'hidden',
           }}
         >
+          {sidebarDrawer}
+        </Box>
+      )}
+      {/* ── Main Content (header + chat + code panel) ────────────── */}
       <Box
         sx={{
           flexGrow: 1,
           display: 'flex',
           flexDirection: 'column',
           transition: isResizing.current ? 'none' : 'width 0.2s',
           overflow: 'hidden',
+          minWidth: 0,
         }}
       >
+        {/* ── Top Header Bar ─────────────────────────────────────── */}
         <Box sx={{
+          height: { xs: 52, md: 60 },
+          px: { xs: 1, md: 2 },
           display: 'flex',
           alignItems: 'center',
           borderBottom: 1,
           borderColor: 'divider',
           bgcolor: 'background.default',
           zIndex: 1200,
+          flexShrink: 0,
         }}>
           <IconButton onClick={toggleLeftSidebar} size="small">
+            {isLeftSidebarOpen && !isMobile ? <ChevronLeftIcon /> : <MenuIcon />}
           </IconButton>
+          <Box sx={{ flex: 1, display: 'flex', justifyContent: 'center', alignItems: 'center', gap: 0.75 }}>
+            <Box
+              component="img"
+              src="https://huggingface.co/front/assets/huggingface_logo-noborder.svg"
+              alt="HF"
+              sx={{ width: { xs: 20, md: 22 }, height: { xs: 20, md: 22 } }}
             />
+            <Typography
+              variant="subtitle1"
+              sx={{
+                fontWeight: 700,
+                color: 'var(--text)',
+                letterSpacing: '-0.01em',
+                fontSize: { xs: '0.88rem', md: '0.95rem' },
+              }}
+            >
+              HF Agent
+            </Typography>
           </Box>
+          <Box sx={{ display: 'flex', alignItems: 'center', gap: 0.5 }}>
+            <IconButton
+              onClick={toggleTheme}
+              size="small"
+              sx={{
+                color: 'text.secondary',
+                '&:hover': { color: 'primary.main' },
+              }}
+            >
+              {themeMode === 'dark' ? <LightModeOutlinedIcon fontSize="small" /> : <DarkModeOutlinedIcon fontSize="small" />}
+            </IconButton>
+            {user?.picture ? (
+              <Avatar
+                src={user.picture}
+                alt={user.username || 'User'}
+                sx={{ width: 28, height: 28, ml: 0.5 }}
+              />
+            ) : user?.username ? (
+              <Avatar
+                sx={{
+                  width: 28,
+                  height: 28,
+                  ml: 0.5,
+                  bgcolor: 'primary.main',
+                  fontSize: '0.75rem',
+                  fontWeight: 700,
+                }}
+              >
+                {user.username[0].toUpperCase()}
+              </Avatar>
+            ) : null}
+          </Box>
         </Box>
+        {/* ── Chat + Code Panel ──────────────────────────────────── */}
         <Box
           sx={{
             flexGrow: 1,
             display: 'flex',
             overflow: 'hidden',
           }}
         >
+          {/* Chat area */}
+          <Box
+            component="main"
+            className="chat-pane"
+            sx={{
+              flexGrow: 1,
+              display: 'flex',
+              flexDirection: 'column',
+              overflow: 'hidden',
+              background: 'var(--body-gradient)',
+              p: { xs: 1.5, sm: 2, md: 3 },
+              minWidth: 0,
+            }}
+          >
+            {activeSessionId ? (
+              <>
+                <MessageList messages={messages} isProcessing={isProcessing} approveTools={approveTools} onUndoLastTurn={undoLastTurn} />
+                <ChatInput
+                  onSend={handleSendMessage}
+                  disabled={isProcessing || !isConnected || activityStatus.type === 'waiting-approval'}
+                  placeholder={activityStatus.type === 'waiting-approval' ? 'Approve or reject pending tools first...' : undefined}
+                />
+              </>
+            ) : (
+              <Box
+                sx={{
+                  flex: 1,
+                  display: 'flex',
+                  alignItems: 'center',
+                  justifyContent: 'center',
+                  flexDirection: 'column',
+                  gap: 2,
+                  px: 2,
+                }}
+              >
+                <Typography variant="h5" color="text.secondary" sx={{ fontFamily: 'monospace', fontSize: { xs: '1rem', md: '1.5rem' } }}>
+                  NO SESSION SELECTED
+                </Typography>
+                <Typography variant="body2" color="text.secondary" sx={{ fontFamily: 'monospace', fontSize: { xs: '0.75rem', md: '0.875rem' } }}>
+                  Initialize a session via the sidebar
+                </Typography>
+              </Box>
+            )}
+          </Box>
+          {/* Code panel — inline on desktop, overlay drawer on mobile */}
+          {isRightPanelOpen && !isMobile && (
             <>
+              <Box
+                onMouseDown={startResizing}
+                sx={{
+                  width: '4px',
+                  cursor: 'col-resize',
+                  bgcolor: 'divider',
+                  display: 'flex',
+                  alignItems: 'center',
+                  justifyContent: 'center',
+                  transition: 'background-color 0.2s',
+                  flexShrink: 0,
+                  '&:hover': { bgcolor: 'primary.main' },
+                }}
+              >
+                <DragIndicatorIcon
+                  sx={{ fontSize: '0.8rem', color: 'text.secondary', pointerEvents: 'none' }}
+                />
+              </Box>
+              <Box
+                sx={{
+                  width: rightPanelWidth,
+                  flexShrink: 0,
+                  height: '100%',
+                  overflow: 'hidden',
+                  borderLeft: '1px solid',
+                  borderColor: 'divider',
+                  bgcolor: 'var(--panel)',
+                }}
+              >
+                <CodePanel />
+              </Box>
             </>
           )}
         </Box>
       </Box>
+      {/* Code panel — drawer overlay on mobile */}
+      {isMobile && (
         <Drawer
+          anchor="bottom"
+          open={isRightPanelOpen}
+          onClose={() => useLayoutStore.getState().setRightPanelOpen(false)}
           sx={{
             '& .MuiDrawer-paper': {
+              height: '75vh',
+              borderTopLeftRadius: 16,
+              borderTopRightRadius: 16,
               bgcolor: 'var(--panel)',
             },
           }}
         >
           <CodePanel />
         </Drawer>
+      )}
+      <Snackbar
+        open={showExpiredToast}
+        anchorOrigin={{ vertical: 'bottom', horizontal: 'center' }}
+        onClose={() => setShowExpiredToast(false)}
+      >
+        <Alert
+          severity="warning"
+          variant="filled"
+          onClose={() => setShowExpiredToast(false)}
+          sx={{ fontFamily: 'monospace', fontSize: '0.8rem' }}
+        >
+          Session expired — create a new session to continue.
+        </Alert>
+      </Snackbar>
+      <Snackbar
+        open={!!llmHealthError}
+        anchorOrigin={{ vertical: 'top', horizontal: 'center' }}
+        onClose={() => setLlmHealthError(null)}
+      >
+        <Alert
+          severity="error"
+          variant="filled"
+          onClose={() => setLlmHealthError(null)}
+          sx={{ fontSize: '0.8rem', maxWidth: 480 }}
+        >
+          <AlertTitle sx={{ fontWeight: 700, fontSize: '0.85rem' }}>
+            {llmErrorTitle}
+          </AlertTitle>
+          {llmHealthError && (
+            <Typography variant="body2" sx={{ fontSize: '0.78rem', opacity: 0.9 }}>
+              {llmHealthError.model} — {llmHealthError.error.slice(0, 150)}
+            </Typography>
+          )}
+        </Alert>
+      </Snackbar>
     </Box>
   );
 }

frontend/src/components/SessionSidebar/SessionSidebar.tsx CHANGED Viewed

@@ -1,246 +1,344 @@
-import { useCallback } from 'react';
 import {
   Box,
-  List,
-  ListItem,
   IconButton,
   Typography,
-  Button,
-  Tooltip,
 } from '@mui/material';
-import DeleteIcon from '@mui/icons-material/Delete';
-import UndoIcon from '@mui/icons-material/Undo';
 import { useSessionStore } from '@/store/sessionStore';
 import { useAgentStore } from '@/store/agentStore';
 interface SessionSidebarProps {
   onClose?: () => void;
 }
-const StatusDiode = ({ connected }: { connected: boolean }) => (
   <Box
     sx={{
-      width: 10,
-      height: 10,
       borderRadius: '50%',
-      bgcolor: connected ? 'var(--accent-green)' : 'var(--accent-red)', // Use green/red for connection status
-      boxShadow: connected ? '0 0 6px rgba(47, 204, 113, 0.4)' : 'none',
-      transition: 'all 0.3s ease',
     }}
   />
 );
-const RunningIndicator = () => (
-    <Box
-      className="running-indicator"
-      sx={{
-        width: 10,
-        height: 10,
-        borderRadius: '50%',
-        bgcolor: 'var(--accent-yellow)',
-        boxShadow: '0 0 6px rgba(199,165,0,0.18)',
-      }}
-    />
-);
 export default function SessionSidebar({ onClose }: SessionSidebarProps) {
   const { sessions, activeSessionId, createSession, deleteSession, switchSession } =
     useSessionStore();
-  const { clearMessages, isConnected, isProcessing, setPlan, setPanelContent } = useAgentStore();
   const handleNewSession = useCallback(async () => {
     try {
-      const response = await fetch('/api/session', { method: 'POST' });
       const data = await response.json();
       createSession(data.session_id);
-      // Clear plan and code panel for new session
       setPlan([]);
-      setPanelContent(null);
       onClose?.();
-    } catch (e) {
-      console.error('Failed to create session:', e);
     }
-  }, [createSession, setPlan, setPanelContent, onClose]);
-  const handleDeleteSession = useCallback(
     async (sessionId: string, e: React.MouseEvent) => {
       e.stopPropagation();
       try {
-        await fetch(`/api/session/${sessionId}`, { method: 'DELETE' });
         deleteSession(sessionId);
-        clearMessages(sessionId);
-      } catch (e) {
-        console.error('Failed to delete session:', e);
       }
     },
-    [deleteSession, clearMessages]
   );
-  const handleSelectSession = useCallback(
     (sessionId: string) => {
       switchSession(sessionId);
-      // Clear plan and code panel when switching sessions
       setPlan([]);
-      setPanelContent(null);
       onClose?.();
     },
-    [switchSession, setPlan, setPanelContent, onClose]
   );
-  const handleUndo = useCallback(async () => {
-    if (!activeSessionId) return;
-    try {
-      await fetch(`/api/undo/${activeSessionId}`, { method: 'POST' });
-    } catch (e) {
-      console.error('Undo failed:', e);
-    }
-  }, [activeSessionId]);
-  const formatTime = (dateString: string) => {
-    return new Date(dateString).toLocaleTimeString([], { hour: '2-digit', minute: '2-digit' });
-  };
   return (
-    <Box className="sidebar" sx={{ height: '100%', display: 'flex', flexDirection: 'column', bgcolor: 'var(--panel)' }}>
-      {/* Header - Aligned with AppLayout 60px */}
-      <Box sx={{
-        height: '60px',
-        display: 'flex',
-        alignItems: 'center',
-        px: 2,
-        borderBottom: '1px solid rgba(255,255,255,0.03)'
-      }}>
-        <Box className="brand-logo" sx={{ display: 'flex' }}>
-            <img
-              src="/hf-log-only-white.png"
-              alt="HF Agent"
-              style={{ height: '24px', objectFit: 'contain' }}
-            />
-        </Box>
       </Box>
-      {/* Content */}
-      <Box sx={{ flex: 1, display: 'flex', flexDirection: 'column', p: 2, overflow: 'hidden' }}>
-        {/* System Info / Status */}
-        <Box sx={{ mb: 2, display: 'flex', alignItems: 'center', gap: 1 }}>
-          <StatusDiode connected={isConnected} />
-          <Typography variant="caption" sx={{ color: 'var(--muted-text)', fontFamily: 'inherit' }}>
-            {isConnected ? 'System Online' : 'Disconnected'}
-          </Typography>
-        </Box>
-        <Button
-          fullWidth
-          className="create-session"
           onClick={handleNewSession}
           sx={{
             display: 'inline-flex',
             alignItems: 'center',
-            justifyContent: 'flex-start',
-            gap: '10px',
-            padding: '10px 14px',
-            borderRadius: 'var(--radius-md)',
-            border: '1px solid rgba(255,255,255,0.06)',
-            bgcolor: 'transparent',
-            color: 'var(--text)',
-            fontWeight: 600,
-            textTransform: 'none',
-            mb: 3,
             '&:hover': {
-                bgcolor: 'rgba(255,255,255,0.02)',
-                border: '1px solid rgba(255,255,255,0.1)',
             },
-            '&::before': {
-                content: '""',
-                width: '4px',
-                height: '20px',
-                background: 'linear-gradient(180deg, var(--accent-yellow), rgba(199,165,0,0.9))',
-                borderRadius: '4px',
-            }
           }}
         >
-          New Session
-        </Button>
-        {/* Session List */}
-        <Box sx={{ flex: 1, overflow: 'auto', mx: -1, px: 1 }}>
-            <List disablePadding sx={{ display: 'flex', flexDirection: 'column', gap: 1 }}>
-            {[...sessions].reverse().map((session, index) => {
-                const sessionNumber = sessions.length - index;
-                const isSelected = session.id === activeSessionId;
-                return (
-                <ListItem
-                    key={session.id}
-                    disablePadding
-                    className="session-item"
-                    onClick={() => handleSelectSession(session.id)}
-                    sx={{
-                        display: 'flex',
-                        alignItems: 'center',
-                        gap: '12px',
-                        padding: '10px',
-                        borderRadius: 'var(--radius-md)',
-                        bgcolor: isSelected ? 'rgba(255,255,255,0.05)' : 'transparent',
-                        cursor: 'pointer',
-                        transition: 'background 0.18s ease, transform 0.08s ease',
-                        '&:hover': {
-                            bgcolor: 'rgba(255,255,255,0.02)',
-                            transform: 'translateY(-1px)',
-                        },
-                        '& .delete-btn': {
-                            opacity: 0,
-                            transition: 'opacity 0.2s',
-                        },
-                        '&:hover .delete-btn': {
-                            opacity: 1,
-                        }
-                    }}
-                >
-                    <Box sx={{ flex: 1, overflow: 'hidden' }}>
-                        <Typography variant="body2" sx={{ fontWeight: 500, color: 'var(--text)', whiteSpace: 'nowrap', overflow: 'hidden', textOverflow: 'ellipsis' }}>
-                            Session {String(sessionNumber).padStart(2, '0')}
-                        </Typography>
-                        <Box sx={{ display: 'flex', alignItems: 'center', gap: 1, mt: 0.5 }}>
-                            {session.isActive && <RunningIndicator />}
-                            <Typography className="time" variant="caption" sx={{ fontSize: '12px', color: 'var(--muted-text)' }}>
-                                {formatTime(session.createdAt)}
-                            </Typography>
-                        </Box>
-                    </Box>
-                    <IconButton
-                        className="delete-btn"
-                        size="small"
-                        onClick={(e) => handleDeleteSession(session.id, e)}
-                        sx={{ color: 'var(--muted-text)', '&:hover': { color: 'var(--accent-red)' } }}
-                    >
-                        <DeleteIcon fontSize="small" />
-                    </IconButton>
-                </ListItem>
-                );
-            })}
-            </List>
         </Box>
-      </Box>
-      {/* Footer */}
-      <Box sx={{ p: 2, borderTop: '1px solid rgba(255,255,255,0.03)' }}>
-        <Box sx={{ display: 'flex', alignItems: 'center', justifyContent: 'space-between' }}>
-            <Typography variant="caption" className="small-note" sx={{ fontSize: '12px', color: 'var(--muted-text)' }}>
-            {sessions.length} active
-            </Typography>
-            <Tooltip title="Undo last turn">
-            <span>
-                <IconButton
-                onClick={handleUndo}
-                disabled={!activeSessionId || isProcessing}
-                size="small"
-                sx={{ color: 'var(--muted-text)', '&:hover': { color: 'var(--text)' } }}
-                >
-                <UndoIcon fontSize="small" />
-                </IconButton>
-            </span>
-            </Tooltip>
         </Box>
       </Box>
     </Box>

+import { useCallback, useState } from 'react';
 import {
+  Alert,
   Box,
   IconButton,
   Typography,
+  CircularProgress,
+  Divider,
 } from '@mui/material';
+import AddIcon from '@mui/icons-material/Add';
+import DeleteOutlineIcon from '@mui/icons-material/DeleteOutline';
+import ChatBubbleOutlineIcon from '@mui/icons-material/ChatBubbleOutline';
 import { useSessionStore } from '@/store/sessionStore';
 import { useAgentStore } from '@/store/agentStore';
+import { apiFetch } from '@/utils/api';
 interface SessionSidebarProps {
   onClose?: () => void;
 }
+/**
  * Small coloured dot for connection status.
  *
  * Renders a 6x6 circle filled with `var(--accent-green)` plus a soft glow
  * when `connected` is true, and `var(--accent-red)` with no glow otherwise.
  * `flexShrink: 0` stops a flex parent from shrinking it.
  */
+const StatusDot = ({ connected }: { connected: boolean }) => (
   <Box
     sx={{
+      width: 6,
+      height: 6,
       borderRadius: '50%',
+      bgcolor: connected ? 'var(--accent-green)' : 'var(--accent-red)',
+      boxShadow: connected ? '0 0 4px rgba(76,175,80,0.4)' : 'none',
+      flexShrink: 0,
     }}
   />
 );
 export default function SessionSidebar({ onClose }: SessionSidebarProps) {
   const { sessions, activeSessionId, createSession, deleteSession, switchSession } =
     useSessionStore();
+  const { isConnected, setPlan, clearPanel } =
+    useAgentStore();
+  const [isCreatingSession, setIsCreatingSession] = useState(false);
+  const [capacityError, setCapacityError] = useState<string | null>(null);
+  // ── Handlers ──────────────────────────────────────────────────────
   // Creates a backend session (POST /api/session) and registers it in the
   // session store. The in-flight flag guards against double submission, and
   // every failure is surfaced through `capacityError` instead of storing an
   // unusable session.
   const handleNewSession = useCallback(async () => {
+    if (isCreatingSession) return;
+    setIsCreatingSession(true);
+    setCapacityError(null);
     try {
+      const response = await apiFetch('/api/session', { method: 'POST' });
+      if (response.status >= 400) {
+        // Any error status (not just 503) must stop here; otherwise the error
+        // body would be parsed and stored as a session with an undefined id.
+        // Error bodies are not guaranteed to be JSON, hence the guarded parse.
+        const errorBody = await response.json().catch(() => null);
+        const fallback =
+          response.status === 503 ? 'Server is at capacity.' : 'Failed to create session.';
+        setCapacityError(
+          typeof errorBody?.detail === 'string' && errorBody.detail ? errorBody.detail : fallback,
+        );
+        return;
+      }
       const data = await response.json();
+      if (!data?.session_id) {
+        // A success status without an id is unusable; report it instead.
+        setCapacityError('Failed to create session.');
+        return;
+      }
       createSession(data.session_id);
       setPlan([]);
+      clearPanel();
       onClose?.();
+    } catch {
+      setCapacityError('Failed to create session.');
+    } finally {
+      setIsCreatingSession(false);
     }
+  }, [isCreatingSession, createSession, setPlan, clearPanel, onClose]);
+  const handleDelete = useCallback(
     // Deletes a session: best-effort DELETE on the backend, then always remove
     // it from the local store. stopPropagation keeps the click from also
     // reaching handlers on enclosing elements.
     async (sessionId: string, e: React.MouseEvent) => {
       e.stopPropagation();
       try {
+        await apiFetch(`/api/session/${sessionId}`, { method: 'DELETE' });
+      } catch {
+        // Delete locally even if backend fails (session may already be gone)
       }
+      // Single local removal for both outcomes, so the store is never asked to
+      // delete the same session twice.
       deleteSession(sessionId);
     },
+    [deleteSession],
   );
+  /** Switch to the clicked session, reset the plan and panel state, then call the optional onClose callback. */
+  const handleSelect = useCallback(
+    (targetId: string) => {
+      switchSession(targetId);
+      setPlan([]);
+      clearPanel();
+      if (onClose) onClose();
+    },
+    [switchSession, setPlan, clearPanel, onClose],
+  );
+  /** Format a date string as a locale time with 2-digit hour and minute (the date part is not shown). */
+  const formatTime = (d: string) => {
+    const when = new Date(d);
+    return when.toLocaleTimeString([], { hour: '2-digit', minute: '2-digit' });
+  };
+  // ── Render ────────────────────────────────────────────────────────
   return (
+    <Box
+      sx={{
+        height: '100%',
+        display: 'flex',
+        flexDirection: 'column',
+        bgcolor: 'var(--panel)',
+      }}
+    >
+      {/* ── Header ─────────────────────────────────────────────────── */}
+      <Box sx={{ px: 1.75, pt: 2, pb: 0 }}>
+        <Typography
+          variant="caption"
+          sx={{
+            color: 'var(--muted-text)',
+            fontSize: '0.65rem',
+            fontWeight: 600,
+            textTransform: 'uppercase',
+            letterSpacing: '0.08em',
+          }}
+        >
+          Recent chats
+        </Typography>
       </Box>
+      {/* ── Capacity error ─────────────────────────────────────────── */}
+      {capacityError && (
+        <Alert
+          severity="warning"
+          variant="outlined"
+          onClose={() => setCapacityError(null)}
+          sx={{
+            m: 1,
+            fontSize: '0.7rem',
+            py: 0.25,
+            '& .MuiAlert-message': { py: 0 },
+            borderColor: '#FF9D00',
+            color: 'var(--text)',
+          }}
+        >
+          {capacityError}
+        </Alert>
+      )}
+      {/* ── Session list ───────────────────────────────────────────── */}
+      <Box
+        sx={{
+          flex: 1,
+          overflow: 'auto',
+          py: 1,
+          // Thinner scrollbar
+          '&::-webkit-scrollbar': { width: 4 },
+          '&::-webkit-scrollbar-thumb': {
+            bgcolor: 'var(--scrollbar-thumb)',
+            borderRadius: 2,
+          },
+        }}
+      >
+        {sessions.length === 0 ? (
+          <Box
+            sx={{
+              display: 'flex',
+              flexDirection: 'column',
+              alignItems: 'center',
+              justifyContent: 'center',
+              py: 8,
+              px: 3,
+              gap: 1.5,
+            }}
+          >
+            <ChatBubbleOutlineIcon
+              sx={{ fontSize: 28, color: 'var(--muted-text)', opacity: 0.25 }}
+            />
+            <Typography
+              variant="caption"
+              sx={{
+                color: 'var(--muted-text)',
+                opacity: 0.5,
+                textAlign: 'center',
+                lineHeight: 1.5,
+                fontSize: '0.72rem',
+              }}
+            >
+              No sessions yet
+            </Typography>
+          </Box>
+        ) : (
+          [...sessions].reverse().map((session, index) => {
+            const num = sessions.length - index;
+            const isSelected = session.id === activeSessionId;
+            return (
+              <Box
+                key={session.id}
+                onClick={() => handleSelect(session.id)}
+                sx={{
+                  display: 'flex',
+                  alignItems: 'center',
+                  gap: 1,
+                  px: 1.5,
+                  py: 0.875,
+                  mx: 0.75,
+                  borderRadius: '10px',
+                  cursor: 'pointer',
+                  transition: 'background-color 0.12s ease',
+                  bgcolor: isSelected
+                    ? 'var(--hover-bg)'
+                    : 'transparent',
+                  '&:hover': {
+                    bgcolor: 'var(--hover-bg)',
+                  },
+                  '& .delete-btn': {
+                    opacity: 0,
+                    transition: 'opacity 0.12s',
+                  },
+                  '&:hover .delete-btn': {
+                    opacity: 1,
+                  },
+                }}
+              >
+                <ChatBubbleOutlineIcon
+                  sx={{
+                    fontSize: 15,
+                    color: isSelected ? 'var(--text)' : 'var(--muted-text)',
+                    opacity: isSelected ? 0.8 : 0.4,
+                    flexShrink: 0,
+                  }}
+                />
+                <Box sx={{ flex: 1, minWidth: 0 }}>
+                  <Typography
+                    variant="body2"
+                    sx={{
+                      fontWeight: isSelected ? 600 : 400,
+                      color: 'var(--text)',
+                      fontSize: '0.84rem',
+                      lineHeight: 1.4,
+                      whiteSpace: 'nowrap',
+                      overflow: 'hidden',
+                      textOverflow: 'ellipsis',
+                    }}
+                  >
+                    {session.title.startsWith('Chat ') ? `Session ${String(num).padStart(2, '0')}` : session.title}
+                  </Typography>
+                  <Typography
+                    variant="caption"
+                    sx={{
+                      color: 'var(--muted-text)',
+                      fontSize: '0.65rem',
+                      lineHeight: 1.2,
+                    }}
+                  >
+                    {formatTime(session.createdAt)}
+                  </Typography>
+                </Box>
+                <IconButton
+                  className="delete-btn"
+                  size="small"
+                  onClick={(e) => handleDelete(session.id, e)}
+                  sx={{
+                    color: 'var(--muted-text)',
+                    width: 26,
+                    height: 26,
+                    flexShrink: 0,
+                    '&:hover': { color: 'var(--accent-red)', bgcolor: 'rgba(244,67,54,0.08)' },
+                  }}
+                >
+                  <DeleteOutlineIcon sx={{ fontSize: 15 }} />
+                </IconButton>
+              </Box>
+            );
+          })
+        )}
+      </Box>
+      {/* ── Footer: New Session + status ──────────────────────────── */}
+      <Divider sx={{ opacity: 0.5 }} />
+      <Box
+        sx={{
+          px: 1.5,
+          py: 1.5,
+          display: 'flex',
+          flexDirection: 'column',
+          gap: 1,
+          flexShrink: 0,
+        }}
+      >
+        <Box
+          component="button"
           onClick={handleNewSession}
+          disabled={isCreatingSession}
           sx={{
             display: 'inline-flex',
             alignItems: 'center',
+            justifyContent: 'center',
+            gap: 0.75,
+            width: '100%',
+            px: 1.5,
+            py: 1.25,
+            border: 'none',
+            borderRadius: '10px',
+            bgcolor: '#FF9D00',
+            color: '#000',
+            fontSize: '0.85rem',
+            fontWeight: 700,
+            cursor: 'pointer',
+            transition: 'all 0.12s ease',
             '&:hover': {
+              bgcolor: '#FFB340',
+            },
+            '&:disabled': {
+              opacity: 0.5,
+              cursor: 'not-allowed',
             },
           }}
         >
+          {isCreatingSession ? (
+            <>
+              <CircularProgress size={12} sx={{ color: '#000' }} />
+              Creating...
+            </>
+          ) : (
+            <>
+              <AddIcon sx={{ fontSize: 16 }} />
+              New Session
+            </>
+          )}
         </Box>
+        <Box
+          sx={{
+            display: 'flex',
+            alignItems: 'center',
+            justifyContent: 'center',
+            gap: 0.5,
+          }}
+        >
+          <StatusDot connected={isConnected} />
+          <Typography
+            variant="caption"
+            sx={{ color: 'var(--muted-text)', fontSize: '0.62rem', letterSpacing: '0.02em' }}
+          >
+            {sessions.length} session{sessions.length !== 1 ? 's' : ''} &middot; Backend {isConnected ? 'online' : 'offline'}
+          </Typography>
         </Box>
       </Box>
     </Box>

frontend/src/components/WelcomeScreen/WelcomeScreen.tsx ADDED Viewed

	@@ -0,0 +1,247 @@

+import { useState, useCallback } from 'react';
+import {
+  Box,
+  Typography,
+  Button,
+  CircularProgress,
+  Alert,
+} from '@mui/material';
+import OpenInNewIcon from '@mui/icons-material/OpenInNew';
+import { useSessionStore } from '@/store/sessionStore';
+import { useAgentStore } from '@/store/agentStore';
+import { apiFetch } from '@/utils/api';
+import { isInIframe, triggerLogin } from '@/hooks/useAuth';
+/** HF brand orange */
+const HF_ORANGE = '#FF9D00';
+export default function WelcomeScreen() {
+  const { createSession } = useSessionStore();
+  const { setPlan, clearPanel, user } = useAgentStore();
+  const [isCreating, setIsCreating] = useState(false);
+  const [error, setError] = useState<string | null>(null);
+  const inIframe = isInIframe();
+  const isAuthenticated = user?.authenticated;
+  const isDevUser = user?.username === 'dev';
+  /**
+   * Start a new agent session from the welcome screen.
+   *
+   * Unauthenticated, non-dev users are sent to login instead (no-op inside an
+   * iframe, where a different button is rendered). On success the session is
+   * registered in the session store and the plan/panel state is reset. Every
+   * failure path sets `error` so the user gets feedback.
+   */
+  const handleStart = useCallback(async () => {
+    if (isCreating) return;
+    // Not authenticated and not dev → need to login
+    if (!isAuthenticated && !isDevUser) {
+      // In iframe: can't redirect (cookies blocked) — user needs to open in new tab.
+      // This shouldn't happen because we show a different button in iframe,
+      // but guard against it anyway.
+      if (inIframe) return;
+      triggerLogin();
+      return;
+    }
+    setIsCreating(true);
+    setError(null);
+    try {
+      const response = await apiFetch('/api/session', { method: 'POST' });
+      if (response.status === 503) {
+        // Prefer the server-provided detail, but tolerate a non-JSON 503 body.
+        const data = await response.json().catch(() => null);
+        setError(data?.detail || 'Server is at capacity. Please try again later.');
+        return;
+      }
+      if (response.status === 401) {
+        triggerLogin();
+        return;
+      }
+      if (!response.ok) {
+        setError('Failed to create session. Please try again.');
+        return;
+      }
+      const data = await response.json();
+      createSession(data.session_id);
+      setPlan([]);
+      clearPanel();
+    } catch {
+      // Network failures and unparsable bodies land here. Previously they were
+      // swallowed silently, leaving the button re-enabled with no explanation.
+      // NOTE(review): if the login redirect itself can throw, the message is
+      // presumably harmless because the page is navigating away — confirm.
+      setError('Failed to create session. Please try again.');
+    } finally {
+      setIsCreating(false);
+    }
+  }, [isCreating, createSession, setPlan, clearPanel, isAuthenticated, isDevUser, inIframe]);
+  // Build the direct Space URL for the "open in new tab" link
+  const spaceHost = typeof window !== 'undefined'
+    ? window.location.hostname.includes('.hf.space')
+      ? window.location.origin // hostname contains .hf.space: reuse the current origin
+      : `https://smolagents-ml-agent.hf.space` // otherwise fall back to the hard-coded Space URL
+    : ''; // no `window` object available
+  return (
+    <Box
+      sx={{
+        width: '100%',
+        height: '100%',
+        display: 'flex',
+        flexDirection: 'column',
+        alignItems: 'center',
+        justifyContent: 'center',
+        background: 'var(--body-gradient)',
+        py: 8,
+      }}
+    >
+      {/* HF Logo */}
+      <Box
+        component="img"
+        src="https://huggingface.co/front/assets/huggingface_logo-noborder.svg"
+        alt="Hugging Face"
+        sx={{ width: 96, height: 96, mb: 3, display: 'block' }}
+      />
+      {/* Title */}
+      <Typography
+        variant="h2"
+        sx={{
+          fontWeight: 800,
+          color: 'var(--text)',
+          mb: 1.5,
+          letterSpacing: '-0.02em',
+          fontSize: { xs: '2rem', md: '2.8rem' },
+        }}
+      >
+        HF Agent
+      </Typography>
+      {/* Description */}
+      <Typography
+        variant="body1"
+        sx={{
+          color: 'var(--muted-text)',
+          maxWidth: 520,
+          mb: 5,
+          lineHeight: 1.8,
+          fontSize: '0.95rem',
+          textAlign: 'center',
+          px: 2,
+          '& strong': { color: 'var(--text)', fontWeight: 600 },
+        }}
+      >
+        A general-purpose AI agent for <strong>machine learning engineering</strong>.
+        It browses <strong>Hugging Face documentation</strong>, manages{' '}
+        <strong>repositories</strong>, launches <strong>training jobs</strong>,
+        and explores <strong>datasets</strong> — all through natural conversation.
+      </Typography>
+      {/* Action button — depends on context */}
+      {inIframe && !isAuthenticated && !isDevUser ? (
+        // In iframe + not logged in → link to open Space directly
+        <Button
+          variant="contained"
+          size="large"
+          component="a"
+          href={spaceHost}
+          target="_blank"
+          rel="noopener noreferrer"
+          endIcon={<OpenInNewIcon />}
+          sx={{
+            px: 5,
+            py: 1.5,
+            fontSize: '1rem',
+            fontWeight: 700,
+            textTransform: 'none',
+            borderRadius: '12px',
+            bgcolor: HF_ORANGE,
+            color: '#000',
+            boxShadow: '0 4px 24px rgba(255, 157, 0, 0.3)',
+            textDecoration: 'none',
+            '&:hover': {
+              bgcolor: '#FFB340',
+              boxShadow: '0 6px 32px rgba(255, 157, 0, 0.45)',
+            },
+          }}
+        >
+          Open HF Agent
+        </Button>
+      ) : !isAuthenticated && !isDevUser ? (
+        // Direct access + not logged in → sign in button
+        <Button
+          variant="contained"
+          size="large"
+          onClick={() => triggerLogin()}
+          sx={{
+            px: 5,
+            py: 1.5,
+            fontSize: '1rem',
+            fontWeight: 700,
+            textTransform: 'none',
+            borderRadius: '12px',
+            bgcolor: HF_ORANGE,
+            color: '#000',
+            boxShadow: '0 4px 24px rgba(255, 157, 0, 0.3)',
+            '&:hover': {
+              bgcolor: '#FFB340',
+              boxShadow: '0 6px 32px rgba(255, 157, 0, 0.45)',
+            },
+          }}
+        >
+          Sign in with Hugging Face
+        </Button>
+      ) : (
+        // Authenticated or dev → start session
+        <Button
+          variant="contained"
+          size="large"
+          onClick={handleStart}
+          disabled={isCreating}
+          startIcon={
+            isCreating ? <CircularProgress size={20} color="inherit" /> : null
+          }
+          sx={{
+            px: 5,
+            py: 1.5,
+            fontSize: '1rem',
+            fontWeight: 700,
+            textTransform: 'none',
+            borderRadius: '12px',
+            bgcolor: HF_ORANGE,
+            color: '#000',
+            boxShadow: '0 4px 24px rgba(255, 157, 0, 0.3)',
+            '&:hover': {
+              bgcolor: '#FFB340',
+              boxShadow: '0 6px 32px rgba(255, 157, 0, 0.45)',
+            },
+            '&.Mui-disabled': {
+              bgcolor: 'rgba(255, 157, 0, 0.35)',
+              color: 'rgba(0,0,0,0.45)',
+            },
+          }}
+        >
+          {isCreating ? 'Initializing...' : 'Start Session'}
+        </Button>
+      )}
+      {/* Error */}
+      {error && (
+        <Alert
+          severity="warning"
+          variant="outlined"
+          onClose={() => setError(null)}
+          sx={{
+            mt: 3,
+            maxWidth: 400,
+            fontSize: '0.8rem',
+            borderColor: HF_ORANGE,
+            color: 'var(--text)',
+          }}
+        >
+          {error}
+        </Alert>
+      )}
+      {/* Footnote */}
+      <Typography
+        variant="caption"
+        sx={{ mt: 5, color: 'var(--muted-text)', opacity: 0.5, fontSize: '0.7rem' }}
+      >
+        Conversations are stored locally in your browser.
+      </Typography>
+    </Box>
+  );
+}

frontend/src/hooks/useAgentChat.ts ADDED Viewed

	@@ -0,0 +1,278 @@

+/**
+ * Central hook wiring the Vercel AI SDK's useChat with our custom
+ * WebSocketChatTransport. Replaces the old useAgentWebSocket + agentStore
+ * message management.
+ */
+import { useCallback, useEffect, useMemo, useRef } from 'react';
+import { useChat } from '@ai-sdk/react';
+import type { UIMessage } from 'ai';
+import { WebSocketChatTransport, type SideChannelCallbacks } from '@/lib/ws-chat-transport';
+import { loadMessages, saveMessages } from '@/lib/chat-message-store';
+import { apiFetch } from '@/utils/api';
+import { useAgentStore } from '@/store/agentStore';
+import { useSessionStore } from '@/store/sessionStore';
+import { useLayoutStore } from '@/store/layoutStore';
+import { logger } from '@/utils/logger';
+interface UseAgentChatOptions {
+  sessionId: string | null; // session to connect to; when null the hook uses the '__no_session__' chat id and loads no messages
+  onReady?: () => void; // called from the side channel's onReady handler
+  onError?: (error: string) => void; // called from the side channel's onError handler with the error text
+  onSessionDead?: (sessionId: string) => void; // called from the side channel's onSessionDead handler with the dead session's id
+}
+export function useAgentChat({ sessionId, onReady, onError, onSessionDead }: UseAgentChatOptions) {
+  const callbacksRef = useRef({ onReady, onError, onSessionDead });
+  callbacksRef.current = { onReady, onError, onSessionDead };
+  const {
+    setProcessing,
+    setConnected,
+    setActivityStatus,
+    setError,
+    setPanel,
+    setPanelOutput,
+  } = useAgentStore();
+  const { setRightPanelOpen, setLeftSidebarOpen } = useLayoutStore();
+  const { setSessionActive } = useSessionStore();
+  // ── Build side-channel callbacks (stable ref) ────────────────────
+  const sideChannel = useMemo<SideChannelCallbacks>(
+    () => ({
+      onReady: () => { // mark connected and idle, flag the session active, then notify the caller
+        setConnected(true);
+        setProcessing(false);
+        if (sessionId) setSessionActive(sessionId, true);
+        callbacksRef.current.onReady?.();
+      },
+      onShutdown: () => {
+        setConnected(false);
+        setProcessing(false);
+      },
+      onError: (error: string) => { // store the error, stop the processing state, then notify the caller
+        setError(error);
+        setProcessing(false);
+        callbacksRef.current.onError?.(error);
+      },
+      onProcessing: () => {
+        setProcessing(true);
+        setActivityStatus({ type: 'thinking' });
+      },
+      onProcessingDone: () => {
+        setProcessing(false);
+      },
+      onUndoComplete: () => {
+        setProcessing(false);
+        // Remove the last turn (user msg + assistant response) from useChat state
+        const setMsgs = chatActionsRef.current.setMessages; // read through the ref: this callback is created before useChat runs
+        const msgs = chatActionsRef.current.messages;
+        if (setMsgs && msgs.length > 0) {
+          let lastUserIdx = -1;
+          for (let i = msgs.length - 1; i >= 0; i--) { // scan backwards for the most recent user message
+            if (msgs[i].role === 'user') { lastUserIdx = i; break; }
+          }
+          const updated = lastUserIdx > 0 ? msgs.slice(0, lastUserIdx) : []; // index 0 or no user message found (-1) both yield an empty history
+          setMsgs(updated);
+          if (sessionId) saveMessages(sessionId, updated); // persist the trimmed history for this session
+        }
+      },
+      onCompacted: (oldTokens: number, newTokens: number) => { // log only; no state is changed
+        logger.log(`Context compacted: ${oldTokens} → ${newTokens} tokens`);
+      },
+      onPlanUpdate: (plan) => {
+        useAgentStore.getState().setPlan(plan as Array<{ id: string; content: string; status: 'pending' | 'in_progress' | 'completed' }>);
+        if (!useLayoutStore.getState().isRightPanelOpen) { // read current layout state at call time rather than from the memoised closure
+          setRightPanelOpen(true);
+        }
+      },
+      onToolLog: (tool: string, log: string) => {
+        if (tool === 'hf_jobs') { // logs from any other tool are ignored here
+          const state = useAgentStore.getState();
+          const existingOutput = state.panelData?.output?.content || '';
+          const newContent = existingOutput
+            ? existingOutput + '\n' + log
+            : '--- Job execution started ---\n' + log; // first log line gets a header prepended
+          setPanelOutput({ content: newContent, language: 'text' });
+          if (!useLayoutStore.getState().isRightPanelOpen) {
+            setRightPanelOpen(true);
+          }
+        }
+      },
+      onConnectionChange: (connected: boolean) => {
+        setConnected(connected);
+      },
+      onSessionDead: (deadSessionId: string) => { // only logs and forwards; removal is left to the caller's onSessionDead
+        logger.warn(`Session ${deadSessionId} dead, removing`);
+        callbacksRef.current.onSessionDead?.(deadSessionId);
+      },
+      onApprovalRequired: (tools) => {
+        if (!tools.length) return;
+        setActivityStatus({ type: 'waiting-approval' });
+        const firstTool = tools[0]; // only the first pending tool is shown in the panel
+        const args = firstTool.arguments as Record<string, string | undefined>;
+        if (firstTool.tool === 'hf_jobs' && args.script) { // job with a script: show it in the script view
+          setPanel(
+            { title: 'Script', script: { content: args.script, language: 'python' }, parameters: firstTool.arguments as Record<string, unknown> },
+            'script',
+            true,
+          );
+        } else if (firstTool.tool === 'hf_repo_files' && args.content) { // repo file with content: show the file body
+          const filename = args.path || 'file';
+          setPanel({
+            title: filename.split('/').pop() || 'Content',
+            script: { content: args.content, language: filename.endsWith('.py') ? 'python' : 'text' },
+            parameters: firstTool.arguments as Record<string, unknown>,
+          });
+        } else { // anything else: show the raw arguments as JSON
+          setPanel({
+            title: firstTool.tool,
+            output: { content: JSON.stringify(firstTool.arguments, null, 2), language: 'json' },
+          }, 'output');
+        }
+        setRightPanelOpen(true);
+        setLeftSidebarOpen(false);
+      },
+      onToolCallPanel: (toolName: string, args: Record<string, unknown>) => { // tool calls matching neither branch leave the panel untouched
+        if (toolName === 'hf_jobs' && args.operation && args.script) {
+          setPanel(
+            { title: 'Script', script: { content: String(args.script), language: 'python' }, parameters: args },
+            'script',
+          );
+          setRightPanelOpen(true);
+          setLeftSidebarOpen(false);
+        } else if (toolName === 'hf_repo_files' && args.operation === 'upload' && args.content) {
+          setPanel({
+            title: `File Upload: ${String(args.path || 'unnamed')}`,
+            script: { content: String(args.content), language: String(args.path || '').endsWith('.py') ? 'python' : 'text' },
+            parameters: args,
+          });
+          setRightPanelOpen(true);
+          setLeftSidebarOpen(false);
+        }
+      },
+      onToolOutputPanel: (toolName: string, _toolCallId: string, output: string, success: boolean) => {
+        if (toolName === 'hf_jobs' && output) {
+          setPanelOutput({ content: output, language: 'markdown' });
+          if (!success) useAgentStore.getState().setPanelView('output'); // on failure, switch the panel to the output view
+        }
+      },
+      onStreaming: () => {
+        setActivityStatus({ type: 'streaming' });
+      },
+      onToolRunning: (toolName: string) => {
+        setActivityStatus({ type: 'tool', toolName });
+      },
+    }),
+    // Zustand setters are stable
+    // eslint-disable-next-line react-hooks/exhaustive-deps
+    [sessionId], // rebuilt only when the session changes; caller callbacks are read via callbacksRef
+  );
+  // ── Create transport (single stable instance for the lifetime of this hook) ──
+  const transportRef = useRef<WebSocketChatTransport | null>(null);
+  if (!transportRef.current) {
+    transportRef.current = new WebSocketChatTransport({ sideChannel });
+  }
+  // Keep side-channel callbacks in sync (they capture sessionId)
+  useEffect(() => {
+    transportRef.current?.updateSideChannel(sideChannel);
+  }, [sideChannel]);
+  // Connect / disconnect WebSocket when session changes
+  useEffect(() => {
+    transportRef.current?.connectToSession(sessionId);
+    return () => {
+      transportRef.current?.connectToSession(null);
+    };
+  }, [sessionId]);
+  // ── Restore persisted messages for this session ─────────────────
+  const initialMessages = useMemo(
+    () => (sessionId ? loadMessages(sessionId) : []),
+    [sessionId],
+  );
+  // ── Ref for chat actions (used by sideChannel callbacks created before chat) ──
+  const chatActionsRef = useRef<{
+    setMessages: ((msgs: UIMessage[]) => void) | null;
+    messages: UIMessage[];
+  }>({ setMessages: null, messages: [] });
+  // ── useChat from Vercel AI SDK ───────────────────────────────────
+  const chat = useChat({
+    id: sessionId || '__no_session__',
+    messages: initialMessages,
+    transport: transportRef.current!,
+    experimental_throttle: 80,
+    onFinish: ({ messages, isAbort, isError }) => {
+      if (isAbort || isError) return;
+      if (sessionId && messages.length > 0) {
+        saveMessages(sessionId, messages);
+      }
+    },
+    onError: (error) => {
+      logger.error('useChat error:', error);
+      setError(error.message);
+      setProcessing(false);
+    },
+  });
+  // Keep chatActionsRef in sync every render
+  chatActionsRef.current.setMessages = chat.setMessages;
+  chatActionsRef.current.messages = chat.messages;
+  // ── Persist messages on every user send (onFinish covers assistant turns) ──
+  const prevLenRef = useRef(initialMessages.length);
+  useEffect(() => {
+    if (!sessionId || chat.messages.length === 0) return;
+    if (chat.messages.length !== prevLenRef.current) {
+      prevLenRef.current = chat.messages.length;
+      saveMessages(sessionId, chat.messages);
+    }
+  }, [sessionId, chat.messages]);
+  // ── Undo last turn (backend call only; local state is trimmed in onUndoComplete) ──
+  const undoLastTurn = useCallback(async () => {
+    if (!sessionId) return;
+    try {
+      const res = await apiFetch(`/api/undo/${sessionId}`, { method: 'POST' });
+      if (!res.ok) {
+        logger.error('Undo API returned', res.status);
+        return;
+      }
+    } catch (e) {
+      logger.error('Undo failed:', e);
+    }
+    // Nothing is changed locally here: messages and their persisted copy are trimmed by the
+    // side channel's onUndoComplete handler (presumably fired on the backend's undo_complete — confirm in the transport).
+  }, [sessionId]);
+  // ── Convenience: approve tools via transport ─────────────────────
+  /**
+   * Send approval decisions for pending tool calls through the transport.
+   * Resolves to false when there is no session or transport; otherwise to the
+   * transport's result. Processing is switched on only when the transport
+   * reports success and at least one tool was approved.
+   */
+  const approveTools = useCallback(
+    async (approvals: Array<{ tool_call_id: string; approved: boolean; feedback?: string | null; edited_script?: string | null }>) => {
+      const transport = transportRef.current;
+      if (!sessionId || !transport) return false;
+      const ok = await transport.approveTools(sessionId, approvals);
+      if (ok && approvals.some((decision) => decision.approved)) {
+        setProcessing(true);
+      }
+      return ok;
+    },
+    [sessionId, setProcessing],
+  );
+  return {
+    messages: chat.messages,
+    sendMessage: chat.sendMessage,
+    status: chat.status,
+    undoLastTurn,
+    approveTools,
+    transport: transportRef.current,
+  };
+}

frontend/src/hooks/useAgentWebSocket.ts DELETED Viewed

@@ -1,503 +0,0 @@
-import { useCallback, useEffect, useRef } from 'react';
-import { useAgentStore } from '@/store/agentStore';
-import { useSessionStore } from '@/store/sessionStore';
-import { useLayoutStore } from '@/store/layoutStore';
-import type { AgentEvent } from '@/types/events';
-import type { Message, TraceLog } from '@/types/agent';
-const WS_RECONNECT_DELAY = 1000;
-const WS_MAX_RECONNECT_DELAY = 30000;
-interface UseAgentWebSocketOptions {
-  sessionId: string | null;
-  onReady?: () => void;
-  onError?: (error: string) => void;
-}
-export function useAgentWebSocket({
-  sessionId,
-  onReady,
-  onError,
-}: UseAgentWebSocketOptions) {
-  const wsRef = useRef<WebSocket | null>(null);
-  const reconnectTimeoutRef = useRef<number | null>(null);
-  const reconnectDelayRef = useRef(WS_RECONNECT_DELAY);
-  const {
-    addMessage,
-    updateMessage,
-    setProcessing,
-    setConnected,
-    setPendingApprovals,
-    setError,
-    addTraceLog,
-    updateTraceLog,
-    clearTraceLogs,
-    setPanelContent,
-    setPanelTab,
-    setActivePanelTab,
-    clearPanelTabs,
-    setPlan,
-    setCurrentTurnMessageId,
-    updateCurrentTurnTrace,
-  } = useAgentStore();
-  const { setRightPanelOpen, setLeftSidebarOpen } = useLayoutStore();
-  const { setSessionActive } = useSessionStore();
-  const handleEvent = useCallback(
-    (event: AgentEvent) => {
-      if (!sessionId) return;
-      switch (event.event_type) {
-        case 'ready':
-          setConnected(true);
-          setProcessing(false);
-          setSessionActive(sessionId, true);
-          onReady?.();
-          break;
-        case 'processing':
-          setProcessing(true);
-          clearTraceLogs();
-          // Don't clear panel tabs here - they should persist during approval flow
-          // Tabs will be cleared when a new tool_call sets up new content
-          setCurrentTurnMessageId(null); // Start a new turn
-          break;
-        case 'assistant_message': {
-          const content = (event.data?.content as string) || '';
-          const currentTrace = useAgentStore.getState().traceLogs;
-          const currentTurnMsgId = useAgentStore.getState().currentTurnMessageId;
-          if (currentTurnMsgId) {
-            // Update existing message - add segments chronologically
-            const messages = useAgentStore.getState().getMessages(sessionId);
-            const existingMsg = messages.find(m => m.id === currentTurnMsgId);
-            if (existingMsg) {
-              const segments = existingMsg.segments ? [...existingMsg.segments] : [];
-              // If there are pending traces, add them as a tools segment first
-              if (currentTrace.length > 0) {
-                segments.push({ type: 'tools', tools: [...currentTrace] });
-                clearTraceLogs();
-              }
-              // Add the new text segment
-              if (content) {
-                segments.push({ type: 'text', content });
-              }
-              updateMessage(sessionId, currentTurnMsgId, {
-                content: existingMsg.content + '\n\n' + content,
-                segments,
-              });
-            }
-          } else {
-            // Create new message
-            const messageId = `msg_${Date.now()}`;
-            const segments: Array<{ type: 'text' | 'tools'; content?: string; tools?: typeof currentTrace }> = [];
-            // Add any pending traces first
-            if (currentTrace.length > 0) {
-              segments.push({ type: 'tools', tools: [...currentTrace] });
-              clearTraceLogs();
-            }
-            // Add the text
-            if (content) {
-              segments.push({ type: 'text', content });
-            }
-            const message: Message = {
-              id: messageId,
-              role: 'assistant',
-              content,
-              timestamp: new Date().toISOString(),
-              segments,
-            };
-            addMessage(sessionId, message);
-            setCurrentTurnMessageId(messageId);
-          }
-          break;
-        }
-        case 'tool_call': {
-          const toolName = (event.data?.tool as string) || 'unknown';
-          const args = (event.data?.arguments as Record<string, any>) || {};
-          // Don't display plan_tool in trace logs (it shows up elsewhere in the UI)
-          if (toolName !== 'plan_tool') {
-            const log: TraceLog = {
-              id: `tool_${Date.now()}`,
-              type: 'call',
-              text: `Agent is executing ${toolName}...`,
-              tool: toolName,
-              timestamp: new Date().toISOString(),
-              completed: false,
-              // Store args for auto-exec message creation later
-              args: toolName === 'hf_jobs' ? args : undefined,
-            };
-            addTraceLog(log);
-            // Update the current turn message's trace in real-time
-            updateCurrentTurnTrace(sessionId);
-          }
-          // Auto-expand Right Panel for specific tools
-          if (toolName === 'hf_jobs' && (args.operation === 'run' || args.operation === 'scheduled run') && args.script) {
-            // Clear any existing tabs from previous jobs before setting new script
-            clearPanelTabs();
-            // Use tab system for jobs - add script tab immediately
-            setPanelTab({
-              id: 'script',
-              title: 'Script',
-              content: args.script,
-              language: 'python',
-              parameters: args
-            });
-            setActivePanelTab('script');
-            setRightPanelOpen(true);
-            setLeftSidebarOpen(false);
-          } else if (toolName === 'hf_repo_files' && args.operation === 'upload' && args.content) {
-            setPanelContent({
-              title: `File Upload: ${args.path || 'unnamed'}`,
-              content: args.content,
-              parameters: args,
-              language: args.path?.endsWith('.py') ? 'python' : undefined
-            });
-            setRightPanelOpen(true);
-            setLeftSidebarOpen(false);
-          }
-          console.log('Tool call:', toolName, args);
-          break;
-        }
-        case 'tool_output': {
-          const toolName = (event.data?.tool as string) || 'unknown';
-          const output = (event.data?.output as string) || '';
-          const success = event.data?.success as boolean;
-          // Mark the corresponding trace log as completed and store the output
-          updateTraceLog(toolName, { completed: true, output, success });
-          // Update the current turn message's trace in real-time
-          updateCurrentTurnTrace(sessionId);
-          // Special handling for hf_jobs - update or create job message with output
-          if (toolName === 'hf_jobs') {
-            const messages = useAgentStore.getState().getMessages(sessionId);
-            const traceLogs = useAgentStore.getState().traceLogs;
-            // Find existing approval message for this job
-            let jobMsg = [...messages].reverse().find(m => m.approval);
-            if (!jobMsg) {
-              // No approval message exists - this was an auto-executed job
-              // Create a job execution message so user can see results
-              const jobTrace = [...traceLogs].reverse().find(t => t.tool === 'hf_jobs');
-              const args = jobTrace?.args || {};
-              const autoExecMessage: Message = {
-                id: `msg_auto_${Date.now()}`,
-                role: 'assistant',
-                content: '',
-                timestamp: new Date().toISOString(),
-                approval: {
-                  status: 'approved', // Auto-approved (no user action needed)
-                  batch: {
-                    tools: [{
-                      tool: toolName,
-                      arguments: args,
-                      tool_call_id: `auto_${Date.now()}`
-                    }],
-                    count: 1
-                  }
-                },
-                toolOutput: output
-              };
-              addMessage(sessionId, autoExecMessage);
-              console.log('Created auto-exec message with tool output:', toolName);
-            } else {
-              // Update existing approval message
-              const currentOutput = jobMsg.toolOutput || '';
-              const newOutput = currentOutput ? currentOutput + '\n\n' + output : output;
-              useAgentStore.getState().updateMessage(sessionId, jobMsg.id, {
-                toolOutput: newOutput
-              });
-              console.log('Updated job message with tool output:', toolName);
-            }
-          }
-          // Don't create message bubbles for tool outputs - they only show in trace logs
-          console.log('Tool output:', toolName, success);
-          break;
-        }
-        case 'tool_log': {
-          const toolName = (event.data?.tool as string) || 'unknown';
-          const log = (event.data?.log as string) || '';
-          if (toolName === 'hf_jobs') {
-            const currentTabs = useAgentStore.getState().panelTabs;
-            const logsTab = currentTabs.find(t => t.id === 'logs');
-            // Append to existing logs tab or create new one
-            const newContent = logsTab
-              ? logsTab.content + '\n' + log
-              : '--- Job execution started ---\n' + log;
-            setPanelTab({
-              id: 'logs',
-              title: 'Logs',
-              content: newContent,
-              language: 'text'
-            });
-            // Auto-switch to logs tab when logs start streaming
-            setActivePanelTab('logs');
-            if (!useLayoutStore.getState().isRightPanelOpen) {
-              setRightPanelOpen(true);
-            }
-          }
-          break;
-        }
-        case 'plan_update': {
-          const plan = (event.data?.plan as any[]) || [];
-          setPlan(plan);
-          if (!useLayoutStore.getState().isRightPanelOpen) {
-            setRightPanelOpen(true);
-          }
-          break;
-        }
-        case 'approval_required': {
-          const tools = event.data?.tools as Array<{
-            tool: string;
-            arguments: Record<string, unknown>;
-            tool_call_id: string;
-          }>;
-          const count = (event.data?.count as number) || 0;
-          // Create a persistent message for the approval request
-          const message: Message = {
-            id: `msg_approval_${Date.now()}`,
-            role: 'assistant',
-            content: '', // Content is handled by the approval UI
-            timestamp: new Date().toISOString(),
-            approval: {
-                status: 'pending',
-                batch: { tools, count }
-            }
-          };
-          addMessage(sessionId, message);
-          // Show the first tool's content in the panel so users see what they're approving
-          if (tools && tools.length > 0) {
-            const firstTool = tools[0];
-            const args = firstTool.arguments as Record<string, any>;
-            clearPanelTabs();
-            if (firstTool.tool === 'hf_jobs' && args.script) {
-              setPanelTab({
-                id: 'script',
-                title: 'Script',
-                content: args.script,
-                language: 'python',
-                parameters: args
-              });
-              setActivePanelTab('script');
-            } else if (firstTool.tool === 'hf_repo_files' && args.content) {
-              const filename = args.path || 'file';
-              const isPython = filename.endsWith('.py');
-              setPanelTab({
-                id: 'content',
-                title: filename.split('/').pop() || 'Content',
-                content: args.content,
-                language: isPython ? 'python' : 'text',
-                parameters: args
-              });
-              setActivePanelTab('content');
-            } else {
-              // For other tools, show args as JSON
-              setPanelTab({
-                id: 'args',
-                title: firstTool.tool,
-                content: JSON.stringify(args, null, 2),
-                language: 'json',
-                parameters: args
-              });
-              setActivePanelTab('args');
-            }
-            setRightPanelOpen(true);
-            setLeftSidebarOpen(false);
-          }
-          // Clear currentTurnMessageId so subsequent assistant_message events create a new message below the approval
-          setCurrentTurnMessageId(null);
-          // We don't set pendingApprovals in the global store anymore as the message handles the UI
-          setPendingApprovals(null);
-          setProcessing(false);
-          break;
-        }
-        case 'turn_complete':
-          setProcessing(false);
-          setCurrentTurnMessageId(null); // Clear the current turn
-          break;
-        case 'compacted': {
-          const oldTokens = event.data?.old_tokens as number;
-          const newTokens = event.data?.new_tokens as number;
-          console.log(`Context compacted: ${oldTokens} -> ${newTokens} tokens`);
-          break;
-        }
-        case 'error': {
-          const errorMsg = (event.data?.error as string) || 'Unknown error';
-          setError(errorMsg);
-          setProcessing(false);
-          onError?.(errorMsg);
-          break;
-        }
-        case 'shutdown':
-          setConnected(false);
-          setProcessing(false);
-          break;
-        case 'interrupted':
-          setProcessing(false);
-          break;
-        case 'undo_complete':
-          // Could remove last messages from store
-          break;
-        default:
-          console.log('Unknown event:', event);
-      }
-    },
-    // Zustand setters are stable, so we don't need them in deps
-    // eslint-disable-next-line react-hooks/exhaustive-deps
-    [sessionId, onReady, onError]
-  );
-  const connect = useCallback(() => {
-    if (!sessionId) return;
-    // Don't connect if already connected or connecting
-    if (wsRef.current?.readyState === WebSocket.OPEN ||
-        wsRef.current?.readyState === WebSocket.CONNECTING) {
-      return;
-    }
-    // Connect directly to backend (Vite doesn't proxy WebSockets)
-    const protocol = window.location.protocol === 'https:' ? 'wss:' : 'ws:';
-    // In development, connect directly to backend port 7860
-    // In production, use the same host
-    const isDev = import.meta.env.DEV;
-    const host = isDev ? '127.0.0.1:7860' : window.location.host;
-    const wsUrl = `${protocol}//${host}/api/ws/${sessionId}`;
-    console.log('Connecting to WebSocket:', wsUrl);
-    const ws = new WebSocket(wsUrl);
-    ws.onopen = () => {
-      console.log('WebSocket connected');
-      setConnected(true);
-      reconnectDelayRef.current = WS_RECONNECT_DELAY;
-    };
-    ws.onmessage = (event) => {
-      try {
-        const data = JSON.parse(event.data) as AgentEvent;
-        handleEvent(data);
-      } catch (e) {
-        console.error('Failed to parse WebSocket message:', e);
-      }
-    };
-    ws.onerror = (error) => {
-      console.error('WebSocket error:', error);
-    };
-    ws.onclose = (event) => {
-      console.log('WebSocket closed', event.code, event.reason);
-      setConnected(false);
-      // Only reconnect if it wasn't a normal closure and session still exists
-      if (event.code !== 1000 && sessionId) {
-        // Attempt to reconnect with exponential backoff
-        if (reconnectTimeoutRef.current) {
-          clearTimeout(reconnectTimeoutRef.current);
-        }
-        reconnectTimeoutRef.current = window.setTimeout(() => {
-          reconnectDelayRef.current = Math.min(
-            reconnectDelayRef.current * 2,
-            WS_MAX_RECONNECT_DELAY
-          );
-          connect();
-        }, reconnectDelayRef.current);
-      }
-    };
-    wsRef.current = ws;
-  }, [sessionId, handleEvent]);
-  const disconnect = useCallback(() => {
-    if (reconnectTimeoutRef.current) {
-      clearTimeout(reconnectTimeoutRef.current);
-      reconnectTimeoutRef.current = null;
-    }
-    if (wsRef.current) {
-      wsRef.current.close();
-      wsRef.current = null;
-    }
-    setConnected(false);
-  }, []);
-  const sendPing = useCallback(() => {
-    if (wsRef.current?.readyState === WebSocket.OPEN) {
-      wsRef.current.send(JSON.stringify({ type: 'ping' }));
-    }
-  }, []);
-  // Connect when sessionId changes (with a small delay to ensure session is ready)
-  useEffect(() => {
-    if (!sessionId) {
-      disconnect();
-      return;
-    }
-    // Small delay to ensure session is fully created on backend
-    const timeoutId = setTimeout(() => {
-      connect();
-    }, 100);
-    return () => {
-      clearTimeout(timeoutId);
-      disconnect();
-    };
-    // eslint-disable-next-line react-hooks/exhaustive-deps
-  }, [sessionId]);
-  // Heartbeat
-  useEffect(() => {
-    const interval = setInterval(sendPing, 30000);
-    return () => clearInterval(interval);
-  }, [sendPing]);
-  return {
-    isConnected: wsRef.current?.readyState === WebSocket.OPEN,
-    connect,
-    disconnect,
-  };
-}

frontend/src/hooks/useAuth.ts ADDED Viewed

	@@ -0,0 +1,77 @@

+/**
+ * Authentication hook — simple server-side OAuth.
+ *
+ * - Outside an iframe: /auth/login redirect (cookies work fine)
+ * - Inside an iframe: show "Open in full page" link
+ *
+ * Token is stored via HttpOnly cookie by the backend.
+ * In dev mode (no OAUTH_CLIENT_ID), auth is bypassed.
+ */
+import { useEffect } from 'react';
+import { useAgentStore } from '@/store/agentStore';
+import { logger } from '@/utils/logger';
+/**
+ * Report whether this window is embedded in a frame.
+ * A throwing comparison (SecurityError on a cross-origin parent) is
+ * treated as "framed".
+ */
+export function isInIframe(): boolean {
+  let framed = true; // value used when the comparison below throws
+  try {
+    framed = window.top !== window.self;
+  } catch {
+    // SecurityError = cross-origin iframe; keep the default.
+  }
+  return framed;
+}
+/**
+ * Redirect to the server-side OAuth login by navigating the current
+ * window to `/auth/login`.
+ */
+export function triggerLogin(): void {
+  window.location.href = '/auth/login';
+}
+/**
+ * Hook: on mount, check if user is authenticated.
+ * Sets user in the agent store.
+ *
+ * Flow: GET /auth/me; when the response is ok and reports `authenticated`,
+ * the returned username/name/picture are stored. Otherwise GET /auth/status:
+ * a falsy `auth_enabled` stores a placeholder `dev` user, else the user is
+ * set to null. Any thrown error (failed fetch or JSON parse) also stores the
+ * `dev` user. The `cancelled` flag suppresses store writes once the effect
+ * has been cleaned up.
+ *
+ * NOTE(review): the /auth/status response is parsed without checking
+ * `statusRes.ok`, and every failure path ends in an "authenticated" dev
+ * user — confirm the backend enforces auth independently of this UI state.
+ */
+export function useAuth() {
+  const setUser = useAgentStore((s) => s.setUser);
+  useEffect(() => {
+    let cancelled = false;
+    async function checkAuth() {
+      try {
+        // Check if user is already authenticated (cookie-based)
+        const response = await fetch('/auth/me', { credentials: 'include' });
+        if (response.ok) {
+          const data = await response.json();
+          if (!cancelled && data.authenticated) {
+            setUser({
+              authenticated: true,
+              username: data.username,
+              name: data.name,
+              picture: data.picture,
+            });
+            logger.log('Authenticated as', data.username);
+            return;
+          }
+        }
+        // Not authenticated — check if auth is enabled
+        const statusRes = await fetch('/auth/status', { credentials: 'include' });
+        const statusData = await statusRes.json();
+        if (!statusData.auth_enabled) {
+          // Dev mode — no OAuth configured
+          if (!cancelled) setUser({ authenticated: true, username: 'dev' });
+          return;
+        }
+        // Auth enabled but not logged in — welcome screen will handle it
+        if (!cancelled) setUser(null);
+      } catch {
+        // Backend unreachable (or a response body failed to parse) — assume dev mode
+        if (!cancelled) setUser({ authenticated: true, username: 'dev' });
+      }
+    }
+    checkAuth();
+    return () => { cancelled = true; };
+  }, [setUser]);
+}

frontend/src/lib/chat-message-store.ts ADDED Viewed

	@@ -0,0 +1,62 @@

+/**
+ * Lightweight localStorage persistence for UIMessage arrays,
+ * keyed by session ID.
+ *
+ * Uses the same storage namespace (`hf-agent-messages`) that the
+ * old Zustand-based store used, so existing data is compatible.
+ */
+import type { UIMessage } from 'ai';
+import { logger } from '@/utils/logger';
+const STORAGE_KEY = 'hf-agent-messages';
+const MAX_SESSIONS = 50;
+type MessagesMap = Record<string, UIMessage[]>;
+/**
+ * Load the whole session → messages map from localStorage.
+ * Returns an empty map when nothing is stored, when the payload is not a
+ * plain object, or when reading/parsing throws.
+ */
+function readAll(): MessagesMap {
+  try {
+    const serialized = localStorage.getItem(STORAGE_KEY);
+    if (!serialized) return {};
+    const data = JSON.parse(serialized);
+    // Legacy format was { messagesBySession: {...} }
+    if (data.messagesBySession) return data.messagesBySession;
+    // New flat format: the parsed object is the map itself
+    const isPlainObject = typeof data === 'object' && !Array.isArray(data);
+    return isPlainObject ? data : {};
+  } catch {
+    return {};
+  }
+}
+/**
+ * Serialize the whole map back to localStorage.
+ * Best-effort: any failure is logged as a warning and swallowed.
+ */
+function writeAll(map: MessagesMap): void {
+  try {
+    const payload = JSON.stringify(map);
+    localStorage.setItem(STORAGE_KEY, payload);
+  } catch (err) {
+    logger.warn('Failed to persist messages:', err);
+  }
+}
+/** Return the stored messages for `sessionId`, or an empty array if none. */
+export function loadMessages(sessionId: string): UIMessage[] {
+  const stored = readAll()[sessionId];
+  return stored ?? [];
+}
+/**
+ * Persist `messages` for `sessionId`, capping storage at MAX_SESSIONS.
+ *
+ * The session's key is removed and re-added on every save so that it moves
+ * to the end of the object's key order. Eviction drops keys from the front,
+ * so the sessions removed are the least recently saved ones rather than the
+ * ones that happened to be created first.
+ *
+ * NOTE(review): this relies on string-key insertion order; it assumes session
+ * ids are not integer-like strings (those are ordered numerically) — confirm.
+ */
+export function saveMessages(sessionId: string, messages: UIMessage[]): void {
+  const map = readAll();
+  // Delete first: assigning to an existing key would keep its old position.
+  delete map[sessionId];
+  map[sessionId] = messages;
+  // Evict from the front (least recently saved) if we exceed the cap
+  const keys = Object.keys(map);
+  const overflow = keys.length - MAX_SESSIONS;
+  if (overflow > 0) {
+    for (const k of keys.slice(0, overflow)) delete map[k];
+  }
+  writeAll(map);
+}
+/** Drop the stored messages for `sessionId` and write the map back. */
+export function deleteMessages(sessionId: string): void {
+  const remaining = readAll();
+  delete remaining[sessionId];
+  writeAll(remaining);
+}

frontend/src/lib/ws-chat-transport.ts ADDED Viewed

	@@ -0,0 +1,593 @@

+/**
+ * Custom ChatTransport that bridges our WebSocket-based backend protocol
+ * to the Vercel AI SDK's UIMessageChunk streaming interface.
+ *
+ * The backend stays unchanged — this adapter translates WebSocket events
+ * into the chunk types that useChat() expects.
+ */
+import type { ChatTransport, UIMessage, UIMessageChunk, ChatRequestOptions } from 'ai';
+import { apiFetch, getWebSocketUrl } from '@/utils/api';
+import { logger } from '@/utils/logger';
+import type { AgentEvent } from '@/types/events';
+import { useAgentStore } from '@/store/agentStore';
+// ---------------------------------------------------------------------------
+// Side-channel callback interface (non-chat events forwarded to the store)
+// ---------------------------------------------------------------------------
+export interface SideChannelCallbacks {
+  onReady: () => void;
+  onShutdown: () => void;
+  onError: (error: string) => void;
+  onProcessing: () => void;
+  onProcessingDone: () => void; // also invoked by the transport when the user aborts a stream
+  onUndoComplete: () => void;
+  onCompacted: (oldTokens: number, newTokens: number) => void;
+  onPlanUpdate: (plan: Array<{ id: string; content: string; status: string }>) => void;
+  onToolLog: (tool: string, log: string) => void;
+  onConnectionChange: (connected: boolean) => void; // true on socket open; false on close or disconnect
+  onSessionDead: (sessionId: string) => void; // fired on close code 4004 or when reconnect retries run out
+  /** Called when approval_required arrives — lets the store manage panels */
+  onApprovalRequired: (tools: Array<{ tool: string; arguments: Record<string, unknown>; tool_call_id: string }>) => void;
+  /** Called when a tool_call arrives with panel-relevant args */
+  onToolCallPanel: (tool: string, args: Record<string, unknown>) => void;
+  /** Called when tool_output arrives with panel-relevant data */
+  onToolOutputPanel: (tool: string, toolCallId: string, output: string, success: boolean) => void;
+  /** Called when assistant text starts streaming */
+  onStreaming: () => void;
+  /** Called when a tool starts running (non-plan) */
+  onToolRunning: (toolName: string) => void;
+}
+// ---------------------------------------------------------------------------
+// Transport options
+// ---------------------------------------------------------------------------
+export interface WebSocketChatTransportOptions {
+  sideChannel: SideChannelCallbacks; // stored by the transport's constructor; replaceable via updateSideChannel()
+}
+// ---------------------------------------------------------------------------
+// Constants
+// ---------------------------------------------------------------------------
+const WS_RECONNECT_DELAY = 1000; // ms; initial reconnect delay, doubled after each scheduled retry
+const WS_MAX_RECONNECT_DELAY = 30000; // ms; upper bound for the doubled reconnect delay
+const WS_MAX_RETRIES = 5; // failed reconnects (reset on open) before the session is reported dead
+const WS_PING_INTERVAL = 30000; // presumably ms between keep-alive pings — usage is outside this view, confirm
+// Module-wide sequence number; keeps ids distinct within the same millisecond.
+let partIdCounter = 0;
+/** Build an id of the form `<prefix>-<epoch ms>-<sequence>`. */
+function nextPartId(prefix: string): string {
+  partIdCounter += 1;
+  return [prefix, Date.now(), partIdCounter].join('-');
+}
+// ---------------------------------------------------------------------------
+// Transport implementation
+// ---------------------------------------------------------------------------
+export class WebSocketChatTransport implements ChatTransport<UIMessage> {
+  private ws: WebSocket | null = null;
+  private currentSessionId: string | null = null;
+  private sideChannel: SideChannelCallbacks;
+  private streamController: ReadableStreamDefaultController<UIMessageChunk> | null = null;
+  private streamGeneration = 0;
+  private abortedGeneration = 0;
+  private textPartId: string | null = null;
+  private awaitingProcessing = false;
+  private connectTimeout: ReturnType<typeof setTimeout> | null = null;
+  private reconnectTimeout: ReturnType<typeof setTimeout> | null = null;
+  private reconnectDelay = WS_RECONNECT_DELAY;
+  private retries = 0;
+  private pingInterval: ReturnType<typeof setInterval> | null = null;
+  private boundVisibilityHandler: (() => void) | null = null;
+  private wasHidden = false;
+  constructor({ sideChannel }: WebSocketChatTransportOptions) {
+    this.sideChannel = sideChannel;
+    this.setupVisibilityHandler(); // registers a document-level listener; destroy() removes it
+  }
+  private setupVisibilityHandler(): void { // on tab re-show: revive a dead socket, or ping a live one
+    this.boundVisibilityHandler = () => {
+      if (document.visibilityState === 'hidden') {
+        this.wasHidden = true; // remembered so the next re-show can clear stale processing state
+        return;
+      }
+      if (document.visibilityState === 'visible' && this.currentSessionId) {
+        const wsState = this.ws?.readyState; // undefined when no socket exists
+        if (wsState !== WebSocket.OPEN && wsState !== WebSocket.CONNECTING) {
+          logger.log('Tab visible: WS is dead, reconnecting immediately');
+          this.retries = 0; // fresh retry budget and backoff for the manual reconnect
+          this.reconnectDelay = WS_RECONNECT_DELAY;
+          this.createWebSocket(this.currentSessionId);
+          if (this.wasHidden) {
+            const store = useAgentStore.getState();
+            if (store.isProcessing) {
+              logger.log('Tab visible after WS drop: resetting stale processing state');
+              store.setProcessing(false);
+              this.closeActiveStream();
+            }
+          }
+        } else if (wsState === WebSocket.OPEN) {
+          this.ws!.send(JSON.stringify({ type: 'ping' })); // non-null: readyState was just read as OPEN
+        }
+        this.wasHidden = false;
+      }
+    };
+    document.addEventListener('visibilitychange', this.boundVisibilityHandler);
+  }
+  /**
+   * Update side-channel callbacks (e.g. when sessionId changes).
+   * The new object replaces the stored one wholesale.
+   */
+  updateSideChannel(sideChannel: SideChannelCallbacks): void {
+    this.sideChannel = sideChannel;
+  }
+  // ── Public API ──────────────────────────────────────────────────────
+  /**
+   * Connect (or reconnect) to a session's WebSocket.
+   *
+   * Cancels any pending delayed connect, tears down the current socket and
+   * records `sessionId` as the current session. For a non-null id the
+   * retry/backoff state is reset and the socket is opened after 100 ms, but
+   * only if the current session is still the same at that point. Passing
+   * null only disconnects.
+   */
+  connectToSession(sessionId: string | null): void {
+    if (this.connectTimeout) {
+      clearTimeout(this.connectTimeout);
+      this.connectTimeout = null;
+    }
+    this.disconnectWebSocket();
+    this.currentSessionId = sessionId;
+    if (sessionId) {
+      this.retries = 0;
+      this.reconnectDelay = WS_RECONNECT_DELAY;
+      this.connectTimeout = setTimeout(() => {
+        this.connectTimeout = null;
+        if (this.currentSessionId === sessionId) {
+          this.createWebSocket(sessionId);
+        }
+      }, 100);
+    }
+  }
+  /**
+   * Submit approve/reject decisions for tool calls (called directly from the UI).
+   * Resolves to the response's `ok` flag, or false when the request throws.
+   */
+  async approveTools(
+    sessionId: string,
+    approvals: Array<{ tool_call_id: string; approved: boolean; feedback?: string | null; edited_script?: string | null }>,
+  ): Promise<boolean> {
+    let accepted = false;
+    try {
+      const body = JSON.stringify({ session_id: sessionId, approvals });
+      const response = await apiFetch('/api/approve', { method: 'POST', body });
+      accepted = response.ok;
+    } catch (err) {
+      logger.error('Approval request failed:', err);
+    }
+    return accepted;
+  }
+  /**
+   * Clean up everything: cancel the pending delayed connect, remove the
+   * visibilitychange listener, close the socket and close the active stream.
+   * NOTE(review): `currentSessionId` is left set — confirm the socket's close
+   * handler cannot schedule a reconnect after destroy().
+   */
+  destroy(): void {
+    if (this.connectTimeout) {
+      clearTimeout(this.connectTimeout);
+      this.connectTimeout = null;
+    }
+    if (this.boundVisibilityHandler) {
+      document.removeEventListener('visibilitychange', this.boundVisibilityHandler);
+      this.boundVisibilityHandler = null;
+    }
+    this.disconnectWebSocket();
+    this.closeActiveStream();
+  }
+  // ── ChatTransport interface ─────────────────────────────────────────
+  /**
+   * ChatTransport entry point: open a chunk stream for a new turn and POST
+   * the latest user text to `/api/submit`.
+   *
+   * Any previously active stream is closed first. A generation counter ties
+   * the abort handler, the stream's cancel hook and the submit error path to
+   * this particular call, so a late callback cannot disturb a newer stream.
+   *
+   * @param options request from useChat; `chatId` is used as the backend
+   *   session id, `messages` supplies the latest user text, and `abortSignal`
+   *   (if present) interrupts the backend and finishes the stream.
+   * @returns the stream whose controller is stored on the transport. If the
+   *   submit fails (thrown error or non-ok response) an error chunk is
+   *   enqueued and the stream is closed before it is returned.
+   */
+  async sendMessages(
+    options: {
+      trigger: 'submit-message' | 'regenerate-message';
+      chatId: string;
+      messageId: string | undefined;
+      messages: UIMessage[];
+      abortSignal: AbortSignal | undefined;
+    } & ChatRequestOptions,
+  ): Promise<ReadableStream<UIMessageChunk>> {
+    const sessionId = options.chatId;
+    // Close any previously active stream (e.g. user sent new msg during approval)
+    this.closeActiveStream();
+    // Track generation to protect against late cancel from a stale stream
+    const gen = ++this.streamGeneration;
+    logger.log(`sendMessages: gen=${gen}, awaitingProcessing=${this.awaitingProcessing}, abortedGen=${this.abortedGeneration}`);
+    // Wire up abort signal to interrupt the backend and close the stream
+    if (options.abortSignal) {
+      const onAbort = () => {
+        if (this.streamGeneration !== gen) return;
+        logger.log(`Stream aborted by user (gen=${gen})`);
+        this.interruptBackend(sessionId);
+        this.endTextPart();
+        if (this.streamController) {
+          this.enqueue({ type: 'finish-step' });
+          this.enqueue({ type: 'finish', finishReason: 'stop' });
+          this.closeActiveStream();
+        }
+        this.awaitingProcessing = true;
+        this.abortedGeneration = this.streamGeneration;
+        logger.log(`Abort complete: awaitingProcessing=true, abortedGen=${this.abortedGeneration}`);
+        this.sideChannel.onProcessingDone();
+      };
+      if (options.abortSignal.aborted) {
+        onAbort();
+      } else {
+        options.abortSignal.addEventListener('abort', onAbort, { once: true });
+      }
+    }
+    // Create the stream BEFORE the POST so WebSocket events arriving
+    // while the HTTP request is in-flight are captured immediately.
+    const stream = new ReadableStream<UIMessageChunk>({
+      start: (controller) => {
+        this.streamController = controller;
+        this.textPartId = null;
+      },
+      cancel: () => {
+        if (this.streamGeneration === gen) {
+          this.streamController = null;
+          this.textPartId = null;
+        }
+      },
+    });
+    // Extract the latest user text from the messages array
+    const lastUserMsg = [...options.messages].reverse().find(m => m.role === 'user');
+    const text = lastUserMsg
+      ? lastUserMsg.parts
+          .filter((p): p is Extract<typeof p, { type: 'text' }> => p.type === 'text')
+          .map(p => p.text)
+          .join('')
+      : '';
+    // POST to the existing backend endpoint
+    try {
+      const res = await apiFetch('/api/submit', {
+        method: 'POST',
+        body: JSON.stringify({ session_id: sessionId, text }),
+      });
+      // Treat a rejected submit like a thrown one; otherwise the stream
+      // would stay open waiting for events that may never arrive.
+      if (!res.ok) {
+        throw new Error(`Submit returned ${res.status}`);
+      }
+    } catch (e) {
+      logger.error('Submit failed:', e);
+      // Only touch the stream if it still belongs to this call. A newer
+      // sendMessages() has already closed ours and installed its own.
+      if (this.streamGeneration === gen) {
+        this.enqueue({ type: 'error', errorText: 'Failed to send message' });
+        this.closeActiveStream();
+      }
+    }
+    return stream;
+  }
+  async reconnectToStream(): Promise<ReadableStream<UIMessageChunk> | null> {
+    return null; // always null: this transport never hands back a resumable stream
+  }
+  /**
+   * Ask the backend to interrupt the current generation. Fire-and-forget:
+   * the POST to /api/interrupt/<sessionId> is not awaited, and a rejected
+   * request is only logged as a warning.
+   */
+  private interruptBackend(sessionId: string): void {
+    apiFetch(`/api/interrupt/${sessionId}`, { method: 'POST' }).catch((e) =>
+      logger.warn('Interrupt request failed:', e),
+    );
+  }
+  // ── WebSocket lifecycle ─────────────────────────────────────────────
+  /**
+   * Open a WebSocket for `sessionId` unless one is already open or connecting.
+   *
+   * Incoming frames other than `pong` are passed to handleEvent(). On close,
+   * codes 1000/4001/4003/4004 are not retried; 4004 and exhausting
+   * WS_MAX_RETRIES both report the session as dead. Other closes schedule a
+   * reconnect with a doubling delay capped at WS_MAX_RECONNECT_DELAY.
+   *
+   * Events from a socket that is no longer `this.ws` (replaced, or torn down
+   * by disconnectWebSocket) are ignored, so a stale close cannot stop the
+   * current socket's ping, report a false disconnect, or schedule a reconnect
+   * after the transport was disconnected or destroyed.
+   */
+  private createWebSocket(sessionId: string): void {
+    if (this.ws?.readyState === WebSocket.OPEN || this.ws?.readyState === WebSocket.CONNECTING) {
+      return;
+    }
+    const wsUrl = getWebSocketUrl(sessionId);
+    logger.log('WS transport connecting:', wsUrl);
+    const ws = new WebSocket(wsUrl);
+    // True once this socket has been superseded or deliberately dropped.
+    const isStale = (): boolean => this.ws !== ws;
+    ws.onopen = () => {
+      if (isStale()) return;
+      logger.log('WS transport connected');
+      this.sideChannel.onConnectionChange(true);
+      this.reconnectDelay = WS_RECONNECT_DELAY;
+      this.retries = 0;
+      this.startPing();
+    };
+    ws.onmessage = (evt) => {
+      // Frames still draining from an old socket belong to a previous
+      // connection and must not be fed into the current stream.
+      if (isStale()) return;
+      try {
+        const raw = JSON.parse(evt.data);
+        if (raw.type === 'pong') return;
+        this.handleEvent(raw as AgentEvent);
+      } catch (e) {
+        logger.error('WS parse error:', e);
+      }
+    };
+    ws.onerror = (err) => logger.error('WS error:', err);
+    ws.onclose = (evt) => {
+      logger.log('WS closed', evt.code, evt.reason);
+      // disconnectWebSocket() already reported the disconnect and stopped
+      // the ping for sockets it closed; nothing more to do for those.
+      if (isStale()) return;
+      this.sideChannel.onConnectionChange(false);
+      this.stopPing();
+      const noRetry = [1000, 4001, 4003, 4004];
+      if (evt.code === 4004 && sessionId) {
+        this.sideChannel.onSessionDead(sessionId);
+        return;
+      }
+      if (!noRetry.includes(evt.code) && this.currentSessionId === sessionId) {
+        this.retries += 1;
+        if (this.retries > WS_MAX_RETRIES) {
+          logger.warn('WS max retries reached');
+          this.sideChannel.onSessionDead(sessionId);
+          return;
+        }
+        // Never keep two reconnect timers alive at once.
+        if (this.reconnectTimeout) {
+          clearTimeout(this.reconnectTimeout);
+        }
+        this.reconnectTimeout = setTimeout(() => {
+          this.reconnectTimeout = null;
+          this.reconnectDelay = Math.min(this.reconnectDelay * 2, WS_MAX_RECONNECT_DELAY);
+          this.createWebSocket(sessionId);
+        }, this.reconnectDelay);
+      }
+    };
+    this.ws = ws;
+  }
+  /**
+   * Tear down the connection: cancel any scheduled reconnect, stop the
+   * keep-alive ping, close and forget the current socket (if any), and
+   * report the disconnected state to the side channel.
+   */
+  private disconnectWebSocket(): void {
+    const pendingReconnect = this.reconnectTimeout;
+    if (pendingReconnect) {
+      clearTimeout(pendingReconnect);
+      this.reconnectTimeout = null;
+    }
+    this.stopPing();
+    const socket = this.ws;
+    if (socket) {
+      socket.close();
+      this.ws = null;
+    }
+    this.sideChannel.onConnectionChange(false);
+  }
+  /**
+   * (Re)start the keep-alive timer. Any previous timer is stopped first;
+   * every WS_PING_INTERVAL a `ping` message is sent, but only while the
+   * socket is OPEN.
+   */
+  private startPing(): void {
+    this.stopPing();
+    const sendPing = (): void => {
+      const socket = this.ws;
+      if (socket && socket.readyState === WebSocket.OPEN) {
+        socket.send(JSON.stringify({ type: 'ping' }));
+      }
+    };
+    this.pingInterval = setInterval(sendPing, WS_PING_INTERVAL);
+  }
+  /** Stop the keep-alive timer, if one is running. */
+  private stopPing(): void {
+    const timer = this.pingInterval;
+    if (!timer) return;
+    clearInterval(timer);
+    this.pingInterval = null;
+  }
+  // ── Stream helpers ──────────────────────────────────────────────────
+  /**
+   * Close the currently active stream controller (if any) and forget both
+   * it and the open text part. Errors from closing are ignored, since the
+   * controller may already be closed.
+   */
+  private closeActiveStream(): void {
+    const controller = this.streamController;
+    if (!controller) return;
+    try {
+      controller.close();
+    } catch {
+      // already closed
+    }
+    this.streamController = null;
+    this.textPartId = null;
+  }
+  /**
+   * Push a chunk onto the active stream. A missing controller makes this a
+   * no-op, and errors from enqueueing (e.g. stream already closed) are
+   * swallowed.
+   */
+  private enqueue(chunk: UIMessageChunk): void {
+    const controller = this.streamController;
+    if (!controller) return;
+    try {
+      controller.enqueue(chunk);
+    } catch {
+      // stream already closed
+    }
+  }
+  /** Emit `text-end` for the open text part, if any, and clear its id. */
+  private endTextPart(): void {
+    const openPartId = this.textPartId;
+    if (!openPartId) return;
+    this.enqueue({ type: 'text-end', id: openPartId });
+    this.textPartId = null;
+  }
+  // ── Event → UIMessageChunk mapping ──────────────────────────────────
+  /**
+   * Event types that feed the chat stream. handleEvent drops these while
+   * `awaitingProcessing` is set.
+   */
+  private static readonly STREAM_EVENTS = new Set<string>([
+    'assistant_chunk',
+    'assistant_stream_end',
+    'assistant_message',
+    'tool_call',
+    'tool_output',
+    'approval_required',
+    'tool_state_change',
+    'turn_complete',
+    'error',
+  ]);
+  /**
+   * Dispatch one backend event received over the WebSocket.
+   *
+   * Side-channel events are forwarded to `this.sideChannel` callbacks;
+   * chat-stream events are translated into UIMessageChunk values and
+   * enqueued on the active stream controller. While `awaitingProcessing`
+   * is set, every event type listed in STREAM_EVENTS is dropped; the flag
+   * is cleared by a 'processing' event once `streamGeneration` is newer
+   * than `abortedGeneration`.
+   */
+  private handleEvent(event: AgentEvent): void {
+    // After an abort, ignore stale stream events until the next 'processing'
+    if (this.awaitingProcessing && WebSocketChatTransport.STREAM_EVENTS.has(event.event_type)) {
+      logger.log(`Filtering stale "${event.event_type}" (gen=${this.streamGeneration}, aborted=${this.abortedGeneration})`);
+      return;
+    }
+    switch (event.event_type) {
+      // ── Side-channel only events ────────────────────────────────
+      case 'ready':
+        this.sideChannel.onReady();
+        break;
+      case 'shutdown':
+        this.sideChannel.onShutdown();
+        this.closeActiveStream();
+        break;
+      case 'interrupted':
+        // Don't close the stream here — the abort handler already did, and
+        // a new stream for the next user message may already exist.
+        // Closing here would destroy the NEWER stream, causing the next
+        // response to be silently dropped.
+        this.sideChannel.onProcessingDone();
+        break;
+      case 'undo_complete':
+        this.endTextPart();
+        this.closeActiveStream();
+        this.sideChannel.onUndoComplete();
+        break;
+      case 'compacted':
+        // Missing or falsy token counts are reported as 0.
+        this.sideChannel.onCompacted(
+          (event.data?.old_tokens as number) || 0,
+          (event.data?.new_tokens as number) || 0,
+        );
+        break;
+      case 'plan_update':
+        this.sideChannel.onPlanUpdate(
+          (event.data?.plan as Array<{ id: string; content: string; status: string }>) || [],
+        );
+        break;
+      case 'tool_log':
+        this.sideChannel.onToolLog(
+          (event.data?.tool as string) || '',
+          (event.data?.log as string) || '',
+        );
+        break;
+      // ── Chat stream events ──────────────────────────────────────
+      case 'processing':
+        if (this.awaitingProcessing) {
+          // Still the aborted generation: keep filtering stream events.
+          if (this.streamGeneration <= this.abortedGeneration) {
+            logger.log(`Ignoring stale "processing" (gen=${this.streamGeneration} <= aborted=${this.abortedGeneration})`);
+            break;
+          }
+          logger.log(`Accepting "processing" for new generation (gen=${this.streamGeneration}, aborted=${this.abortedGeneration})`);
+          this.awaitingProcessing = false;
+        }
+        this.sideChannel.onProcessing();
+        // Open a new assistant message and its first step on the stream.
+        if (this.streamController) {
+          this.enqueue({
+            type: 'start',
+            messageMetadata: { createdAt: new Date().toISOString() },
+          });
+          this.enqueue({ type: 'start-step' });
+        }
+        break;
+      case 'assistant_chunk': {
+        const delta = (event.data?.content as string) || '';
+        if (!delta || !this.streamController) break;
+        // The first delta of a run opens a text part; later deltas reuse it.
+        if (!this.textPartId) {
+          this.textPartId = nextPartId('text');
+          this.enqueue({ type: 'text-start', id: this.textPartId });
+          this.sideChannel.onStreaming();
+        }
+        this.enqueue({ type: 'text-delta', id: this.textPartId, delta });
+        break;
+      }
+      case 'assistant_stream_end':
+        this.endTextPart();
+        break;
+      case 'assistant_message': {
+        // Complete message: emitted as a self-contained text part
+        // (start, single delta, end) with its own id.
+        const content = (event.data?.content as string) || '';
+        if (!content || !this.streamController) break;
+        const id = nextPartId('text');
+        this.enqueue({ type: 'text-start', id });
+        this.enqueue({ type: 'text-delta', id, delta: content });
+        this.enqueue({ type: 'text-end', id });
+        break;
+      }
+      case 'tool_call': {
+        if (!this.streamController) break;
+        const toolName = (event.data?.tool as string) || 'unknown';
+        const toolCallId = (event.data?.tool_call_id as string) || '';
+        const args = (event.data?.arguments as Record<string, unknown>) || {};
+        // plan_tool calls are not emitted into the chat stream.
+        if (toolName === 'plan_tool') break;
+        this.endTextPart();
+        this.enqueue({ type: 'tool-input-start', toolCallId, toolName, dynamic: true });
+        this.enqueue({ type: 'tool-input-available', toolCallId, toolName, input: args, dynamic: true });
+        this.sideChannel.onToolRunning(toolName);
+        this.sideChannel.onToolCallPanel(toolName, args as Record<string, unknown>);
+        break;
+      }
+      case 'tool_output': {
+        if (!this.streamController) break;
+        const toolCallId = (event.data?.tool_call_id as string) || '';
+        const output = (event.data?.output as string) || '';
+        const success = event.data?.success as boolean;
+        const toolName = (event.data?.tool as string) || '';
+        if (toolName === 'plan_tool' || toolCallId.startsWith('plan_tool')) break;
+        // A falsy `success` (including a missing field) takes the error branch.
+        if (success) {
+          this.enqueue({ type: 'tool-output-available', toolCallId, output, dynamic: true });
+        } else {
+          this.enqueue({ type: 'tool-output-error', toolCallId, errorText: output, dynamic: true });
+        }
+        this.sideChannel.onToolOutputPanel(toolName, toolCallId, output, success);
+        break;
+      }
+      case 'approval_required': {
+        const tools = event.data?.tools as Array<{
+          tool: string;
+          arguments: Record<string, unknown>;
+          tool_call_id: string;
+        }>;
+        if (!tools || !this.streamController) break;
+        this.endTextPart();
+        // Each pending tool is announced as a tool input followed by an
+        // approval request keyed by its tool_call_id.
+        for (const t of tools) {
+          this.enqueue({ type: 'tool-input-start', toolCallId: t.tool_call_id, toolName: t.tool, dynamic: true });
+          this.enqueue({ type: 'tool-input-available', toolCallId: t.tool_call_id, toolName: t.tool, input: t.arguments, dynamic: true });
+          this.enqueue({ type: 'tool-approval-request', approvalId: `approval-${t.tool_call_id}`, toolCallId: t.tool_call_id });
+        }
+        this.sideChannel.onApprovalRequired(tools);
+        this.sideChannel.onProcessingDone();
+        break;
+      }
+      case 'tool_state_change': {
+        const tcId = (event.data?.tool_call_id as string) || '';
+        const state = (event.data?.state as string) || '';
+        const jobUrl = (event.data?.jobUrl as string) || undefined;
+        if (tcId.startsWith('plan_tool')) break;
+        // The job URL is written straight to the agent store rather than
+        // going through the side channel.
+        if (jobUrl && tcId) {
+          useAgentStore.getState().setJobUrl(tcId, jobUrl);
+        }
+        if (this.streamController && (state === 'rejected' || state === 'abandoned')) {
+          this.enqueue({ type: 'tool-output-denied', toolCallId: tcId });
+        }
+        break;
+      }
+      case 'turn_complete':
+        this.endTextPart();
+        // Finish the step and the message, then close the stream.
+        if (this.streamController) {
+          this.enqueue({ type: 'finish-step' });
+          this.enqueue({ type: 'finish', finishReason: 'stop' });
+          this.closeActiveStream();
+        }
+        this.sideChannel.onProcessingDone();
+        break;
+      case 'error': {
+        const errorMsg = (event.data?.error as string) || 'Unknown error';
+        this.sideChannel.onError(errorMsg);
+        // The error chunk is enqueued, but the stream is not closed here.
+        if (this.streamController) {
+          this.enqueue({ type: 'error', errorText: errorMsg });
+        }
+        this.sideChannel.onProcessingDone();
+        break;
+      }
+      default:
+        logger.log('WS transport: unknown event', event);
+    }
+  }
+}

frontend/src/main.tsx CHANGED Viewed

@@ -3,13 +3,23 @@ import { createRoot } from 'react-dom/client';
 import { ThemeProvider } from '@mui/material/styles';
 import CssBaseline from '@mui/material/CssBaseline';
 import App from './App';
-import theme from './theme';
-createRoot(document.getElementById('root')!).render(
-  <StrictMode>
     <ThemeProvider theme={theme}>
       <CssBaseline />
       <App />
     </ThemeProvider>
   </StrictMode>
 );

 import { ThemeProvider } from '@mui/material/styles';
 import CssBaseline from '@mui/material/CssBaseline';
 import App from './App';
+import { darkTheme, lightTheme } from './theme';
+import { useLayoutStore } from './store/layoutStore';
+/**
+ * Top-level component. Reads `themeMode` from the layout store and renders
+ * <App /> inside the MUI ThemeProvider with CssBaseline, using lightTheme
+ * when the mode is 'light' and darkTheme for any other value.
+ */
+function Root() {
+  const themeMode = useLayoutStore((s) => s.themeMode);
+  const theme = themeMode === 'light' ? lightTheme : darkTheme;
+  return (
     <ThemeProvider theme={theme}>
       <CssBaseline />
       <App />
     </ThemeProvider>
+  );
+}
+// Mount <Root /> under StrictMode into the #root element; the non-null
+// assertion assumes that element exists in the host page.
+createRoot(document.getElementById('root')!).render(
+  <StrictMode>
+    <Root />
   </StrictMode>
 );

frontend/src/store/agentStore.ts CHANGED Viewed

@@ -1,5 +1,16 @@
 import { create } from 'zustand';
-import type { Message, ApprovalBatch, User, TraceLog } from '@/types/agent';
 export interface PlanItem {
   id: string;
@@ -7,254 +18,158 @@ export interface PlanItem {
   status: 'pending' | 'in_progress' | 'completed';
 }
-interface PanelTab {
-  id: string;
-  title: string;
   content: string;
-  language?: string;
-  parameters?: any;
 }
 interface AgentStore {
-  // State per session (keyed by session ID)
-  messagesBySession: Record<string, Message[]>;
   isProcessing: boolean;
   isConnected: boolean;
-  pendingApprovals: ApprovalBatch | null;
   user: User | null;
   error: string | null;
-  traceLogs: TraceLog[];
-  panelContent: { title: string; content: string; language?: string; parameters?: any } | null;
-  panelTabs: PanelTab[];
-  activePanelTab: string | null;
   plan: PlanItem[];
-  currentTurnMessageId: string | null; // Track the current turn's assistant message
   // Actions
-  addMessage: (sessionId: string, message: Message) => void;
-  updateMessage: (sessionId: string, messageId: string, updates: Partial<Message>) => void;
-  clearMessages: (sessionId: string) => void;
   setProcessing: (isProcessing: boolean) => void;
   setConnected: (isConnected: boolean) => void;
-  setPendingApprovals: (approvals: ApprovalBatch | null) => void;
   setUser: (user: User | null) => void;
   setError: (error: string | null) => void;
-  getMessages: (sessionId: string) => Message[];
-  addTraceLog: (log: TraceLog) => void;
-  updateTraceLog: (toolName: string, updates: Partial<TraceLog>) => void;
-  clearTraceLogs: () => void;
-  setPanelContent: (content: { title: string; content: string; language?: string; parameters?: any } | null) => void;
-  setPanelTab: (tab: PanelTab) => void;
-  setActivePanelTab: (tabId: string) => void;
-  clearPanelTabs: () => void;
-  removePanelTab: (tabId: string) => void;
   setPlan: (plan: PlanItem[]) => void;
-  setCurrentTurnMessageId: (id: string | null) => void;
-  updateCurrentTurnTrace: (sessionId: string) => void;
-  showToolOutput: (log: TraceLog) => void;
 }
-export const useAgentStore = create<AgentStore>((set, get) => ({
-  messagesBySession: {},
   isProcessing: false,
   isConnected: false,
-  pendingApprovals: null,
   user: null,
   error: null,
-  traceLogs: [],
-  panelContent: null,
-  panelTabs: [],
-  activePanelTab: null,
-  plan: [],
-  currentTurnMessageId: null,
-  addMessage: (sessionId: string, message: Message) => {
-    set((state) => {
-      const currentMessages = state.messagesBySession[sessionId] || [];
-      return {
-        messagesBySession: {
-          ...state.messagesBySession,
-          [sessionId]: [...currentMessages, message],
-        },
-      };
-    });
-  },
-  updateMessage: (sessionId: string, messageId: string, updates: Partial<Message>) => {
-    set((state) => {
-      const currentMessages = state.messagesBySession[sessionId] || [];
-      const updatedMessages = currentMessages.map((msg) =>
-        msg.id === messageId ? { ...msg, ...updates } : msg
-      );
-      return {
-        messagesBySession: {
-          ...state.messagesBySession,
-          [sessionId]: updatedMessages,
-        },
-      };
-    });
-  },
-  clearMessages: (sessionId: string) => {
-    set((state) => ({
-      messagesBySession: {
-        ...state.messagesBySession,
-        [sessionId]: [],
-      },
-    }));
-  },
-  setProcessing: (isProcessing: boolean) => {
-    set({ isProcessing });
-  },
-  setConnected: (isConnected: boolean) => {
-    set({ isConnected });
-  },
-  setPendingApprovals: (approvals: ApprovalBatch | null) => {
-    set({ pendingApprovals: approvals });
   },
-  setUser: (user: User | null) => {
-    set({ user });
-  },
-  setError: (error: string | null) => {
-    set({ error });
-  },
-  getMessages: (sessionId: string) => {
-    return get().messagesBySession[sessionId] || [];
-  },
-  addTraceLog: (log: TraceLog) => {
-    set((state) => ({
-      traceLogs: [...state.traceLogs, log],
-    }));
-  },
-  updateTraceLog: (toolName: string, updates: Partial<TraceLog>) => {
-    set((state) => {
-      // Find the last trace log with this tool name and update it
-      const traceLogs = [...state.traceLogs];
-      for (let i = traceLogs.length - 1; i >= 0; i--) {
-        if (traceLogs[i].tool === toolName && traceLogs[i].type === 'call') {
-          traceLogs[i] = { ...traceLogs[i], ...updates };
-          break;
-        }
-      }
-      return { traceLogs };
-    });
-  },
-  clearTraceLogs: () => {
-    set({ traceLogs: [] });
-  },
-  setPanelContent: (content) => {
-    set({ panelContent: content });
-  },
-  setPanelTab: (tab: PanelTab) => {
-    set((state) => {
-      const existingIndex = state.panelTabs.findIndex(t => t.id === tab.id);
-      let newTabs: PanelTab[];
-      if (existingIndex >= 0) {
-        // Update existing tab
-        newTabs = [...state.panelTabs];
-        newTabs[existingIndex] = tab;
-      } else {
-        // Add new tab
-        newTabs = [...state.panelTabs, tab];
-      }
-      return {
-        panelTabs: newTabs,
-        activePanelTab: state.activePanelTab || tab.id, // Auto-select first tab
-      };
-    });
-  },
-  setActivePanelTab: (tabId: string) => {
-    set({ activePanelTab: tabId });
-  },
-  clearPanelTabs: () => {
-    set({ panelTabs: [], activePanelTab: null });
-  },
-  removePanelTab: (tabId: string) => {
-    set((state) => {
-      const newTabs = state.panelTabs.filter(t => t.id !== tabId);
-      // If we removed the active tab, switch to another tab or null
-      let newActiveTab = state.activePanelTab;
-      if (state.activePanelTab === tabId) {
-        newActiveTab = newTabs.length > 0 ? newTabs[newTabs.length - 1].id : null;
-      }
-      return {
-        panelTabs: newTabs,
-        activePanelTab: newActiveTab,
-      };
-    });
   },
-  setPlan: (plan: PlanItem[]) => {
-    set({ plan });
-  },
-  setCurrentTurnMessageId: (id: string | null) => {
-    set({ currentTurnMessageId: id });
-  },
-  updateCurrentTurnTrace: (sessionId: string) => {
-    const state = get();
-    if (state.currentTurnMessageId) {
-      const currentMessages = state.messagesBySession[sessionId] || [];
-      const updatedMessages = currentMessages.map((msg) =>
-        msg.id === state.currentTurnMessageId
-          ? { ...msg, trace: state.traceLogs.length > 0 ? [...state.traceLogs] : undefined }
-          : msg
-      );
-      set({
-        messagesBySession: {
-          ...state.messagesBySession,
-          [sessionId]: updatedMessages,
-        },
-      });
-    }
-  },
-  showToolOutput: (log: TraceLog) => {
-    // Show tool output in the right panel - only ONE tool output tab at a time
-    const state = get();
-    // Determine language based on content
-    let language = 'text';
-    const content = log.output || '';
-    // Check if content looks like JSON
-    if (content.trim().startsWith('{') || content.trim().startsWith('[') || content.includes('```json')) {
-      language = 'json';
-    }
-    // Check if content has markdown tables or formatting
-    else if (content.includes('|') && content.includes('---') || content.includes('```')) {
-      language = 'markdown';
-    }
-    // Remove any existing tool output tab (only keep one)
-    const otherTabs = state.panelTabs.filter(t => t.id !== 'tool_output');
-    // Create/replace the single tool output tab
-    const newTab = {
-      id: 'tool_output',
-      title: log.tool,
-      content: content || 'No output available',
-      language,
-    };
-    set({
-      panelTabs: [...otherTabs, newTab],
-      activePanelTab: 'tool_output',
-    });
   },
 }));

+/**
+ * Agent store — manages UI state that is NOT handled by the Vercel AI SDK.
+ *
+ * Message state (messages, streaming, tool calls) is now managed by useChat().
+ * This store only handles:
+ *  - Connection / processing flags
+ *  - Panel state (right panel — single-artifact pattern)
+ *  - Plan state
+ *  - User info / error banners
+ *  - Edited scripts (for hf_jobs code editing)
+ */
 import { create } from 'zustand';
+import type { User } from '@/types/agent';
 export interface PlanItem {
   id: string;
   status: 'pending' | 'in_progress' | 'completed';
 }
+/**
+ * One block of panel text together with its language tag.
+ * NOTE(review): `language` is presumably used for syntax highlighting in
+ * the panel — confirm against the panel component.
+ */
+export interface PanelSection {
   content: string;
+  language: string;
+}
+/**
+ * Content of the right panel: a title plus optional script and output
+ * sections and optional free-form parameters.
+ */
+export interface PanelData {
+  title: string;
+  script?: PanelSection;
+  output?: PanelSection;
+  parameters?: Record<string, unknown>;
+}
+/** Which section of PanelData the panel is currently showing. */
+export type PanelView = 'script' | 'output';
+/**
+ * An LLM health error: the message, a coarse category, and the model name.
+ * NOTE(review): which component produces these is not visible here.
+ */
+export interface LLMHealthError {
+  error: string;
+  errorType: 'auth' | 'credits' | 'rate_limit' | 'network' | 'unknown';
+  model: string;
 }
+/**
+ * Discriminated union (on `type`) describing the agent's current activity.
+ * Only the 'tool' variant carries extra data: the running tool's name.
+ */
+export type ActivityStatus =
+  | { type: 'idle' }
+  | { type: 'thinking' }
+  | { type: 'tool'; toolName: string }
+  | { type: 'waiting-approval' }
+  | { type: 'streaming' };
+/** Shape of the zustand agent store: state fields followed by actions. */
 interface AgentStore {
+  // Global UI flags
   isProcessing: boolean;
   isConnected: boolean;
+  activityStatus: ActivityStatus;
   user: User | null;
   error: string | null;
+  llmHealthError: LLMHealthError | null;
+  // Right panel (single-artifact pattern)
+  panelData: PanelData | null;
+  panelView: PanelView;
+  panelEditable: boolean;
+  // Plan
   plan: PlanItem[];
+  // Edited scripts (tool_call_id -> edited content)
+  editedScripts: Record<string, string>;
+  // Job URLs (tool_call_id -> job URL) for HF jobs
+  jobUrls: Record<string, string>;
   // Actions
   setProcessing: (isProcessing: boolean) => void;
   setConnected: (isConnected: boolean) => void;
+  setActivityStatus: (status: ActivityStatus) => void;
   setUser: (user: User | null) => void;
   setError: (error: string | null) => void;
+  setLlmHealthError: (error: LLMHealthError | null) => void;
+  // Panel actions: `view` and `editable` are optional on setPanel.
+  setPanel: (data: PanelData, view?: PanelView, editable?: boolean) => void;
+  setPanelView: (view: PanelView) => void;
+  setPanelOutput: (output: PanelSection) => void;
+  updatePanelScript: (content: string) => void;
+  lockPanel: () => void;
+  clearPanel: () => void;
   setPlan: (plan: PlanItem[]) => void;
+  // Lookups return undefined when nothing is stored for the tool call id.
+  setEditedScript: (toolCallId: string, content: string) => void;
+  getEditedScript: (toolCallId: string) => string | undefined;
+  clearEditedScripts: () => void;
+  setJobUrl: (toolCallId: string, jobUrl: string) => void;
+  getJobUrl: (toolCallId: string) => string | undefined;
 }
+/**
+ * Zustand store hook holding the agent UI state declared by AgentStore:
+ * initial values first, then the actions grouped by concern.
+ */
+export const useAgentStore = create<AgentStore>()((set, get) => ({
   isProcessing: false,
   isConnected: false,
+  activityStatus: { type: 'idle' },
   user: null,
   error: null,
+  llmHealthError: null,
+  panelData: null,
+  panelView: 'script',
+  panelEditable: false,
+  plan: [],
+  editedScripts: {},
+  jobUrls: {},
+  // ── Global flags ──────────────────────────────────────────────────
+  // When processing stops, activityStatus is reset to idle unless it is
+  // currently 'waiting-approval', which is preserved.
+  setProcessing: (isProcessing) => {
+    const current = get().activityStatus;
+    const preserveStatus = current.type === 'waiting-approval';
+    set({ isProcessing, ...(!isProcessing && !preserveStatus ? { activityStatus: { type: 'idle' } } : {}) });
   },
+  setConnected: (isConnected) => set({ isConnected }),
+  setActivityStatus: (status) => set({ activityStatus: status }),
+  setUser: (user) => set({ user }),
+  setError: (error) => set({ error }),
+  setLlmHealthError: (error) => set({ llmHealthError: error }),
+  // ── Panel (single-artifact) ───────────────────────────────────────
+  // Without an explicit view, show the script if the data has one, else
+  // the output; the panel is read-only unless `editable` is passed.
+  setPanel: (data, view, editable) => set({
+    panelData: data,
+    panelView: view ?? (data.script ? 'script' : 'output'),
+    panelEditable: editable ?? false,
+  }),
+  setPanelView: (view) => set({ panelView: view }),
+  // With no panel open, the output is dropped and panelData stays null.
+  setPanelOutput: (output) => set((state) => ({
+    panelData: state.panelData ? { ...state.panelData, output } : null,
+  })),
+  // Only replaces the content of an existing script section; panelData is
+  // left unchanged when there is no panel or no script.
+  updatePanelScript: (content) => set((state) => ({
+    panelData: state.panelData?.script
+      ? { ...state.panelData, script: { ...state.panelData.script, content } }
+      : state.panelData,
+  })),
+  lockPanel: () => set({ panelEditable: false }),
+  clearPanel: () => set({ panelData: null, panelView: 'script', panelEditable: false }),
+  // ── Plan ──────────────────────────────────────────────────────────
+  setPlan: (plan) => set({ plan }),
+  // ── Edited scripts ────────────────────────────────────────────────
+  setEditedScript: (toolCallId, content) => {
+    set((state) => ({
+      editedScripts: { ...state.editedScripts, [toolCallId]: content },
+    }));
   },
+  getEditedScript: (toolCallId) => get().editedScripts[toolCallId],
+  clearEditedScripts: () => set({ editedScripts: {} }),
+  // ── Job URLs ────────────────────────────────────────────────────────
+  setJobUrl: (toolCallId, jobUrl) => {
+    set((state) => ({
+      jobUrls: { ...state.jobUrls, [toolCallId]: jobUrl },
+    }));
   },
+  getJobUrl: (toolCallId) => get().jobUrls[toolCallId],
 }));