onewayto commited on
Commit
683b580
·
verified ·
1 Parent(s): 174be0d

Upload 49 files

Browse files
Files changed (49) hide show
  1. .gitignore +71 -0
  2. .python-version +1 -0
  3. Dockerfile +32 -0
  4. Procfile +1 -0
  5. __init__.py +1 -0
  6. agent/README.md +21 -0
  7. agent/__init__.py +7 -0
  8. agent/config.py +83 -0
  9. agent/context_manager/__init__.py +7 -0
  10. agent/context_manager/manager.py +197 -0
  11. agent/core/__init__.py +12 -0
  12. agent/core/agent_loop.py +711 -0
  13. agent/core/session.py +255 -0
  14. agent/core/session_uploader.py +202 -0
  15. agent/core/tools.py +337 -0
  16. agent/main.py +567 -0
  17. agent/prompts/system_prompt.yaml +170 -0
  18. agent/prompts/system_prompt_v2.yaml +626 -0
  19. agent/tools/__init__.py +39 -0
  20. agent/tools/dataset_tools.py +445 -0
  21. agent/tools/docs_tools.py +956 -0
  22. agent/tools/github_find_examples.py +499 -0
  23. agent/tools/github_list_repos.py +287 -0
  24. agent/tools/github_read_file.py +348 -0
  25. agent/tools/hf_repo_files_tool.py +322 -0
  26. agent/tools/hf_repo_git_tool.py +663 -0
  27. agent/tools/jobs_tool.py +1042 -0
  28. agent/tools/plan_tool.py +138 -0
  29. agent/tools/private_hf_repo_tools.py +650 -0
  30. agent/tools/types.py +16 -0
  31. agent/tools/utilities.py +142 -0
  32. agent/utils/__init__.py +3 -0
  33. agent/utils/reliability_checks.py +16 -0
  34. agent/utils/terminal_display.py +155 -0
  35. configs/main_agent_config.json +17 -0
  36. dependencies.py +144 -0
  37. main.py +96 -0
  38. models.py +87 -0
  39. pyproject.toml +51 -0
  40. requirements.txt +25 -0
  41. routes/__init__.py +1 -0
  42. routes/__pycache__/__init__.cpython-313.pyc +0 -0
  43. routes/__pycache__/agent.cpython-313.pyc +0 -0
  44. routes/agent.py +404 -0
  45. routes/auth.py +171 -0
  46. session_manager.py +376 -0
  47. start.sh +26 -0
  48. uv.lock +0 -0
  49. websocket.py +62 -0
.gitignore ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Python-generated files
2
+ __pycache__/
3
+ *.py[oc]
4
+ build/
5
+ dist/
6
+ wheels/
7
+ *.egg-info
8
+ .pytest_cache/
9
+ .mypy_cache/
10
+ .tox/
11
+ .coverage
12
+ htmlcov/
13
+ .ipynb_checkpoints/
14
+
15
+ # Virtual environments
16
+ .venv/
17
+ venv/
18
+ ENV/
19
+ env/
20
+
21
+ # Environment and Secrets
22
+ .env
23
+ .env.local
24
+ .env.*
25
+ !.env.example
26
+ *.local
27
+ credentials*.json
28
+
29
+ # OS-specific
30
+ .DS_Store
31
+ Thumbs.db
32
+ *.swp
33
+
34
+ # IDE-specific
35
+ .vscode/
36
+ .idea/
37
+ .cursor/
38
+ .history/
39
+ *.sublime-project
40
+ *.sublime-workspace
41
+
42
+ # Frontend (Node.js)
43
+ frontend/node_modules/
44
+ frontend/dist/
45
+ frontend/.cache/
46
+ frontend/*.local
47
+ frontend/.eslintcache
48
+ frontend/npm-debug.log*
49
+ frontend/yarn-debug.log*
50
+ frontend/yarn-error.log*
51
+
52
+ # Docker
53
+ .docker/
54
+
55
+ # Project-specific
56
+ session_logs/
57
+ /logs
58
+ hf-agent-leaderboard/
59
+ skills/
60
+ .claude/
61
+ *.jsonl
62
+ *.csv
63
+
64
+ # ML / Data
65
+ data/
66
+ datasets/
67
+ models/
68
+ checkpoint-*/
69
+ runs/
70
+ wandb/
71
+ frontend/tsconfig.tsbuildinfo
.python-version ADDED
@@ -0,0 +1 @@
 
 
1
+ 3.12
Dockerfile ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# HF Agent Backend - Docker Image
FROM python:3.12-slim

WORKDIR /app

# Install system dependencies (gcc is needed to compile wheels that ship
# without prebuilt binaries for this platform)
RUN apt-get update && apt-get install -y \
    gcc \
    && rm -rf /var/lib/apt/lists/*

# Copy requirements first for better caching
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy application code
COPY . .

# Grant full write access (chmod 777) to /app directory and set root as owner
# NOTE(review): world-writable /app plus a root user is a security smell for
# general deployments — presumably required by the target runtime (HF Spaces
# style); confirm before reusing this image elsewhere.
RUN chmod -R 777 /app && chown -R root:root /app

# Run as root user
USER root

# Expose port
EXPOSE 7860

# Health check
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
    CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:7860/api/health')" || exit 1

# Run the application
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
Procfile ADDED
@@ -0,0 +1 @@
 
 
1
+ web: uvicorn main:app --host 0.0.0.0 --port ${PORT:-7860}
__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # Backend package for HF Agent web interface
agent/README.md ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Agent
2
+
3
+ Async agent loop with LiteLLM.
4
+
5
+ ## Architecture
6
+
7
+ **Queue-based async system:**
8
+ - Submissions in (user input) → Agent Loop → Events output for possible UI updates
9
+ - Session maintains state (context + tools) for possible future Context Engineering
10
+ - Handlers for operations (USER_INPUT, INTERRUPT, COMPACT, UNDO, SHUTDOWN) to allow possible UI control
11
+
12
+ ## Components
13
+
14
+ | Component | Purpose | Long Term Goal |
15
+ |-----------|---------|----------------|
16
+ | **`agent_loop.py`** | Core agentic loop: processes user input, calls LLM via LiteLLM, executes tool calls iteratively until completion, emits events | Support parallel tool execution, streaming responses, and advanced reasoning patterns |
17
+ | **`session.py`** | Maintains session state and interaction with potential UI (context, config, event queue), handles interrupts, assigns unique session IDs for tracing | Enable plugging in different UIs (CLI, web, API, programmatic etc.) |
18
+ | **`tools.py`** | `ToolRouter` manages potential built-in tools (e.g. bash, read_file, write_file which are dummy implementations rn) + MCP tools, converts specs to OpenAI format | Be the place for tools that can be used by the agent. All crazy tool design happens here. |
19
+ | **`context_manager/`** | Manages conversation history, very rudimentary context engineering support | Implement intelligent context engineering to keep the agent on track |
20
+ | **`config.py`** | Loads JSON config for the agent | Support different configs etc. |
21
+ | **`main.py`** | Interactive CLI with async queue architecture (submission→agent, agent→events) (simple way to interact with the agent now)| Serve as reference implementation for other UIs (web, API, programmatic) |
agent/__init__.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ """
2
+ HF Agent - Main agent module
3
+ """
4
+
5
+ from agent.core.agent_loop import submission_loop
6
+
7
+ __all__ = ["submission_loop"]
agent/config.py ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import os
3
+ import re
4
+ from typing import Any, Union
5
+
6
+ from dotenv import load_dotenv
7
+ from fastmcp.mcp_config import (
8
+ RemoteMCPServer,
9
+ StdioMCPServer,
10
+ )
11
+ from pydantic import BaseModel
12
+
13
+ # These two are the canonical server config types for MCP servers.
14
+ MCPServerConfig = Union[StdioMCPServer, RemoteMCPServer]
15
+
16
+
17
class Config(BaseModel):
    """Configuration manager.

    Pydantic model validated from the JSON config file (see load_config),
    after environment-variable substitution.
    """

    # LiteLLM model identifier, e.g. "huggingface/...".
    model_name: str
    # MCP server definitions keyed by server name (stdio or remote transport).
    mcpServers: dict[str, MCPServerConfig] = {}
    # Whether to persist sessions to the dataset repo below.
    save_sessions: bool = True
    session_dataset_repo: str = "akseljoonas/hf-agent-sessions"
    auto_save_interval: int = 3  # Save every N user turns (0 = disabled)
    yolo_mode: bool = False  # Auto-approve all tool calls without confirmation

    # Permission control parameters
    confirm_cpu_jobs: bool = True  # require approval even for CPU-flavored jobs
    auto_file_upload: bool = False  # skip approval for file-upload operations
30
+
31
+
32
def substitute_env_vars(obj: Any) -> Any:
    """
    Recursively substitute environment variables in any data structure.

    Strings may contain ``${VAR_NAME}`` (required — raises ValueError when the
    variable is unset) or ``${VAR_NAME:-default}`` (optional with fallback).
    Dicts and lists are walked recursively; any other value passes through
    unchanged.
    """
    # Containers: recurse into every element.
    if isinstance(obj, dict):
        return {key: substitute_env_vars(val) for key, val in obj.items()}
    if isinstance(obj, list):
        return [substitute_env_vars(element) for element in obj]
    # Non-string scalars need no substitution.
    if not isinstance(obj, str):
        return obj

    var_pattern = r"\$\{([^}:]+)(?::(-)?([^}]*))?\}"

    def _resolve(match) -> str:
        name = match.group(1)
        # The ":-" marker distinguishes "has a default" from a plain ${VAR}.
        fallback_given = match.group(2) is not None
        fallback = match.group(3) if fallback_given else None

        value = os.environ.get(name)
        if value is not None:
            return value
        if fallback_given:
            return fallback or ""
        raise ValueError(
            f"Environment variable '{name}' is not set. "
            f"Add it to your .env file."
        )

    return re.sub(var_pattern, _resolve, obj)
67
+
68
+
69
def load_config(config_path: str = "config.json") -> Config:
    """
    Load configuration with environment variable substitution.

    Use ${VAR_NAME} in your JSON for any secret.
    Automatically loads from .env file.

    Args:
        config_path: Path to the JSON configuration file.

    Returns:
        A validated Config instance.
    """
    # Pull secrets from .env into the process environment first, so the
    # ${VAR} placeholders in the JSON can resolve.
    load_dotenv()

    with open(config_path, "r") as f:
        parsed = json.load(f)

    resolved = substitute_env_vars(parsed)
    return Config.model_validate(resolved)
agent/context_manager/__init__.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ """
2
+ Context manager for handling conversation history
3
+ """
4
+
5
+ from agent.context_manager.manager import ContextManager
6
+
7
+ __all__ = ["ContextManager"]
agent/context_manager/manager.py ADDED
@@ -0,0 +1,197 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Context management for conversation history
3
+ """
4
+
5
+ import logging
6
+ import os
7
+ import zoneinfo
8
+ from datetime import datetime
9
+ from pathlib import Path
10
+ from typing import Any
11
+
12
+ import yaml
13
+ from jinja2 import Template
14
+ from litellm import Message, acompletion
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+ # Module-level cache for HF username — avoids repeating the slow whoami() call
19
+ _hf_username_cache: str | None = None
20
+
21
+ _HF_WHOAMI_URL = "https://huggingface.co/api/whoami-v2"
22
+ _HF_WHOAMI_TIMEOUT = 5 # seconds
23
+
24
+
25
def _get_hf_username() -> str:
    """Return the HF username, cached after the first call.

    Uses subprocess + curl to avoid Python HTTP client IPv6 issues that
    cause 40+ second hangs (httpx/urllib try IPv6 first which times out
    at OS level before falling back to IPv4 — the "Happy Eyeballs" problem).

    Falls back to "unknown" (and caches it) when no token is configured or
    the whoami call fails.
    """
    import json
    import subprocess
    import time as _t

    global _hf_username_cache

    # Fast path: already resolved once in this process.
    if _hf_username_cache is not None:
        return _hf_username_cache

    token = os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACE_HUB_TOKEN")
    if not token:
        logger.warning("No HF_TOKEN set, using 'unknown' as username")
        _hf_username_cache = "unknown"
        return _hf_username_cache

    started = _t.monotonic()
    curl_cmd = [
        "curl",
        "-s",
        "-4",  # force IPv4
        "-m",
        str(_HF_WHOAMI_TIMEOUT),  # max time
        "-H",
        f"Authorization: Bearer {token}",
        _HF_WHOAMI_URL,
    ]
    try:
        proc = subprocess.run(
            curl_cmd,
            capture_output=True,
            text=True,
            # Give curl's own -m timeout a little headroom before killing.
            timeout=_HF_WHOAMI_TIMEOUT + 2,
        )
        elapsed = _t.monotonic() - started
        if proc.returncode == 0 and proc.stdout:
            payload = json.loads(proc.stdout)
            _hf_username_cache = payload.get("name", "unknown")
            logger.info(
                f"HF username resolved to '{_hf_username_cache}' in {elapsed:.2f}s"
            )
        else:
            logger.warning(
                f"curl whoami failed (rc={proc.returncode}) in {elapsed:.2f}s"
            )
            _hf_username_cache = "unknown"
    except Exception as e:
        elapsed = _t.monotonic() - started
        logger.warning(f"HF whoami failed in {elapsed:.2f}s: {e}")
        _hf_username_cache = "unknown"

    return _hf_username_cache
81
+
82
+
83
+ class ContextManager:
84
+ """Manages conversation context and message history for the agent"""
85
+
86
+ def __init__(
87
+ self,
88
+ max_context: int = 180_000,
89
+ compact_size: float = 0.1,
90
+ untouched_messages: int = 5,
91
+ tool_specs: list[dict[str, Any]] | None = None,
92
+ prompt_file_suffix: str = "system_prompt_v2.yaml",
93
+ ):
94
+ self.system_prompt = self._load_system_prompt(
95
+ tool_specs or [],
96
+ prompt_file_suffix="system_prompt_v2.yaml",
97
+ )
98
+ self.max_context = max_context
99
+ self.compact_size = int(max_context * compact_size)
100
+ self.context_length = len(self.system_prompt) // 4
101
+ self.untouched_messages = untouched_messages
102
+ self.items: list[Message] = [Message(role="system", content=self.system_prompt)]
103
+
104
+ def _load_system_prompt(
105
+ self,
106
+ tool_specs: list[dict[str, Any]],
107
+ prompt_file_suffix: str = "system_prompt.yaml",
108
+ ):
109
+ """Load and render the system prompt from YAML file with Jinja2"""
110
+ prompt_file = Path(__file__).parent.parent / "prompts" / f"{prompt_file_suffix}"
111
+
112
+ with open(prompt_file, "r") as f:
113
+ prompt_data = yaml.safe_load(f)
114
+ template_str = prompt_data.get("system_prompt", "")
115
+
116
+ # Get current date and time
117
+ tz = zoneinfo.ZoneInfo("Europe/Paris")
118
+ now = datetime.now(tz)
119
+ current_date = now.strftime("%d-%m-%Y")
120
+ current_time = now.strftime("%H:%M:%S.%f")[:-3]
121
+ current_timezone = f"{now.strftime('%Z')} (UTC{now.strftime('%z')[:3]}:{now.strftime('%z')[3:]})"
122
+
123
+ # Get HF user info (cached after the first call)
124
+ hf_user_info = _get_hf_username()
125
+
126
+ template = Template(template_str)
127
+ return template.render(
128
+ tools=tool_specs,
129
+ num_tools=len(tool_specs),
130
+ current_date=current_date,
131
+ current_time=current_time,
132
+ current_timezone=current_timezone,
133
+ hf_user_info=hf_user_info,
134
+ )
135
+
136
+ def add_message(self, message: Message, token_count: int = None) -> None:
137
+ """Add a message to the history"""
138
+ if token_count:
139
+ self.context_length = token_count
140
+ self.items.append(message)
141
+
142
+ def get_messages(self) -> list[Message]:
143
+ """Get all messages for sending to LLM"""
144
+ return self.items
145
+
146
+ async def compact(self, model_name: str) -> None:
147
+ """Remove old messages to keep history under target size"""
148
+ if (self.context_length <= self.max_context) or not self.items:
149
+ return
150
+
151
+ system_msg = (
152
+ self.items[0] if self.items and self.items[0].role == "system" else None
153
+ )
154
+
155
+ # Don't summarize a certain number of just-preceding messages
156
+ # Walk back to find a user message to make sure we keep an assistant -> user ->
157
+ # assistant general conversation structure
158
+ idx = len(self.items) - self.untouched_messages
159
+ while idx > 1 and self.items[idx].role != "user":
160
+ idx -= 1
161
+
162
+ recent_messages = self.items[idx:]
163
+ messages_to_summarize = self.items[1:idx]
164
+
165
+ # improbable, messages would have to very long
166
+ if not messages_to_summarize:
167
+ return
168
+
169
+ messages_to_summarize.append(
170
+ Message(
171
+ role="user",
172
+ content="Please provide a concise summary of the conversation above, focusing on key decisions, code changes, problems solved, and important context needed for future turns.",
173
+ )
174
+ )
175
+
176
+ hf_key = os.environ.get("INFERENCE_TOKEN")
177
+ response = await acompletion(
178
+ model=model_name,
179
+ messages=messages_to_summarize,
180
+ max_completion_tokens=self.compact_size,
181
+ api_key=hf_key
182
+ if hf_key and model_name.startswith("huggingface/")
183
+ else None,
184
+ )
185
+ summarized_message = Message(
186
+ role="assistant", content=response.choices[0].message.content
187
+ )
188
+
189
+ # Reconstruct: system + summary + recent messages (includes tools)
190
+ if system_msg:
191
+ self.items = [system_msg, summarized_message] + recent_messages
192
+ else:
193
+ self.items = [summarized_message] + recent_messages
194
+
195
+ self.context_length = (
196
+ len(self.system_prompt) // 4 + response.usage.completion_tokens
197
+ )
agent/core/__init__.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Core agent implementation
3
+ Contains the main agent logic, decision-making, and orchestration
4
+ """
5
+
6
+ from agent.core.tools import ToolRouter, ToolSpec, create_builtin_tools
7
+
8
+ __all__ = [
9
+ "ToolRouter",
10
+ "ToolSpec",
11
+ "create_builtin_tools",
12
+ ]
agent/core/agent_loop.py ADDED
@@ -0,0 +1,711 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """loop
2
+ Main agent implementation with integrated tool system and MCP support
3
+ """
4
+
5
+ import asyncio
6
+ import json
7
+ import logging
8
+ import os
9
+
10
+ from litellm import ChatCompletionMessageToolCall, Message, acompletion
11
+ from lmnr import observe
12
+
13
+ from agent.config import Config
14
+ from agent.core.session import Event, OpType, Session
15
+ from agent.core.tools import ToolRouter
16
+ from agent.tools.jobs_tool import CPU_FLAVORS
17
+
18
+ logger = logging.getLogger(__name__)
19
+
20
+ ToolCall = ChatCompletionMessageToolCall
21
+ # Explicit inference token — needed because litellm checks HF_TOKEN before
22
+ # HUGGINGFACE_API_KEY, and HF_TOKEN (used for Hub ops) may lack inference permissions.
23
+ _INFERENCE_API_KEY = os.environ.get("INFERENCE_TOKEN")
24
+
25
+
26
+ def _validate_tool_args(tool_args: dict) -> tuple[bool, str | None]:
27
+ """
28
+ Validate tool arguments structure.
29
+
30
+ Returns:
31
+ (is_valid, error_message)
32
+ """
33
+ args = tool_args.get("args", {})
34
+ # Sometimes LLM passes args as string instead of dict
35
+ if isinstance(args, str):
36
+ return (
37
+ False,
38
+ f"Tool call error: 'args' must be a JSON object, not a string. You passed: {repr(args)}",
39
+ )
40
+ if not isinstance(args, dict) and args is not None:
41
+ return (
42
+ False,
43
+ f"Tool call error: 'args' must be a JSON object. You passed type: {type(args).__name__}",
44
+ )
45
+ return True, None
46
+
47
+
48
def _needs_approval(
    tool_name: str, tool_args: dict, config: Config | None = None
) -> bool:
    """Check if a tool call requires user approval before execution.

    Returns True when the call must be confirmed by the user, False when it
    can run immediately.
    """
    # Yolo mode: every tool call is auto-approved.
    if config and config.yolo_mode:
        return False

    # Malformed args skip approval — the validation error is surfaced later.
    valid, _ = _validate_tool_args(tool_args)
    if not valid:
        return False

    operation = tool_args.get("operation", "")

    if tool_name == "hf_jobs":
        # Only job-launching operations are gated.
        if operation not in ("run", "uv", "scheduled run", "scheduled uv"):
            return False

        # hardware_flavor lives at the top level of tool_args, not nested
        # inside args; several aliases are accepted.
        flavor = (
            tool_args.get("hardware_flavor")
            or tool_args.get("flavor")
            or tool_args.get("hardware")
            or "cpu-basic"
        )
        if flavor in CPU_FLAVORS:
            # CPU jobs can be exempted from confirmation via config.
            return not (config and not config.confirm_cpu_jobs)
        # GPU (or unknown) flavors always need confirmation.
        return True

    if tool_name == "hf_private_repos":
        if operation == "upload_file":
            # Uploads can be auto-approved via config.
            return not (config and config.auto_file_upload)
        # Repo creation always requires approval; other operations do not.
        return operation in ("create_repo",)

    if tool_name == "hf_repo_files":
        # upload can overwrite existing files; delete is destructive.
        return operation in ("upload", "delete")

    if tool_name == "hf_repo_git":
        # Destructive git / repo-management operations require approval.
        return operation in (
            "delete_branch",
            "delete_tag",
            "merge_pr",
            "create_repo",
            "update_repo",
        )

    return False
113
+
114
+
115
+ class Handlers:
116
+ """Handler functions for each operation type"""
117
+
118
    @staticmethod
    @observe(name="run_agent")
    async def run_agent(
        session: Session, text: str, max_iterations: int = 10
    ) -> str | None:
        """
        Handle user input (like user_input_or_turn in codex.rs:1291).

        Runs the agentic loop: streams an LLM completion, accumulates any
        tool calls from the stream, executes non-approval tools in parallel,
        and pauses (returning None) when any tool requires user approval.

        Args:
            session: Active session holding context, config, and tool router.
            text: The user's input; empty text skips adding a user message.
            max_iterations: Cap on LLM-call rounds for one turn.

        Returns:
            The final assistant response content, if any; None when the turn
            ended early (pending approval or error).
        """
        # Set session ID for this trace
        if hasattr(session, "session_id"):
            from lmnr import Laminar

            Laminar.set_trace_session_id(session_id=session.session_id)

        # Add user message to history only if there's actual content
        if text:
            user_msg = Message(role="user", content=text)
            session.context_manager.add_message(user_msg)

        # Send event that we're processing
        await session.send_event(
            Event(event_type="processing", data={"message": "Processing user input"})
        )

        # Agentic loop - continue until model doesn't call tools or max iterations is reached
        iteration = 0
        final_response = None

        while iteration < max_iterations:
            messages = session.context_manager.get_messages()
            tools = session.tool_router.get_tool_specs_for_llm()
            try:
                # ── Stream the LLM response ──────────────────────────
                # api_key: dedicated inference token is only used for
                # huggingface/* models; otherwise litellm's defaults apply.
                response = await acompletion(
                    model=session.config.model_name,
                    messages=messages,
                    tools=tools,
                    tool_choice="auto",
                    stream=True,
                    stream_options={"include_usage": True},
                    api_key=_INFERENCE_API_KEY
                    if _INFERENCE_API_KEY
                    and session.config.model_name.startswith("huggingface/")
                    else None,
                )

                full_content = ""
                # Accumulator for streamed tool-call fragments, keyed by index.
                tool_calls_acc: dict[int, dict] = {}
                token_count = 0

                async for chunk in response:
                    choice = chunk.choices[0] if chunk.choices else None
                    if not choice:
                        # Last chunk may carry only usage info
                        if hasattr(chunk, "usage") and chunk.usage:
                            token_count = chunk.usage.total_tokens
                        continue

                    delta = choice.delta

                    # Stream text deltas to the frontend
                    if delta.content:
                        full_content += delta.content
                        await session.send_event(
                            Event(
                                event_type="assistant_chunk",
                                data={"content": delta.content},
                            )
                        )

                    # Accumulate tool-call deltas (name + args arrive in pieces)
                    if delta.tool_calls:
                        for tc_delta in delta.tool_calls:
                            idx = tc_delta.index
                            if idx not in tool_calls_acc:
                                tool_calls_acc[idx] = {
                                    "id": "",
                                    "type": "function",
                                    "function": {"name": "", "arguments": ""},
                                }
                            if tc_delta.id:
                                tool_calls_acc[idx]["id"] = tc_delta.id
                            if tc_delta.function:
                                if tc_delta.function.name:
                                    tool_calls_acc[idx]["function"]["name"] += (
                                        tc_delta.function.name
                                    )
                                if tc_delta.function.arguments:
                                    tool_calls_acc[idx]["function"]["arguments"] += (
                                        tc_delta.function.arguments
                                    )

                    # Capture usage from the final chunk
                    if hasattr(chunk, "usage") and chunk.usage:
                        token_count = chunk.usage.total_tokens

                # ── Stream finished — reconstruct full message ───────
                content = full_content or None

                # Build tool_calls list from accumulated deltas
                tool_calls: list[ToolCall] = []
                for idx in sorted(tool_calls_acc.keys()):
                    tc_data = tool_calls_acc[idx]
                    tool_calls.append(
                        ToolCall(
                            id=tc_data["id"],
                            type="function",
                            function={
                                "name": tc_data["function"]["name"],
                                "arguments": tc_data["function"]["arguments"],
                            },
                        )
                    )

                # Signal end of streaming to the frontend
                await session.send_event(
                    Event(event_type="assistant_stream_end", data={})
                )

                # If no tool calls, add assistant message and we're done
                if not tool_calls:
                    if content:
                        assistant_msg = Message(role="assistant", content=content)
                        session.context_manager.add_message(assistant_msg, token_count)
                        final_response = content
                    break

                # Add assistant message with tool calls to history
                assistant_msg = Message(
                    role="assistant",
                    content=content,
                    tool_calls=tool_calls,
                )
                session.context_manager.add_message(assistant_msg, token_count)

                # Separate tools into those requiring approval and those that don't
                approval_required_tools = []
                non_approval_tools = []

                for tc in tool_calls:
                    tool_name = tc.function.name
                    try:
                        tool_args = json.loads(tc.function.arguments)
                    except (json.JSONDecodeError, TypeError) as e:
                        logger.warning(f"Malformed tool arguments for {tool_name}: {e}")
                        tool_args = {}

                    if _needs_approval(tool_name, tool_args, session.config):
                        approval_required_tools.append(tc)
                    else:
                        non_approval_tools.append(tc)

                # Execute non-approval tools (in parallel when possible)
                if non_approval_tools:
                    # 1. Parse args and validate upfront
                    parsed_tools: list[
                        tuple[ChatCompletionMessageToolCall, str, dict, bool, str]
                    ] = []
                    for tc in non_approval_tools:
                        tool_name = tc.function.name
                        try:
                            tool_args = json.loads(tc.function.arguments)
                        except (json.JSONDecodeError, TypeError):
                            tool_args = {}

                        args_valid, error_msg = _validate_tool_args(tool_args)
                        parsed_tools.append(
                            (tc, tool_name, tool_args, args_valid, error_msg)
                        )

                    # 2. Send all tool_call events upfront (so frontend shows them all)
                    for tc, tool_name, tool_args, args_valid, _ in parsed_tools:
                        if args_valid:
                            await session.send_event(
                                Event(
                                    event_type="tool_call",
                                    data={
                                        "tool": tool_name,
                                        "arguments": tool_args,
                                        "tool_call_id": tc.id,
                                    },
                                )
                            )

                    # 3. Execute all valid tools in parallel
                    async def _exec_tool(
                        tc: ChatCompletionMessageToolCall,
                        name: str,
                        args: dict,
                        valid: bool,
                        err: str,
                    ) -> tuple[ChatCompletionMessageToolCall, str, dict, str, bool]:
                        # Invalid args short-circuit: the validation error
                        # becomes the tool "output" with success=False.
                        if not valid:
                            return (tc, name, args, err, False)
                        out, ok = await session.tool_router.call_tool(
                            name, args, session=session
                        )
                        return (tc, name, args, out, ok)

                    results = await asyncio.gather(
                        *[
                            _exec_tool(tc, name, args, valid, err)
                            for tc, name, args, valid, err in parsed_tools
                        ]
                    )

                    # 4. Record results and send outputs (order preserved)
                    for tc, tool_name, tool_args, output, success in results:
                        tool_msg = Message(
                            role="tool",
                            content=output,
                            tool_call_id=tc.id,
                            name=tool_name,
                        )
                        session.context_manager.add_message(tool_msg)

                        await session.send_event(
                            Event(
                                event_type="tool_output",
                                data={
                                    "tool": tool_name,
                                    "tool_call_id": tc.id,
                                    "output": output,
                                    "success": success,
                                },
                            )
                        )

                # If there are tools requiring approval, ask for batch approval
                if approval_required_tools:
                    # Prepare batch approval data
                    tools_data = []
                    for tc in approval_required_tools:
                        tool_name = tc.function.name
                        try:
                            tool_args = json.loads(tc.function.arguments)
                        except (json.JSONDecodeError, TypeError):
                            tool_args = {}
                        tools_data.append(
                            {
                                "tool": tool_name,
                                "arguments": tool_args,
                                "tool_call_id": tc.id,
                            }
                        )

                    await session.send_event(
                        Event(
                            event_type="approval_required",
                            data={
                                "tools": tools_data,  # Batch of tools
                                "count": len(tools_data),
                            },
                        )
                    )

                    # Store all approval-requiring tools
                    session.pending_approval = {
                        "tool_calls": approval_required_tools,
                    }

                    # Return early - wait for EXEC_APPROVAL operation
                    return None

                iteration += 1

            except Exception as e:
                import traceback

                # Surface the full traceback to the frontend and end the turn.
                await session.send_event(
                    Event(
                        event_type="error",
                        data={"error": str(e) + "\n" + traceback.format_exc()},
                    )
                )
                break

        # Compact history if the context grew past budget during this turn.
        old_length = session.context_manager.context_length
        await session.context_manager.compact(model_name=session.config.model_name)
        new_length = session.context_manager.context_length

        if new_length != old_length:
            await session.send_event(
                Event(
                    event_type="compacted",
                    data={"old_tokens": old_length, "new_tokens": new_length},
                )
            )

        await session.send_event(
            Event(
                event_type="turn_complete",
                data={"history_size": len(session.context_manager.items)},
            )
        )

        # Increment turn counter and check for auto-save
        session.increment_turn()
        await session.auto_save_if_needed()

        return final_response
420
+
421
    @staticmethod
    async def interrupt(session: Session) -> None:
        """Handle interrupt (like interrupt in codex.rs:1266).

        Calls session.interrupt() to set the session's interrupt state, then
        emits an "interrupted" event so listeners (e.g. the UI) can react.
        """
        session.interrupt()
        await session.send_event(Event(event_type="interrupted"))
426
+
427
+ @staticmethod
428
+ async def compact(session: Session) -> None:
429
+ """Handle compact (like compact in codex.rs:1317)"""
430
+ old_length = session.context_manager.context_length
431
+ await session.context_manager.compact(model_name=session.config.model_name)
432
+ new_length = session.context_manager.context_length
433
+
434
+ await session.send_event(
435
+ Event(
436
+ event_type="compacted",
437
+ data={"removed": old_length, "remaining": new_length},
438
+ )
439
+ )
440
+
441
+ @staticmethod
442
+ async def undo(session: Session) -> None:
443
+ """Remove the last complete turn (user msg + all assistant/tool msgs that follow).
444
+
445
+ Anthropic requires every tool_use to have a matching tool_result,
446
+ so we can't just pop 2 items — we must pop everything back to
447
+ (and including) the last user message to keep the history valid.
448
+ """
449
+ items = session.context_manager.items
450
+ if not items:
451
+ await session.send_event(Event(event_type="undo_complete"))
452
+ return
453
+
454
+ # Pop from the end until we've removed the last user message
455
+ removed_user = False
456
+ while items:
457
+ msg = items.pop()
458
+ if getattr(msg, "role", None) == "user":
459
+ removed_user = True
460
+ break
461
+
462
+ if not removed_user:
463
+ logger.warning("Undo: no user message found to remove")
464
+
465
+ await session.send_event(Event(event_type="undo_complete"))
466
+
467
+ @staticmethod
468
+ async def exec_approval(session: Session, approvals: list[dict]) -> None:
469
+ """Handle batch job execution approval"""
470
+ if not session.pending_approval:
471
+ await session.send_event(
472
+ Event(
473
+ event_type="error",
474
+ data={"error": "No pending approval to process"},
475
+ )
476
+ )
477
+ return
478
+
479
+ tool_calls = session.pending_approval.get("tool_calls", [])
480
+ if not tool_calls:
481
+ await session.send_event(
482
+ Event(
483
+ event_type="error",
484
+ data={"error": "No pending tool calls found"},
485
+ )
486
+ )
487
+ return
488
+
489
+ # Create a map of tool_call_id -> approval decision
490
+ approval_map = {a["tool_call_id"]: a for a in approvals}
491
+
492
+ # Separate approved and rejected tool calls
493
+ approved_tasks = []
494
+ rejected_tasks = []
495
+
496
+ for tc in tool_calls:
497
+ tool_name = tc.function.name
498
+ tool_args = json.loads(tc.function.arguments)
499
+ approval_decision = approval_map.get(tc.id, {"approved": False})
500
+
501
+ if approval_decision.get("approved", False):
502
+ approved_tasks.append((tc, tool_name, tool_args))
503
+ else:
504
+ rejected_tasks.append((tc, tool_name, approval_decision))
505
+
506
+ # Execute all approved tools concurrently
507
+ async def execute_tool(tc, tool_name, tool_args):
508
+ """Execute a single tool and return its result"""
509
+ await session.send_event(
510
+ Event(
511
+ event_type="tool_call",
512
+ data={
513
+ "tool": tool_name,
514
+ "arguments": tool_args,
515
+ "tool_call_id": tc.id,
516
+ },
517
+ )
518
+ )
519
+
520
+ output, success = await session.tool_router.call_tool(
521
+ tool_name, tool_args, session=session
522
+ )
523
+
524
+ return (tc, tool_name, output, success)
525
+
526
+ # Execute all approved tools concurrently and wait for ALL to complete
527
+ if approved_tasks:
528
+ results = await asyncio.gather(
529
+ *[
530
+ execute_tool(tc, tool_name, tool_args)
531
+ for tc, tool_name, tool_args in approved_tasks
532
+ ],
533
+ return_exceptions=True,
534
+ )
535
+
536
+ # Process results and add to context
537
+ for result in results:
538
+ if isinstance(result, Exception):
539
+ # Handle execution error
540
+ logger.error(f"Tool execution error: {result}")
541
+ continue
542
+
543
+ tc, tool_name, output, success = result
544
+
545
+ # Add tool result to context
546
+ tool_msg = Message(
547
+ role="tool",
548
+ content=output,
549
+ tool_call_id=tc.id,
550
+ name=tool_name,
551
+ )
552
+ session.context_manager.add_message(tool_msg)
553
+
554
+ await session.send_event(
555
+ Event(
556
+ event_type="tool_output",
557
+ data={
558
+ "tool": tool_name,
559
+ "tool_call_id": tc.id,
560
+ "output": output,
561
+ "success": success,
562
+ },
563
+ )
564
+ )
565
+
566
+ # Process rejected tools
567
+ for tc, tool_name, approval_decision in rejected_tasks:
568
+ rejection_msg = "Job execution cancelled by user"
569
+ user_feedback = approval_decision.get("feedback")
570
+ if user_feedback:
571
+ rejection_msg += f". User feedback: {user_feedback}"
572
+
573
+ tool_msg = Message(
574
+ role="tool",
575
+ content=rejection_msg,
576
+ tool_call_id=tc.id,
577
+ name=tool_name,
578
+ )
579
+ session.context_manager.add_message(tool_msg)
580
+
581
+ await session.send_event(
582
+ Event(
583
+ event_type="tool_output",
584
+ data={
585
+ "tool": tool_name,
586
+ "tool_call_id": tc.id,
587
+ "output": rejection_msg,
588
+ "success": False,
589
+ },
590
+ )
591
+ )
592
+
593
+ # Clear pending approval
594
+ session.pending_approval = None
595
+
596
+ # Continue agent loop with empty input to process the tool results
597
+ await Handlers.run_agent(session, "")
598
+
599
    @staticmethod
    async def shutdown(session: Session) -> bool:
        """Handle shutdown (like shutdown in codex.rs:1329).

        Optionally kicks off a detached trajectory save, marks the session
        as stopped, and emits a "shutdown" event.

        Returns:
            True (always); process_submission inverts this to stop the loop.
        """
        # Save session trajectory if enabled (fire-and-forget, returns immediately)
        if session.config.save_sessions:
            logger.info("Saving session...")
            repo_id = session.config.session_dataset_repo
            _ = session.save_and_upload_detached(repo_id)

        # Stopping the loop here also prevents the emergency save in
        # submission_loop's finally-block from firing a second save.
        session.is_running = False
        await session.send_event(Event(event_type="shutdown"))
        return True
611
+
612
+
613
async def process_submission(session: Session, submission) -> bool:
    """
    Process a single submission and return whether to continue running.

    Returns:
        bool: True to continue, False to shutdown
    """
    op = submission.operation
    logger.debug("Received operation: %s", op.op_type.value)

    payload = op.data or {}
    op_type = op.op_type

    if op_type == OpType.USER_INPUT:
        await Handlers.run_agent(session, payload.get("text", ""))
    elif op_type == OpType.INTERRUPT:
        await Handlers.interrupt(session)
    elif op_type == OpType.COMPACT:
        await Handlers.compact(session)
    elif op_type == OpType.UNDO:
        await Handlers.undo(session)
    elif op_type == OpType.EXEC_APPROVAL:
        await Handlers.exec_approval(session, payload.get("approvals", []))
    elif op_type == OpType.SHUTDOWN:
        # shutdown() returns True, so this yields False => stop the loop.
        return not await Handlers.shutdown(session)
    else:
        logger.warning(f"Unknown operation: {op.op_type}")

    return True
650
+
651
+
652
@observe(name="submission_loop")
async def submission_loop(
    submission_queue: asyncio.Queue,
    event_queue: asyncio.Queue,
    config: Config | None = None,
    tool_router: ToolRouter | None = None,
) -> None:
    """
    Main agent loop - processes submissions and dispatches to handlers.
    This is the core of the agent (like submission_loop in codex.rs:1259-1340)

    Args:
        submission_queue: Incoming client submissions (operations).
        event_queue: Outgoing events; handed to the Session.
        config: Optional agent configuration.
        tool_router: Tool router, entered as an async context manager for
            the lifetime of the loop.
    """

    # Create session with tool router
    session = Session(event_queue, config=config, tool_router=tool_router)
    logger.info("Agent loop started")

    # Retry any failed uploads from previous sessions (fire-and-forget)
    if config and config.save_sessions:
        Session.retry_failed_uploads_detached(
            directory="session_logs", repo_id=config.session_dataset_repo
        )

    try:
        # Main processing loop
        # NOTE(review): assumes tool_router is not None here despite the
        # Optional annotation — confirm all callers pass one.
        async with tool_router:
            # Emit ready event after initialization
            await session.send_event(
                Event(event_type="ready", data={"message": "Agent initialized"})
            )

            while session.is_running:
                submission = await submission_queue.get()

                try:
                    should_continue = await process_submission(session, submission)
                    if not should_continue:
                        break
                except asyncio.CancelledError:
                    # Surrounding task was cancelled mid-submission; exit cleanly.
                    logger.warning("Agent loop cancelled")
                    break
                except Exception as e:
                    # Surface handler failures to the client but keep looping.
                    logger.error(f"Error in agent loop: {e}")
                    await session.send_event(
                        Event(event_type="error", data={"error": str(e)})
                    )

        logger.info("Agent loop exited")

    finally:
        # Emergency save if session saving is enabled and shutdown wasn't called properly
        # (Handlers.shutdown sets is_running=False, so a still-True flag here
        # means we are exiting without a clean shutdown).
        if session.config.save_sessions and session.is_running:
            logger.info("Emergency save: preserving session before exit...")
            try:
                local_path = session.save_and_upload_detached(
                    session.config.session_dataset_repo
                )
                if local_path:
                    logger.info("Emergency save successful, upload in progress")
            except Exception as e:
                logger.error(f"Emergency save failed: {e}")
agent/core/session.py ADDED
@@ -0,0 +1,255 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import json
3
+ import logging
4
+ import subprocess
5
+ import sys
6
+ import uuid
7
+ from dataclasses import dataclass
8
+ from datetime import datetime
9
+ from enum import Enum
10
+ from pathlib import Path
11
+ from typing import Any, Optional
12
+
13
+ from agent.config import Config
14
+ from agent.context_manager.manager import ContextManager
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+ # Local max-token lookup — avoids litellm.get_max_tokens() which can hang
19
+ # on network calls for certain providers (known litellm issue).
20
+ _MAX_TOKENS_MAP: dict[str, int] = {
21
+ # Anthropic
22
+ "anthropic/claude-opus-4-5-20251101": 200_000,
23
+ "anthropic/claude-sonnet-4-5-20250929": 200_000,
24
+ "anthropic/claude-sonnet-4-20250514": 200_000,
25
+ "anthropic/claude-haiku-3-5-20241022": 200_000,
26
+ "anthropic/claude-3-5-sonnet-20241022": 200_000,
27
+ "anthropic/claude-3-opus-20240229": 200_000,
28
+ "huggingface/novita/MiniMaxAI/MiniMax-M2.1": 196_608,
29
+ "huggingface/novita/moonshotai/Kimi-K2.5": 262_144,
30
+ "huggingface/novita/zai-org/GLM-5": 200_000,
31
+ }
32
+ _DEFAULT_MAX_TOKENS = 200_000
33
+
34
+
35
+ def _get_max_tokens_safe(model_name: str) -> int:
36
+ """Return the max context window for a model without network calls."""
37
+ tokens = _MAX_TOKENS_MAP.get(model_name)
38
+ if tokens:
39
+ return tokens
40
+ # Fallback: try litellm but with a short timeout via threading
41
+ try:
42
+ from litellm import get_max_tokens
43
+
44
+ result = get_max_tokens(model_name)
45
+ if result and isinstance(result, int):
46
+ return result
47
+ logger.warning(
48
+ f"get_max_tokens returned {result} for {model_name}, using default"
49
+ )
50
+ return _DEFAULT_MAX_TOKENS
51
+ except Exception as e:
52
+ logger.warning(f"get_max_tokens failed for {model_name}, using default: {e}")
53
+ return _DEFAULT_MAX_TOKENS
54
+
55
+
56
class OpType(Enum):
    """Operation types a client can submit to the agent loop.

    Values are the wire-format strings carried by submissions; dispatch
    happens in process_submission().
    """

    USER_INPUT = "user_input"  # run the agent on new user text
    EXEC_APPROVAL = "exec_approval"  # approve/reject pending tool calls
    INTERRUPT = "interrupt"  # cancel the in-flight task
    UNDO = "undo"  # drop the last complete turn from history
    COMPACT = "compact"  # compact the conversation context
    SHUTDOWN = "shutdown"  # stop the agent loop
63
+
64
+
65
@dataclass
class Event:
    """Event emitted by the agent back to the client over the event queue."""

    event_type: str  # e.g. "ready", "tool_call", "compacted", "error", "shutdown"
    data: Optional[dict[str, Any]] = None  # event-specific payload, if any
69
+
70
+
71
class Session:
    """
    Maintains agent session state: config, conversation context, pending
    approvals, and a trajectory log that can be saved locally and uploaded
    to a HuggingFace dataset by a detached subprocess.
    Similar to Session in codex-rs/core/src/codex.rs
    """

    def __init__(
        self,
        event_queue: asyncio.Queue,
        config: Config | None = None,
        tool_router=None,
        context_manager: ContextManager | None = None,
    ):
        """Initialize session state.

        Args:
            event_queue: Queue used to push Events back to the client.
            config: Agent configuration; a default Config is created if None.
            tool_router: Router providing tool specs / execution (optional).
            context_manager: Existing context manager to reuse, or None to
                create one sized to the model's context window.
        """
        # BUGFIX: resolve the config default FIRST. The original code read
        # config.model_name (below) before assigning self.config, so passing
        # config=None without a context_manager raised AttributeError — the
        # exact call shape submission_loop uses.
        self.config = config or Config(
            model_name="anthropic/claude-sonnet-4-5-20250929",
        )
        self.tool_router = tool_router
        tool_specs = tool_router.get_tool_specs_for_llm() if tool_router else []
        self.context_manager = context_manager or ContextManager(
            max_context=_get_max_tokens_safe(self.config.model_name),
            compact_size=0.1,
            untouched_messages=5,
            tool_specs=tool_specs,
        )
        self.event_queue = event_queue
        self.session_id = str(uuid.uuid4())
        self.is_running = True
        self.current_task: asyncio.Task | None = None
        self.pending_approval: Optional[dict[str, Any]] = None
        # User's HF OAuth token — set by session_manager after construction
        self.hf_token: Optional[str] = None

        # Session trajectory logging
        self.logged_events: list[dict] = []
        self.session_start_time = datetime.now().isoformat()
        self.turn_count: int = 0
        self.last_auto_save_turn: int = 0

    async def send_event(self, event: Event) -> None:
        """Send event back to client and record it in the trajectory log."""
        await self.event_queue.put(event)

        # Log event to trajectory
        self.logged_events.append(
            {
                "timestamp": datetime.now().isoformat(),
                "event_type": event.event_type,
                "data": event.data,
            }
        )

    def interrupt(self) -> None:
        """Interrupt current running task (no-op when nothing is running)."""
        if self.current_task and not self.current_task.done():
            self.current_task.cancel()

    def increment_turn(self) -> None:
        """Increment turn counter (called after each user interaction)."""
        self.turn_count += 1

    async def auto_save_if_needed(self) -> None:
        """Check if auto-save should trigger and save if so (completely non-blocking)."""
        if not self.config.save_sessions:
            return

        interval = self.config.auto_save_interval
        if interval <= 0:
            # Non-positive interval disables auto-save.
            return

        turns_since_last_save = self.turn_count - self.last_auto_save_turn
        if turns_since_last_save >= interval:
            logger.info(f"Auto-saving session (turn {self.turn_count})...")
            # Fire-and-forget save - returns immediately
            self.save_and_upload_detached(self.config.session_dataset_repo)
            self.last_auto_save_turn = self.turn_count

    def get_trajectory(self) -> dict:
        """Serialize complete session trajectory for logging.

        Returns:
            Dict with session metadata, the full message history, and all
            logged events.
        """
        return {
            "session_id": self.session_id,
            "session_start_time": self.session_start_time,
            "session_end_time": datetime.now().isoformat(),
            "model_name": self.config.model_name,
            "messages": [msg.model_dump() for msg in self.context_manager.items],
            "events": self.logged_events,
        }

    def save_trajectory_local(
        self,
        directory: str = "session_logs",
        upload_status: str = "pending",
        dataset_url: Optional[str] = None,
    ) -> Optional[str]:
        """
        Save trajectory to local JSON file as backup with upload status

        Args:
            directory: Directory to save logs (default: "session_logs")
            upload_status: Status of upload attempt ("pending", "success", "failed")
            dataset_url: URL of dataset if upload succeeded

        Returns:
            Path to saved file if successful, None otherwise
        """
        try:
            log_dir = Path(directory)
            log_dir.mkdir(parents=True, exist_ok=True)

            trajectory = self.get_trajectory()

            # Add upload metadata so the uploader subprocess can track state.
            trajectory["upload_status"] = upload_status
            trajectory["upload_url"] = dataset_url
            trajectory["last_save_time"] = datetime.now().isoformat()

            filename = f"session_{self.session_id}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
            filepath = log_dir / filename

            with open(filepath, "w") as f:
                json.dump(trajectory, f, indent=2)

            return str(filepath)
        except Exception as e:
            # Best-effort: a failed local save is logged, never raised.
            logger.error(f"Failed to save session locally: {e}")
            return None

    def save_and_upload_detached(self, repo_id: str) -> Optional[str]:
        """
        Save session locally and spawn detached subprocess for upload (fire-and-forget)

        Args:
            repo_id: HuggingFace dataset repo ID

        Returns:
            Path to local save file, or None if the local save failed.
        """
        # Save locally first (fast, synchronous)
        local_path = self.save_trajectory_local(upload_status="pending")
        if not local_path:
            return None

        # Spawn detached subprocess for upload (fire-and-forget)
        try:
            uploader_script = Path(__file__).parent / "session_uploader.py"

            # Detached process: no inherited stdio, own session, not waited on.
            subprocess.Popen(
                [sys.executable, str(uploader_script), "upload", local_path, repo_id],
                stdin=subprocess.DEVNULL,
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
                start_new_session=True,  # Detach from parent
            )
        except Exception as e:
            # Upload is best-effort; the local file keeps status "pending"
            # so a later retry pass can pick it up.
            logger.warning(f"Failed to spawn upload subprocess: {e}")

        return local_path

    @staticmethod
    def retry_failed_uploads_detached(
        directory: str = "session_logs", repo_id: Optional[str] = None
    ) -> None:
        """
        Spawn detached subprocess to retry failed/pending uploads (fire-and-forget)

        Args:
            directory: Directory containing session logs
            repo_id: Target dataset repo ID; no-op when missing.
        """
        if not repo_id:
            return

        try:
            uploader_script = Path(__file__).parent / "session_uploader.py"

            # Spawn detached subprocess for retry
            subprocess.Popen(
                [sys.executable, str(uploader_script), "retry", directory, repo_id],
                stdin=subprocess.DEVNULL,
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
                start_new_session=True,  # Detach from parent
            )
        except Exception as e:
            logger.warning(f"Failed to spawn retry subprocess: {e}")
agent/core/session_uploader.py ADDED
@@ -0,0 +1,202 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Standalone script for uploading session trajectories to HuggingFace.
4
+ This runs as a separate process to avoid blocking the main agent.
5
+ Uses individual file uploads to avoid race conditions.
6
+ """
7
+
8
+ import json
9
+ import os
10
+ import sys
11
+ from datetime import datetime
12
+ from pathlib import Path
13
+
14
+ from dotenv import load_dotenv
15
+
16
load_dotenv()  # pull HF_SESSION_UPLOAD_TOKEN (and friends) from a local .env

# Token for session uploads — loaded from env var (never hardcode tokens in source)
# An empty token makes upload_session_as_file() mark the session "failed".
_SESSION_TOKEN = os.environ.get("HF_SESSION_UPLOAD_TOKEN", "")
20
+
21
+
22
def upload_session_as_file(
    session_file: str, repo_id: str, max_retries: int = 3
) -> bool:
    """
    Upload a single session as an individual JSONL file (no race conditions)

    Each session becomes its own file under sessions/YYYY-MM-DD/, so
    concurrent uploader processes never rewrite a shared file. The local
    session JSON is updated in place with the upload outcome
    ("success"/"failed") so retries can skip completed work.

    Args:
        session_file: Path to local session JSON file
        repo_id: HuggingFace dataset repo ID
        max_retries: Number of retry attempts

    Returns:
        True if successful, False otherwise
    """
    try:
        from huggingface_hub import HfApi
    except ImportError:
        print("Error: huggingface_hub library not available", file=sys.stderr)
        return False

    try:
        # Load session data
        with open(session_file, "r") as f:
            data = json.load(f)

        # Check if already uploaded (idempotent retries)
        upload_status = data.get("upload_status")
        if upload_status == "success":
            return True

        # Use dedicated session upload token (write-only access to session dataset)
        hf_token = _SESSION_TOKEN
        if not hf_token:
            # No token configured: mark failed so the retry pass can try again
            # once the environment is fixed.
            data["upload_status"] = "failed"
            with open(session_file, "w") as f:
                json.dump(data, f, indent=2)
            return False

        # Prepare JSONL content (single line)
        # Store messages and events as JSON strings to avoid schema conflicts
        session_row = {
            "session_id": data["session_id"],
            "session_start_time": data["session_start_time"],
            "session_end_time": data["session_end_time"],
            "model_name": data["model_name"],
            "messages": json.dumps(data["messages"]),
            "events": json.dumps(data["events"]),
        }

        # Create temporary JSONL file
        import tempfile

        with tempfile.NamedTemporaryFile(
            mode="w", suffix=".jsonl", delete=False
        ) as tmp:
            json.dump(session_row, tmp)  # Single line JSON
            tmp_path = tmp.name

        try:
            # Generate unique path in repo: sessions/YYYY-MM-DD/session_id.jsonl
            session_id = data["session_id"]
            date_str = datetime.fromisoformat(data["session_start_time"]).strftime(
                "%Y-%m-%d"
            )
            repo_path = f"sessions/{date_str}/{session_id}.jsonl"

            # Upload with retries
            api = HfApi()
            for attempt in range(max_retries):
                try:
                    # Try to create repo if it doesn't exist (idempotent)
                    try:
                        api.create_repo(
                            repo_id=repo_id,
                            repo_type="dataset",
                            private=False,
                            token=hf_token,
                            exist_ok=True,  # Don't fail if already exists
                        )

                    except Exception:
                        # Repo might already exist, continue
                        pass

                    # Upload the session file
                    api.upload_file(
                        path_or_fileobj=tmp_path,
                        path_in_repo=repo_path,
                        repo_id=repo_id,
                        repo_type="dataset",
                        token=hf_token,
                        commit_message=f"Add session {session_id}",
                    )

                    # Update local status to success
                    data["upload_status"] = "success"
                    data["upload_url"] = f"https://huggingface.co/datasets/{repo_id}"
                    with open(session_file, "w") as f:
                        json.dump(data, f, indent=2)

                    return True

                except Exception:
                    if attempt < max_retries - 1:
                        import time

                        # Exponential backoff: 1s, 2s, 4s, ...
                        wait_time = 2**attempt
                        time.sleep(wait_time)
                    else:
                        # Final attempt failed: persist "failed" for retry pass
                        data["upload_status"] = "failed"
                        with open(session_file, "w") as f:
                            json.dump(data, f, indent=2)
                        return False

        finally:
            # Clean up temp file
            try:
                os.unlink(tmp_path)
            except Exception:
                pass

    except Exception as e:
        # Catch-all boundary for a standalone CLI process: report and exit False.
        print(f"Error uploading session: {e}", file=sys.stderr)
        return False
148
+
149
+
150
def retry_failed_uploads(directory: str, repo_id: str):
    """Re-attempt the upload of every session file still marked pending/failed.

    Silently skips unreadable files; sessions already marked "success" are
    left untouched.
    """
    base = Path(directory)
    if not base.exists():
        return

    for candidate in base.glob("session_*.json"):
        try:
            with open(candidate, "r") as fh:
                payload = json.load(fh)
            # Only retry pending or failed uploads
            if payload.get("upload_status", "unknown") in ("pending", "failed"):
                upload_session_as_file(str(candidate), repo_id)
        except Exception:
            # Best-effort pass: one bad file must not stop the others.
            pass
171
+
172
+
173
if __name__ == "__main__":
    # CLI entry point. Exit codes: 0 on success, 1 on usage error or failure.
    argv = sys.argv
    if len(argv) < 3:
        print("Usage: session_uploader.py <command> <args...>")
        sys.exit(1)

    cmd = argv[1]

    if cmd == "upload":
        # python session_uploader.py upload <session_file> <repo_id>
        if len(argv) < 4:
            print("Usage: session_uploader.py upload <session_file> <repo_id>")
            sys.exit(1)
        sys.exit(0 if upload_session_as_file(argv[2], argv[3]) else 1)

    elif cmd == "retry":
        # python session_uploader.py retry <directory> <repo_id>
        if len(argv) < 4:
            print("Usage: session_uploader.py retry <directory> <repo_id>")
            sys.exit(1)
        retry_failed_uploads(argv[2], argv[3])
        sys.exit(0)

    else:
        print(f"Unknown command: {cmd}")
        sys.exit(1)
agent/core/tools.py ADDED
@@ -0,0 +1,337 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Tool system for the agent
3
+ Provides ToolSpec and ToolRouter for managing both built-in and MCP tools
4
+ """
5
+
6
+ import logging
7
+ import warnings
8
+ from dataclasses import dataclass
9
+ from typing import Any, Awaitable, Callable, Optional
10
+
11
+ logger = logging.getLogger(__name__)
12
+
13
+ from fastmcp import Client
14
+ from fastmcp.exceptions import ToolError
15
+ from lmnr import observe
16
+ from mcp.types import EmbeddedResource, ImageContent, TextContent
17
+
18
+ from agent.config import MCPServerConfig
19
+ from agent.tools.dataset_tools import (
20
+ HF_INSPECT_DATASET_TOOL_SPEC,
21
+ hf_inspect_dataset_handler,
22
+ )
23
+ from agent.tools.docs_tools import (
24
+ EXPLORE_HF_DOCS_TOOL_SPEC,
25
+ HF_DOCS_FETCH_TOOL_SPEC,
26
+ explore_hf_docs_handler,
27
+ hf_docs_fetch_handler,
28
+ )
29
+ from agent.tools.github_find_examples import (
30
+ GITHUB_FIND_EXAMPLES_TOOL_SPEC,
31
+ github_find_examples_handler,
32
+ )
33
+ from agent.tools.github_list_repos import (
34
+ GITHUB_LIST_REPOS_TOOL_SPEC,
35
+ github_list_repos_handler,
36
+ )
37
+ from agent.tools.github_read_file import (
38
+ GITHUB_READ_FILE_TOOL_SPEC,
39
+ github_read_file_handler,
40
+ )
41
+ from agent.tools.hf_repo_files_tool import (
42
+ HF_REPO_FILES_TOOL_SPEC,
43
+ hf_repo_files_handler,
44
+ )
45
+ from agent.tools.hf_repo_git_tool import (
46
+ HF_REPO_GIT_TOOL_SPEC,
47
+ hf_repo_git_handler,
48
+ )
49
+ from agent.tools.jobs_tool import HF_JOBS_TOOL_SPEC, hf_jobs_handler
50
+ from agent.tools.plan_tool import PLAN_TOOL_SPEC, plan_tool_handler
51
+
52
+ # NOTE: Private HF repo tool disabled - replaced by hf_repo_files and hf_repo_git
53
+ # from agent.tools.private_hf_repo_tools import (
54
+ # PRIVATE_HF_REPO_TOOL_SPEC,
55
+ # private_hf_repo_handler,
56
+ # )
57
+
58
# Suppress aiohttp deprecation warning
warnings.filterwarnings(
    "ignore", category=DeprecationWarning, module="aiohttp.connector"
)

# MCP-provided tools skipped during registration (see
# ToolRouter.register_mcp_tools). Built-in tools cover hf_jobs and the
# doc search/fetch capabilities.
NOT_ALLOWED_TOOL_NAMES = ["hf_jobs", "hf_doc_search", "hf_doc_fetch", "hf_whoami"]
64
+
65
+
66
def convert_mcp_content_to_string(content: list) -> str:
    """
    Convert MCP content blocks to a string format compatible with LLM messages.

    Handles the three MCP block types:
    - TextContent: rendered as its .text
    - ImageContent: rendered as an "[Image: <mime>]" placeholder
    - EmbeddedResource: rendered as its embedded text, or a placeholder for
      binary/unknown resources
    Anything else falls back to str().

    Args:
        content: List of MCP content blocks

    Returns:
        String representation of the content suitable for LLM consumption
    """
    if not content:
        return ""

    def _render(block) -> str:
        """Render a single MCP content block as display text."""
        if isinstance(block, TextContent):
            # Plain text blocks carry their payload directly.
            return block.text
        if isinstance(block, ImageContent):
            # TODO: Handle images properly instead of a placeholder
            return f"[Image: {block.mimeType}]"
        if isinstance(block, EmbeddedResource):
            # TODO: Handle embedded resources properly
            resource = block.resource
            if hasattr(resource, "text") and resource.text:
                return resource.text
            if hasattr(resource, "blob") and resource.blob:
                return f"[Binary data: {resource.mimeType if hasattr(resource, 'mimeType') else 'unknown'}]"
            return f"[Resource: {resource.uri if hasattr(resource, 'uri') else 'unknown'}]"
        # Fallback: try to convert to string
        return str(block)

    return "\n".join(_render(block) for block in content)
112
+
113
+
114
@dataclass
class ToolSpec:
    """Tool specification for LLM"""

    name: str  # unique tool name exposed to the model
    description: str  # model-facing description of what the tool does
    parameters: dict[str, Any]  # JSON-schema of the tool's arguments
    # Async callable returning (output, success); None for MCP-routed tools,
    # which ToolRouter.call_tool forwards to the MCP client instead.
    handler: Optional[Callable[[dict[str, Any]], Awaitable[tuple[str, bool]]]] = None
122
+
123
+
124
class ToolRouter:
    """
    Routes tool calls to appropriate handlers.
    Based on codex-rs/core/src/tools/router.rs

    Built-in tools carry their own async handler; MCP tools are registered
    with handler=None and routed through the shared fastmcp Client. The
    router is used as an async context manager: entering connects the MCP
    client (if any) and registers its tools.
    """

    def __init__(self, mcp_servers: dict[str, MCPServerConfig]):
        """Register built-in tools and (optionally) prepare an MCP client.

        Args:
            mcp_servers: Mapping of server name -> MCPServerConfig. Empty or
                falsy means no MCP client is created.
        """
        self.tools: dict[str, ToolSpec] = {}
        self.mcp_servers: dict[str, dict[str, Any]] = {}

        for tool in create_builtin_tools():
            self.register_tool(tool)

        self.mcp_client: Client | None = None
        if mcp_servers:
            # fastmcp's Client expects {"mcpServers": {name: config-dict}}.
            mcp_servers_payload = {}
            for name, server in mcp_servers.items():
                mcp_servers_payload[name] = server.model_dump()
            self.mcp_client = Client({"mcpServers": mcp_servers_payload})
        self._mcp_initialized = False

    def register_tool(self, tool: ToolSpec) -> None:
        """Register (or overwrite) a tool spec under its name."""
        self.tools[tool.name] = tool

    async def register_mcp_tools(self) -> None:
        """Discover tools from the MCP client and register the allowed ones.

        Tools listed in NOT_ALLOWED_TOOL_NAMES are skipped; the rest are
        registered with handler=None so call_tool() routes them via MCP.
        """
        tools = await self.mcp_client.list_tools()
        registered_names = []
        skipped_count = 0
        for tool in tools:
            if tool.name in NOT_ALLOWED_TOOL_NAMES:
                skipped_count += 1
                continue
            registered_names.append(tool.name)
            self.register_tool(
                ToolSpec(
                    name=tool.name,
                    description=tool.description,
                    parameters=tool.inputSchema,
                    handler=None,
                )
            )
        logger.info(
            f"Loaded {len(registered_names)} MCP tools: {', '.join(registered_names)} ({skipped_count} disabled)"
        )

    async def register_openapi_tool(self) -> None:
        """Register the OpenAPI search tool (requires async initialization)"""
        # Imported lazily to avoid pulling docs_tools' async setup at import time.
        from agent.tools.docs_tools import (
            _get_api_search_tool_spec,
            search_openapi_handler,
        )

        # Register search_hf_api_endpoints with dynamic spec
        openapi_spec = await _get_api_search_tool_spec()
        self.register_tool(
            ToolSpec(
                name=openapi_spec["name"],
                description=openapi_spec["description"],
                parameters=openapi_spec["parameters"],
                handler=search_openapi_handler,
            )
        )
        logger.info(f"Loaded OpenAPI search tool: {openapi_spec['name']}")

    def get_tool_specs_for_llm(self) -> list[dict[str, Any]]:
        """Get tool specifications in OpenAI format"""
        specs = []
        for tool in self.tools.values():
            specs.append(
                {
                    "type": "function",
                    "function": {
                        "name": tool.name,
                        "description": tool.description,
                        "parameters": tool.parameters,
                    },
                }
            )
        return specs

    async def __aenter__(self) -> "ToolRouter":
        """Connect the MCP client (if configured) and finish tool registration."""
        if self.mcp_client is not None:
            await self.mcp_client.__aenter__()
            await self.mcp_client.initialize()
            await self.register_mcp_tools()
            self._mcp_initialized = True

        # Register OpenAPI tool (requires async initialization)
        await self.register_openapi_tool()

        total_tools = len(self.tools)
        logger.info(f"Agent ready with {total_tools} tools total")

        return self

    async def __aexit__(self, exc_type, exc, tb) -> None:
        """Tear down the MCP client connection, if one was opened."""
        if self.mcp_client is not None:
            await self.mcp_client.__aexit__(exc_type, exc, tb)
            self._mcp_initialized = False

    @observe(name="call_tool")
    async def call_tool(
        self, tool_name: str, arguments: dict[str, Any], session: Any = None
    ) -> tuple[str, bool]:
        """
        Call a tool and return (output_string, success_bool).

        For MCP tools, converts the CallToolResult content blocks to a string.
        For built-in tools, calls their handler directly.
        """
        # Check if this is a built-in tool with a handler
        tool = self.tools.get(tool_name)
        if tool and tool.handler:
            import inspect

            # Check if handler accepts session argument; only pass it to
            # handlers whose signatures declare it.
            sig = inspect.signature(tool.handler)
            if "session" in sig.parameters:
                return await tool.handler(arguments, session=session)
            return await tool.handler(arguments)

        # Otherwise, use MCP client
        if self._mcp_initialized:
            try:
                result = await self.mcp_client.call_tool(tool_name, arguments)
                output = convert_mcp_content_to_string(result.content)
                return output, not result.is_error
            except ToolError as e:
                # Catch MCP tool errors and return them to the agent
                error_msg = f"Tool error: {str(e)}"
                return error_msg, False

        # Unknown tool name and no active MCP client.
        return "MCP client not initialized", False
257
+
258
+
259
+ # ============================================================================
260
+ # BUILT-IN TOOL HANDLERS
261
+ # ============================================================================
262
+
263
+
264
def create_builtin_tools() -> list[ToolSpec]:
    """Create built-in tool specifications"""
    # in order of importance: docs exploration/fetch, dataset inspection,
    # planning, job management, HF repo tools, then GitHub example tools.
    spec_handler_pairs = [
        (EXPLORE_HF_DOCS_TOOL_SPEC, explore_hf_docs_handler),
        (HF_DOCS_FETCH_TOOL_SPEC, hf_docs_fetch_handler),
        (HF_INSPECT_DATASET_TOOL_SPEC, hf_inspect_dataset_handler),
        (PLAN_TOOL_SPEC, plan_tool_handler),
        (HF_JOBS_TOOL_SPEC, hf_jobs_handler),
        (HF_REPO_FILES_TOOL_SPEC, hf_repo_files_handler),
        (HF_REPO_GIT_TOOL_SPEC, hf_repo_git_handler),
        (GITHUB_FIND_EXAMPLES_TOOL_SPEC, github_find_examples_handler),
        (GITHUB_LIST_REPOS_TOOL_SPEC, github_list_repos_handler),
        (GITHUB_READ_FILE_TOOL_SPEC, github_read_file_handler),
    ]

    tools = [
        ToolSpec(
            name=spec["name"],
            description=spec["description"],
            parameters=spec["parameters"],
            handler=handler,
        )
        for spec, handler in spec_handler_pairs
    ]

    tool_names = ", ".join([t.name for t in tools])
    logger.info(f"Loaded {len(tools)} built-in tools: {tool_names}")

    return tools
agent/main.py ADDED
@@ -0,0 +1,567 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Interactive CLI chat with the agent
3
+ """
4
+
5
+ import asyncio
6
+ import json
7
+ import os
8
+ from dataclasses import dataclass
9
+ from pathlib import Path
10
+ from typing import Any, Optional
11
+
12
+ import litellm
13
+ from lmnr import Laminar, LaminarLiteLLMCallback
14
+ from prompt_toolkit import PromptSession
15
+
16
+ from agent.config import load_config
17
+ from agent.core.agent_loop import submission_loop
18
+ from agent.core.session import OpType
19
+ from agent.core.tools import ToolRouter
20
+ from agent.utils.reliability_checks import check_training_script_save_pattern
21
+ from agent.utils.terminal_display import (
22
+ format_error,
23
+ format_header,
24
+ format_plan_display,
25
+ format_separator,
26
+ format_success,
27
+ format_tool_call,
28
+ format_tool_output,
29
+ format_turn_complete,
30
+ )
31
+
32
+ litellm.drop_params = True
33
+
34
+
35
+ def _safe_get_args(arguments: dict) -> dict:
36
+ """Safely extract args dict from arguments, handling cases where LLM passes string."""
37
+ args = arguments.get("args", {})
38
+ # Sometimes LLM passes args as string instead of dict
39
+ if isinstance(args, str):
40
+ return {}
41
+ return args if isinstance(args, dict) else {}
42
+
43
+
44
+ lmnr_api_key = os.environ.get("LMNR_API_KEY")
45
+ if lmnr_api_key:
46
+ try:
47
+ Laminar.initialize(project_api_key=lmnr_api_key)
48
+ litellm.callbacks = [LaminarLiteLLMCallback()]
49
+ print("Laminar initialized")
50
+ except Exception as e:
51
+ print(f"Failed to initialize Laminar: {e}")
52
+
53
+
54
+ @dataclass
55
+ class Operation:
56
+ """Operation to be executed by the agent"""
57
+
58
+ op_type: OpType
59
+ data: Optional[dict[str, Any]] = None
60
+
61
+
62
+ @dataclass
63
+ class Submission:
64
+ """Submission to the agent loop"""
65
+
66
+ id: str
67
+ operation: Operation
68
+
69
+
70
+ async def event_listener(
71
+ event_queue: asyncio.Queue,
72
+ submission_queue: asyncio.Queue,
73
+ turn_complete_event: asyncio.Event,
74
+ ready_event: asyncio.Event,
75
+ prompt_session: PromptSession,
76
+ config=None,
77
+ ) -> None:
78
+ """Background task that listens for events and displays them"""
79
+ submission_id = [1000] # Use list to make it mutable in closure
80
+ last_tool_name = [None] # Track last tool called
81
+
82
+ while True:
83
+ try:
84
+ event = await event_queue.get()
85
+
86
+ # Display event
87
+ if event.event_type == "ready":
88
+ print(format_success("\U0001f917 Agent ready"))
89
+ ready_event.set()
90
+ elif event.event_type == "assistant_message":
91
+ content = event.data.get("content", "") if event.data else ""
92
+ if content:
93
+ print(f"\nAssistant: {content}")
94
+ elif event.event_type == "tool_call":
95
+ tool_name = event.data.get("tool", "") if event.data else ""
96
+ arguments = event.data.get("arguments", {}) if event.data else {}
97
+ if tool_name:
98
+ last_tool_name[0] = tool_name # Store for tool_output event
99
+ args_str = json.dumps(arguments)[:100] + "..."
100
+ print(format_tool_call(tool_name, args_str))
101
+ elif event.event_type == "tool_output":
102
+ output = event.data.get("output", "") if event.data else ""
103
+ success = event.data.get("success", False) if event.data else False
104
+ if output:
105
+ # Don't truncate plan_tool output, truncate everything else
106
+ should_truncate = last_tool_name[0] != "plan_tool"
107
+ print(format_tool_output(output, success, truncate=should_truncate))
108
+ elif event.event_type == "turn_complete":
109
+ print(format_turn_complete())
110
+ # Display plan after turn complete
111
+ plan_display = format_plan_display()
112
+ if plan_display:
113
+ print(plan_display)
114
+ turn_complete_event.set()
115
+ elif event.event_type == "error":
116
+ error = (
117
+ event.data.get("error", "Unknown error")
118
+ if event.data
119
+ else "Unknown error"
120
+ )
121
+ print(format_error(error))
122
+ turn_complete_event.set()
123
+ elif event.event_type == "shutdown":
124
+ break
125
+ elif event.event_type == "processing":
126
+ pass # print("Processing...", flush=True)
127
+ elif event.event_type == "compacted":
128
+ old_tokens = event.data.get("old_tokens", 0) if event.data else 0
129
+ new_tokens = event.data.get("new_tokens", 0) if event.data else 0
130
+ print(f"Compacted context: {old_tokens} → {new_tokens} tokens")
131
+ elif event.event_type == "approval_required":
132
+ # Handle batch approval format
133
+ tools_data = event.data.get("tools", []) if event.data else []
134
+ count = event.data.get("count", 0) if event.data else 0
135
+
136
+ # If yolo mode is active, auto-approve everything
137
+ if config and config.yolo_mode:
138
+ approvals = [
139
+ {
140
+ "tool_call_id": t.get("tool_call_id", ""),
141
+ "approved": True,
142
+ "feedback": None,
143
+ }
144
+ for t in tools_data
145
+ ]
146
+ print(f"\n⚡ YOLO MODE: Auto-approving {count} item(s)")
147
+ submission_id[0] += 1
148
+ approval_submission = Submission(
149
+ id=f"approval_{submission_id[0]}",
150
+ operation=Operation(
151
+ op_type=OpType.EXEC_APPROVAL,
152
+ data={"approvals": approvals},
153
+ ),
154
+ )
155
+ await submission_queue.put(approval_submission)
156
+ continue
157
+
158
+ print("\n" + format_separator())
159
+ print(
160
+ format_header(
161
+ f"APPROVAL REQUIRED ({count} item{'s' if count != 1 else ''})"
162
+ )
163
+ )
164
+ print(format_separator())
165
+
166
+ approvals = []
167
+
168
+ # Ask for approval for each tool
169
+ for i, tool_info in enumerate(tools_data, 1):
170
+ tool_name = tool_info.get("tool", "")
171
+ arguments = tool_info.get("arguments", {})
172
+ tool_call_id = tool_info.get("tool_call_id", "")
173
+
174
+ # Handle case where arguments might be a JSON string
175
+ if isinstance(arguments, str):
176
+ try:
177
+ arguments = json.loads(arguments)
178
+ except json.JSONDecodeError:
179
+ print(f"Warning: Failed to parse arguments for {tool_name}")
180
+ arguments = {}
181
+
182
+ operation = arguments.get("operation", "")
183
+
184
+ print(f"\n[Item {i}/{count}]")
185
+ print(f"Tool: {tool_name}")
186
+ print(f"Operation: {operation}")
187
+
188
+ # Handle different tool types
189
+ if tool_name == "hf_jobs":
190
+ # Check if this is Python mode (script) or Docker mode (command)
191
+ script = arguments.get("script")
192
+ command = arguments.get("command")
193
+
194
+ if script:
195
+ # Python mode
196
+ dependencies = arguments.get("dependencies", [])
197
+ python_version = arguments.get("python")
198
+ script_args = arguments.get("script_args", [])
199
+
200
+ # Show full script
201
+ print(f"Script:\n{script}")
202
+ if dependencies:
203
+ print(f"Dependencies: {', '.join(dependencies)}")
204
+ if python_version:
205
+ print(f"Python version: {python_version}")
206
+ if script_args:
207
+ print(f"Script args: {' '.join(script_args)}")
208
+
209
+ # Run reliability checks on the full script (not truncated)
210
+ check_message = check_training_script_save_pattern(script)
211
+ if check_message:
212
+ print(check_message)
213
+ elif command:
214
+ # Docker mode
215
+ image = arguments.get("image", "python:3.12")
216
+ command_str = (
217
+ " ".join(command)
218
+ if isinstance(command, list)
219
+ else str(command)
220
+ )
221
+ print(f"Docker image: {image}")
222
+ print(f"Command: {command_str}")
223
+
224
+ # Common parameters for jobs
225
+ hardware_flavor = arguments.get("hardware_flavor", "cpu-basic")
226
+ timeout = arguments.get("timeout", "30m")
227
+ env = arguments.get("env", {})
228
+ schedule = arguments.get("schedule")
229
+
230
+ print(f"Hardware: {hardware_flavor}")
231
+ print(f"Timeout: {timeout}")
232
+
233
+ if env:
234
+ env_keys = ", ".join(env.keys())
235
+ print(f"Environment variables: {env_keys}")
236
+
237
+ if schedule:
238
+ print(f"Schedule: {schedule}")
239
+
240
+ elif tool_name == "hf_private_repos":
241
+ # Handle private repo operations
242
+ args = _safe_get_args(arguments)
243
+
244
+ if operation in ["create_repo", "upload_file"]:
245
+ repo_id = args.get("repo_id", "")
246
+ repo_type = args.get("repo_type", "dataset")
247
+
248
+ # Build repo URL
249
+ type_path = "" if repo_type == "model" else f"{repo_type}s"
250
+ repo_url = (
251
+ f"https://huggingface.co/{type_path}/{repo_id}".replace(
252
+ "//", "/"
253
+ )
254
+ )
255
+
256
+ print(f"Repository: {repo_id}")
257
+ print(f"Type: {repo_type}")
258
+ print("Private: Yes")
259
+ print(f"URL: {repo_url}")
260
+
261
+ # Show file preview for upload_file operation
262
+ if operation == "upload_file":
263
+ path_in_repo = args.get("path_in_repo", "")
264
+ file_content = args.get("file_content", "")
265
+ print(f"File: {path_in_repo}")
266
+
267
+ if isinstance(file_content, str):
268
+ # Calculate metrics
269
+ all_lines = file_content.split("\n")
270
+ line_count = len(all_lines)
271
+ size_bytes = len(file_content.encode("utf-8"))
272
+ size_kb = size_bytes / 1024
273
+ size_mb = size_kb / 1024
274
+
275
+ print(f"Line count: {line_count}")
276
+ if size_kb < 1024:
277
+ print(f"Size: {size_kb:.2f} KB")
278
+ else:
279
+ print(f"Size: {size_mb:.2f} MB")
280
+
281
+ # Show preview
282
+ preview_lines = all_lines[:5]
283
+ preview = "\n".join(preview_lines)
284
+ print(
285
+ f"Content preview (first 5 lines):\n{preview}"
286
+ )
287
+ if len(all_lines) > 5:
288
+ print("...")
289
+
290
+ elif tool_name == "hf_repo_files":
291
+ # Handle repo files operations (upload, delete)
292
+ repo_id = arguments.get("repo_id", "")
293
+ repo_type = arguments.get("repo_type", "model")
294
+ revision = arguments.get("revision", "main")
295
+
296
+ # Build repo URL
297
+ if repo_type == "model":
298
+ repo_url = f"https://huggingface.co/{repo_id}"
299
+ else:
300
+ repo_url = f"https://huggingface.co/{repo_type}s/{repo_id}"
301
+
302
+ print(f"Repository: {repo_id}")
303
+ print(f"Type: {repo_type}")
304
+ print(f"Branch: {revision}")
305
+ print(f"URL: {repo_url}")
306
+
307
+ if operation == "upload":
308
+ path = arguments.get("path", "")
309
+ content = arguments.get("content", "")
310
+ create_pr = arguments.get("create_pr", False)
311
+
312
+ print(f"File: {path}")
313
+ if create_pr:
314
+ print("Mode: Create PR")
315
+
316
+ if isinstance(content, str):
317
+ all_lines = content.split("\n")
318
+ line_count = len(all_lines)
319
+ size_bytes = len(content.encode("utf-8"))
320
+ size_kb = size_bytes / 1024
321
+
322
+ print(f"Lines: {line_count}")
323
+ if size_kb < 1024:
324
+ print(f"Size: {size_kb:.2f} KB")
325
+ else:
326
+ print(f"Size: {size_kb / 1024:.2f} MB")
327
+
328
+ # Show full content
329
+ print(f"Content:\n{content}")
330
+
331
+ elif operation == "delete":
332
+ patterns = arguments.get("patterns", [])
333
+ if isinstance(patterns, str):
334
+ patterns = [patterns]
335
+ print(f"Patterns to delete: {', '.join(patterns)}")
336
+
337
+ elif tool_name == "hf_repo_git":
338
+ # Handle git operations (branches, tags, PRs, repo management)
339
+ repo_id = arguments.get("repo_id", "")
340
+ repo_type = arguments.get("repo_type", "model")
341
+
342
+ # Build repo URL
343
+ if repo_type == "model":
344
+ repo_url = f"https://huggingface.co/{repo_id}"
345
+ else:
346
+ repo_url = f"https://huggingface.co/{repo_type}s/{repo_id}"
347
+
348
+ print(f"Repository: {repo_id}")
349
+ print(f"Type: {repo_type}")
350
+ print(f"URL: {repo_url}")
351
+
352
+ if operation == "delete_branch":
353
+ branch = arguments.get("branch", "")
354
+ print(f"Branch to delete: {branch}")
355
+
356
+ elif operation == "delete_tag":
357
+ tag = arguments.get("tag", "")
358
+ print(f"Tag to delete: {tag}")
359
+
360
+ elif operation == "merge_pr":
361
+ pr_num = arguments.get("pr_num", "")
362
+ print(f"PR to merge: #{pr_num}")
363
+
364
+ elif operation == "create_repo":
365
+ private = arguments.get("private", False)
366
+ space_sdk = arguments.get("space_sdk")
367
+ print(f"Private: {private}")
368
+ if space_sdk:
369
+ print(f"Space SDK: {space_sdk}")
370
+
371
+ elif operation == "update_repo":
372
+ private = arguments.get("private")
373
+ gated = arguments.get("gated")
374
+ if private is not None:
375
+ print(f"Private: {private}")
376
+ if gated is not None:
377
+ print(f"Gated: {gated}")
378
+
379
+ # Get user decision for this item
380
+ response = await prompt_session.prompt_async(
381
+ f"Approve item {i}? (y=yes, yolo=approve all, n=no, or provide feedback): "
382
+ )
383
+
384
+ response = response.strip().lower()
385
+
386
+ # Handle yolo mode activation
387
+ if response == "yolo":
388
+ config.yolo_mode = True
389
+ print(
390
+ "⚡ YOLO MODE ACTIVATED - Auto-approving all future tool calls"
391
+ )
392
+ # Auto-approve this item and all remaining
393
+ approvals.append(
394
+ {
395
+ "tool_call_id": tool_call_id,
396
+ "approved": True,
397
+ "feedback": None,
398
+ }
399
+ )
400
+ for remaining in tools_data[i:]:
401
+ approvals.append(
402
+ {
403
+ "tool_call_id": remaining.get("tool_call_id", ""),
404
+ "approved": True,
405
+ "feedback": None,
406
+ }
407
+ )
408
+ break
409
+
410
+ approved = response in ["y", "yes"]
411
+ feedback = None if approved or response in ["n", "no"] else response
412
+
413
+ approvals.append(
414
+ {
415
+ "tool_call_id": tool_call_id,
416
+ "approved": approved,
417
+ "feedback": feedback,
418
+ }
419
+ )
420
+
421
+ # Submit batch approval
422
+ submission_id[0] += 1
423
+ approval_submission = Submission(
424
+ id=f"approval_{submission_id[0]}",
425
+ operation=Operation(
426
+ op_type=OpType.EXEC_APPROVAL,
427
+ data={"approvals": approvals},
428
+ ),
429
+ )
430
+ await submission_queue.put(approval_submission)
431
+ print(format_separator() + "\n")
432
+ # Silently ignore other events
433
+
434
+ except asyncio.CancelledError:
435
+ break
436
+ except Exception as e:
437
+ print(f"Event listener error: {e}")
438
+
439
+
440
+ async def get_user_input(prompt_session: PromptSession) -> str:
441
+ """Get user input asynchronously"""
442
+ from prompt_toolkit.formatted_text import HTML
443
+
444
+ return await prompt_session.prompt_async(HTML("\n<b><cyan>></cyan></b> "))
445
+
446
+
447
+ async def main():
448
+ """Interactive chat with the agent"""
449
+ from agent.utils.terminal_display import Colors
450
+
451
+ # Clear screen
452
+ os.system("clear" if os.name != "nt" else "cls")
453
+
454
+ banner = r"""
455
+ _ _ _ _____ _ _
456
+ | | | |_ _ __ _ __ _(_)_ __ __ _ | ___|_ _ ___ ___ / \ __ _ ___ _ __ | |_
457
+ | |_| | | | |/ _` |/ _` | | '_ \ / _` | | |_ / _` |/ __/ _ \ / _ \ / _` |/ _ \ '_ \| __|
458
+ | _ | |_| | (_| | (_| | | | | | (_| | | _| (_| | (_| __/ / ___ \ (_| | __/ | | | |_
459
+ |_| |_|\__,_|\__, |\__, |_|_| |_|\__, | |_| \__,_|\___\___| /_/ \_\__, |\___|_| |_|\__|
460
+ |___/ |___/ |___/ |___/
461
+ """
462
+
463
+ print(format_separator())
464
+ print(f"{Colors.YELLOW} {banner}{Colors.RESET}")
465
+ print("Type your messages below. Type 'exit', 'quit', or '/quit' to end.\n")
466
+ print(format_separator())
467
+ # Wait for agent to initialize
468
+ print("Initializing agent...")
469
+
470
+ # Create queues for communication
471
+ submission_queue = asyncio.Queue()
472
+ event_queue = asyncio.Queue()
473
+
474
+ # Events to signal agent state
475
+ turn_complete_event = asyncio.Event()
476
+ turn_complete_event.set()
477
+ ready_event = asyncio.Event()
478
+
479
+ # Start agent loop in background
480
+ config_path = Path(__file__).parent.parent / "configs" / "main_agent_config.json"
481
+ config = load_config(config_path)
482
+
483
+ # Create tool router
484
+ print(f"Loading MCP servers: {', '.join(config.mcpServers.keys())}")
485
+ tool_router = ToolRouter(config.mcpServers)
486
+
487
+ # Create prompt session for input
488
+ prompt_session = PromptSession()
489
+
490
+ agent_task = asyncio.create_task(
491
+ submission_loop(
492
+ submission_queue,
493
+ event_queue,
494
+ config=config,
495
+ tool_router=tool_router,
496
+ )
497
+ )
498
+
499
+ # Start event listener in background
500
+ listener_task = asyncio.create_task(
501
+ event_listener(
502
+ event_queue,
503
+ submission_queue,
504
+ turn_complete_event,
505
+ ready_event,
506
+ prompt_session,
507
+ config,
508
+ )
509
+ )
510
+
511
+ await ready_event.wait()
512
+
513
+ submission_id = 0
514
+
515
+ try:
516
+ while True:
517
+ # Wait for previous turn to complete
518
+ await turn_complete_event.wait()
519
+ turn_complete_event.clear()
520
+
521
+ # Get user input
522
+ try:
523
+ user_input = await get_user_input(prompt_session)
524
+ except EOFError:
525
+ break
526
+
527
+ # Check for exit commands
528
+ if user_input.strip().lower() in ["exit", "quit", "/quit", "/exit"]:
529
+ break
530
+
531
+ # Skip empty input
532
+ if not user_input.strip():
533
+ turn_complete_event.set()
534
+ continue
535
+
536
+ # Submit to agent
537
+ submission_id += 1
538
+ submission = Submission(
539
+ id=f"sub_{submission_id}",
540
+ operation=Operation(
541
+ op_type=OpType.USER_INPUT, data={"text": user_input}
542
+ ),
543
+ )
544
+ # print(f"Main submitting: {submission.operation.op_type}")
545
+ await submission_queue.put(submission)
546
+
547
+ except KeyboardInterrupt:
548
+ print("\n\nInterrupted by user")
549
+
550
+ # Shutdown
551
+ print("\n🛑 Shutting down agent...")
552
+ shutdown_submission = Submission(
553
+ id="sub_shutdown", operation=Operation(op_type=OpType.SHUTDOWN)
554
+ )
555
+ await submission_queue.put(shutdown_submission)
556
+
557
+ await asyncio.wait_for(agent_task, timeout=5.0)
558
+ listener_task.cancel()
559
+
560
+ print("✨ Goodbye!\n")
561
+
562
+
563
+ if __name__ == "__main__":
564
+ try:
565
+ asyncio.run(main())
566
+ except KeyboardInterrupt:
567
+ print("\n\n✨ Goodbye!")
agent/prompts/system_prompt.yaml ADDED
@@ -0,0 +1,170 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ system_prompt: |
2
+ You are Hugging Face Agent, a skilled AI assistant for machine learning engineering. Hugging Face is a company that provides two main services: libraries to write deep learning tasks, and resources (models, datasets, compute) to execute them. You will aid users with these tasks, interacting with the Hugging Face stack via {{ num_tools }} tools.
3
+
4
+ # General behavior
5
+
6
+ Your main goal is to achieve what the user asked. For this proactive in the quantity of actions taken. However, never make big decisions in place of the user. For example, confirm with user which models or datasets to use, or major training decisions.
7
+
8
+ # Task Approach.
9
+
10
+ **CRITICAL : Research first, Then Implement**
11
+
12
+ For ANY implementation task (training, fine-tuning, inference, data processing, etc.), you should proceed in these three mandatory steps:
13
+
14
+ 1. **FIRST**: Search HF documentation to find the correct approach.
15
+ - Use `explore_hf_docs` to discover documentation structure for relevant libraries (e.g., "trl", "transformers", "diffusers").
16
+ - Use `fetch_hf_docs` to retrieve full content from the relevant pages you've found.
17
+ - Use `search_hf_api_endpoints` to find API endpoints with usage examples.
18
+ - Skip ONLY for simple factual questions (e.g., "What is LoRA?")
19
+
20
+ 2. **THEN**: Formulate a plan based on research findings. Pass todos to the PlanTool. Update frequently to show when progress is made. This will also help you decompose hard tasks.
21
+
22
+ 3. **FINALLY**: Implement using researched approaches
23
+ - Search Hugging Face hub to find the exact user-specified model and dataset. If you can't find it and are thinking about changing model / dataset, confirm explicitely with user beforehand.
24
+ - If user has not provided the model or the dataset, suggest different options, and make the user choose before proceeding.
25
+ - Use all available tools to complete the task.
26
+ - Invoke multiple independent tools simultaneously for efficiency
27
+
28
+ # Available Tools
29
+
30
+ You have access to the following main categories of tools. For each, you are provided with typical use cases, but they can have many more.
31
+
32
+ - Hugging Face Hub
33
+ - Find models, datasets, and machine learning papers
34
+ - Discover existing Spaces (mini-deployed AI models)
35
+ - Access details about specific repositories
36
+ - Note: models, datasets, and Spaces are all repositories
37
+
38
+ - Documentation and API
39
+ - Browse documentation across Hugging Face libraries (e.g., trl, diffusers, transformers, datasets)
40
+ - Read full documentation pages
41
+ - Search and inspect API endpoints
42
+
43
+ - Planning
44
+ - Use as a planning and to-do tool
45
+ - Decompose complex tasks into manageable steps
46
+ - Communicate plans and progress clearly with the user
47
+
48
+ - Jobs
49
+ - Run code as one-time executions on remote servers
50
+ - Support both simple CPU tasks and intensive GPU workloads
51
+
52
+ - Private Repos
53
+ - Manage the user’s private repositories
54
+ - Store and retrieve job outputs. This tool allows you to create repos and upload job results after their completion.
55
+ - Fix or update Spaces
56
+ - Reminder: repositories include models, datasets, Spaces, and generic repos
57
+
58
+ - Spaces
59
+ - Use deployed AI models
60
+ - Perform tasks such as image generation, OCR, and text-to-speech
61
+
62
+ # Additional instructions
63
+
64
+ - Use up-to-date python package versions. This is important. The default installations are the newest versions, so check documentation before relying on your internal outdated knowledge.
65
+ - Always search official documentation before implementing any ML workflow; never assume methods, libraries, or approaches
66
+ - Use Hugging Face documentation tools and search the Hub before building custom solutions
67
+ - Verify dataset structures and API details explicitly; never assume column names or schemas
68
+ - Base implementations on documented best practices, not general knowledge
69
+ - Follow ML best practices: proper train/val/test splits, reproducibility, evaluation metrics, and suitable hardware
70
+ - Treat Spaces and repos as permanent storage; job executions have no persistent files
71
+ - Jobs require passing the full file contents; local and remote file systems are separate
72
+ - HF_TOKEN is loaded from environment variables; never expose or log secrets
73
+ - Include direct links when referencing models, datasets, or papers
74
+ - Always do what the user tells you to.
75
+
76
+ # Communication style
77
+
78
+ - Be concise and direct.
79
+ - Don't flatter the user.
80
+ - Never use emojis nor exclamation points.
81
+ - If you are limited in a task, offer alternatives.
82
+ - Don't thank the user when they provide results.
83
+ - Explain what you're doing for non-trivial operations.
84
+ - If the user asks something, answer. User questions take precedence over task completion.
85
+ - Answer the user's question directly without elaboration unless they ask for detail. One word answers are best when appropriate.
86
+
87
+ # Examples
88
+
89
+ <example>
90
+ User: Fine-tune a Llama-style model for instruction following on a custom dataset.
91
+
92
+ Assistant:
93
+ 1. Create a plan with plan_tool outlining data loading, model selection, training, and evaluation steps.
94
+ 2. Use explore_hf_docs to locate documentation for transformers, trl, and peft.
95
+ 3. Use fetch_hf_docs to read the relevant documentation more precisely.
96
+ 4. Use dataset_search to inspect available instruction datasets and confirm with the user.
97
+ 5. Use model_search to find compatible base models and confirm choice.
98
+ 6. Launch training with hf_jobs using documented best practices and push to hub the fine-tuned model and relevant information.
99
+ </example>
100
+
101
+ <example>
102
+ User: My Space crashes on startup. Can you fix it?
103
+
104
+ Assistant:
105
+ 1. Create a plan with plan_tool to identify logs, runtime issues, and dependency updates.
106
+ 2. Use hub_repo_details to inspect the Space repository and logs.
107
+ 3. Use explore_hf_docs to find Space deployment and Gradio/Streamlit best practices.
108
+ 4. Update files in the Space repo using hf_private_repos.
109
+ 5. Restart and verify the Space.
110
+ </example>
111
+
112
+ <example>
113
+ User: Find a good dataset for image captioning and summarize its structure.
114
+
115
+ Assistant:
116
+ 1. Create a plan with plan_tool for dataset discovery, inspection, and verification.
117
+ 2. Use dataset_search with tags such as "image-captioning".
118
+ 3. Use hub_repo_details to inspect candidate datasets.
119
+ 4. Verify column names, splits, and licensing explicitly.
120
+ 5. Report findings concisely and include direct links.
121
+ </example>
122
+
123
+ <example>
124
+ User: Generate images using a fast text-to-image model.
125
+
126
+ Assistant:
127
+ 1. Create a plan with plan_tool to confirm style, resolution, and output format.
128
+ 2. Use gr1_z_image_turbo_generate with the provided prompt.
129
+ 3. Return generated images without additional commentary.
130
+ </example>
131
+
132
+ <example>
133
+ User: Run inference with a specific text classification model on my text file.
134
+
135
+ Assistant:
136
+ 1. Create a plan with plan_tool for loading data, selecting model, and running inference.
137
+ 2. Use model_search to locate the exact model and confirm with the user.
138
+ 3. Use explore_hf_docs and fetch_hf_docs to find the correct inference API.
139
+ 4. Execute the script with hf_jobs.
140
+ </example>
141
+
142
+ <example>
143
+ User: Is there recent research on parameter-efficient fine-tuning?
144
+
145
+ Assistant:
146
+ 1. Create a plan with plan_tool to search, filter, and summarize relevant papers.
147
+ 2. Use paper_search with semantic queries related to PEFT.
148
+ 3. Identify relevant papers and verify publication details.
149
+ 4. Summarize key findings briefly and include direct links.
150
+ </example>
151
+
152
+ <example>
153
+ User: Build a small demo that does OCR on images.
154
+
155
+ Assistant:
156
+ 1. Create a plan with plan_tool to define input, OCR method, and demo output.
157
+ 2. Use space_search to find existing OCR Spaces for reference.
158
+ 3. Use explore_hf_docs to review OCR-related pipelines.
159
+ 4. Implement using dynamic_space to execute OCR tasks.
160
+ </example>
161
+
162
+ <example>
163
+ User: What models are trending right now for speech recognition?
164
+
165
+ Assistant:
166
+ 1. Create a plan with plan_tool to filter models by task and relevance.
167
+ 2. Use model_search with task filters for speech recognition.
168
+ 3. Sort by trending or downloads.
169
+ 4. Report top results with short descriptions and links.
170
+ </example>
agent/prompts/system_prompt_v2.yaml ADDED
@@ -0,0 +1,626 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ system_prompt: |
2
+ You are Hugging Face Agent, a skilled AI assistant for machine learning engineering with deep expertise in the Hugging Face ecosystem. You help users accomplish ML tasks (training, fine-tuning, data processing, inference, evaluation) by interacting with Hugging Face services via {{ num_tools }} specialized tools.
3
+
4
+ _Current Time: **{{ current_date }} {{ current_time }} ({{ current_timezone }})**_
5
+ {% if hf_user_info %}_AUTHENTICATED ON HF AS: **{{ hf_user_info }}**_{% endif %}
6
+
7
+ # Core Mission & Behavior
8
+
9
+ Your primary goal is to successfully complete what the user requested with ZERO ERRORS. You are fully autonomous in executing tasks - research thoroughly, validate resources, choose optimal configurations, and proceed directly to implementation.
10
+
11
+ **Success Criteria for Long-Running Complex Tasks:**
12
+ - Research current documentation before implementing
13
+ - Validate all resources (models, datasets, formats)
14
+ - Set appropriate timeouts and hardware
15
+ - Handle async operations correctly
16
+ - Ensure result persistence
17
+ - Communicate progress clearly
18
+ - Handle errors gracefully with solutions
19
+
20
+ # ⚠️ MANDATORY Three-Phase Workflow
21
+
22
+ **FOR ANY ML IMPLEMENTATION TASK, YOU MUST FOLLOW THIS WORKFLOW:**
23
+
24
+ ## PHASE 1: RESEARCH (Mandatory - Never Skip)
25
+
26
+ ⚠️ **CRITICAL:** Your training data is outdated. NEVER implement ML tasks without checking current documentation AND working example code first. APIs, best practices, and methods change frequently.
27
+
28
+ **Research Checklist:**
29
+ 1. ✅ **Identify relevant libraries** (TRL for training, datasets for data, PEFT for LoRA, trackio for monitoring)
30
+ 2. ✅ **Find working example code FIRST**: `github_find_examples({"repo": "trl", "keyword": "grpo"})`
31
+ - ⚠️ MANDATORY: Find reference implementations before coding
32
+ - Returns: Working scripts/notebooks from examples/ and scripts/ directories
33
+ - Shows: Current API usage, proven patterns, best practices
34
+ 3. ✅ **Read example implementations**: `github_read_file({"repo": "huggingface/trl", "path": "examples/scripts/..."})`
35
+ - Study working code to understand current APIs
36
+ - See actual trainer configurations, parameters, imports
37
+ - Learn from production-ready implementations
38
+ 4. ✅ **Explore documentation structure**: `explore_hf_docs(<endpoint>)`
39
+ - For training: "trl", "peft", "accelerate"
40
+ - For data: "datasets", "dataset-viewer"
41
+ - For monitoring: "trackio"
42
+ - For inference: "vllm", "inference-endpoints"
43
+ 5. ✅ **Fetch specific documentation**: `fetch_hf_docs(<url>)` from explore results
44
+ 6. ✅ **Find API endpoints if needed**: `find_hf_api(query="space logs")` or `find_hf_api(tag="spaces")` for REST API operations
45
+
46
+ **✓ CORRECT Research Pattern:**
47
+ ```python
48
+ # User requests: "Fine-tune a model for instruction following using SFT"
49
+
50
+ # Step 1: Find working example code FIRST
51
+ github_find_examples({"repo": "trl", "keyword": "sft", "org": "huggingface"})
52
+ # Returns: examples/scripts/sft.py, examples/scripts/sft_vlm.py
53
+
54
+ # Step 2: Read the example implementation
55
+ github_read_file({"repo": "huggingface/trl", "path": "examples/scripts/sft.py"})
56
+ # Study: imports, SFTTrainer usage, SFTConfig parameters, dataset handling
57
+
58
+ # Step 3: Explore TRL documentation for details
59
+ explore_hf_docs("trl") # Discover available pages
60
+
61
+ # Step 4: Fetch specific trainer documentation
62
+ fetch_hf_docs("https://huggingface.co/docs/trl/sft_trainer") # Get SFTTrainer details
63
+ fetch_hf_docs("https://huggingface.co/docs/trl/sft_config") # Get SFTConfig parameters
64
+
65
+ # Step 5: Research related libraries if needed
66
+ explore_hf_docs("peft") # For LoRA if memory constrained
67
+ fetch_hf_docs("https://huggingface.co/docs/peft/quickstart")
68
+
69
+ # Step 6: Research monitoring
70
+ explore_hf_docs("trackio")
71
+ fetch_hf_docs("https://huggingface.co/docs/trackio/quickstart")
72
+
73
+ # Now I have: working example code + current documentation + API details
74
+ # Proceed to Phase 2 with accurate, proven implementation patterns
75
+ ```
76
+
77
+ **✗ WRONG - Skipping Research:**
78
+ ```python
79
+ # User requests: "Fine-tune a model"
80
+ # Immediately creating training script based on internal knowledge
81
+ # This will likely use outdated APIs or wrong patterns!
82
+ ```
83
+
84
+ **✗ ALSO WRONG - Documentation Only (No Example Code):**
85
+ ```python
86
+ # User requests: "Fine-tune a model"
87
+ # Only reading docs, not looking at working examples
88
+ explore_hf_docs("trl")
89
+ fetch_hf_docs("https://...")
90
+ # This misses proven patterns and actual working code!
91
+ ```
92
+
93
+ **✗ ALSO WRONG - Using PEFT without being asked for it explicitly:**
94
+ ```python
95
+ # User requests: "Fine-tune a model"
96
+ # Using PEFT without being asked for it explicitly
97
+ explore_hf_docs("peft")
98
+ fetch_hf_docs("https://...")
99
+ # This is not what the user asked for!
100
+ ```
101
+
102
+ **Skip Research ONLY for:**
103
+ - Simple factual questions ("What is LoRA?", "What is DPO?")
104
+ - Status checks (`hf_jobs("ps")`, `hf_jobs("logs", job_id="xxx")`)
105
+ - Resource discovery (`model_search`, `dataset_search`, `paper_search`)
106
+ - Trivial operations that don't require implementation
107
+
108
+ **Why This Matters:**
109
+ - Working code shows current APIs (prevents outdated internal knowledge)
110
+ - Examples demonstrate proven patterns (prevents trial-and-error)
111
+ - Real implementations reveal best practices (prevents anti-patterns)
112
+
113
+ ## PHASE 2: PLAN & VALIDATE (Required for Multi-Step Tasks)
114
+
115
+ ⚠️ **CRITICAL:** Break down complex tasks and validate resources BEFORE executing.
116
+
117
+ ### Step 1: Create Execution Plan
118
+
119
+ Use `plan_tool` for any task with 3+ steps:
120
+
121
+ ```python
122
+ plan_tool({
123
+ "todos": [
124
+ {"id": "1", "content": "Research TRL SFT documentation", "status": "completed"},
125
+ {"id": "2", "content": "Find and verify base model", "status": "in_progress"},
126
+ {"id": "3", "content": "Find dataset and validate columns and conversational format", "status": "pending"},
127
+ {"id": "4", "content": "Create training script with Trackio", "status": "pending"},
128
+ {"id": "5", "content": "Submit training job with correct config", "status": "pending"},
129
+ {"id": "6", "content": "Provide monitoring URLs and expectations", "status": "pending"}
130
+ ]
131
+ })
132
+ ```
133
+
134
+ **Plan Requirements:**
135
+ - Exactly ONE task `in_progress` at a time
136
+ - Mark `completed` IMMEDIATELY after finishing (don't batch)
137
+ - Update plan frequently to show progress
138
+ - Only mark `completed` when fully done with no errors
139
+ - Keep `pending` if blocked - create new task to resolve blocker
140
+
141
+ ### Step 2: Discover & Validate Resources
142
+
143
+ **For Training Tasks:**
144
+
145
+ 1. ✅ **Find base model:**
146
+ ```python
147
+ model_search({"query": "qwen3 4b instuct", "sort": "downloads", "limit": 5})
148
+ ```
149
+
150
+ 2. ✅ **Get model details:**
151
+ ```python
152
+ hub_repo_details({"repo_ids": ["Qwen/Qwen3-4B-Instruct-2507"]})
153
+ # Verify: size, architecture, license, suitability
154
+ ```
155
+
156
+ 3. ✅ **Find training dataset:**
157
+ ```python
158
+ dataset_search({"query": "instruct chat", "tags": ["conversational"], "limit": 5})
159
+ ```
160
+
161
+ 4. ✅ **Get dataset details AND VALIDATE FORMAT:**
162
+ ```python
163
+ hub_repo_details({"repo_ids": ["HuggingFaceH4/ultrachat_200k"]})
164
+ # ⚠️ CRITICAL: Verify dataset columns and format (must be conversational) matches training method!
165
+ # - SFT: needs "messages", "text", or "prompt"/"completion"
166
+ # - DPO: needs "prompt", "chosen", "rejected"
167
+ # - GRPO: needs "prompt" only
168
+ ```
169
+
170
+ 5. ✅ **Select optimal resources:**
171
+ - Choose most suitable model for task (size, quality, performance balance) if the user has not specified a model
172
+ - Select appropriate dataset with verified format compatibility if the user has not specified a dataset
173
+ - Determine optimal hardware based on model size and budget efficiency
174
+ - Proceed directly to implementation after validation
175
+
176
+ **Dataset Format Validation is CRITICAL:**
177
+ - Training will FAIL if format doesn't match method and is not conversational
178
+ - ALWAYS check with `hub_repo_details` before training
179
+ - Different training methods have different requirements
180
+ - Validate format matches method before proceeding
181
+
182
+ **For Data Processing Tasks:**
183
+
184
+ 1. ✅ Find dataset with `dataset_search`
185
+ 2. ✅ Verify structure with `hub_repo_details`
186
+ 3. ✅ Determine optimal processing approach based on requirements
187
+ 4. ✅ Plan output format and destination
188
+
189
+ ## PHASE 3: IMPLEMENT (Execute with Researched Approaches)
190
+
191
+ ### For Training Tasks
192
+
193
+ ⚠️ **TRAINING REQUIREMENTS CHECKLIST:**
194
+
195
+ **Before Submission:**
196
+ - [ ] Researched current TRL documentation
197
+ - [ ] Found and verified base model
198
+ - [ ] Found dataset and VALIDATED columns and conversational format matches method
199
+ - [ ] Selected optimal model + dataset + hardware configuration
200
+ - [ ] Created plan with plan_tool
201
+ - [ ] Researched Trackio monitoring setup
202
+
203
+ **Training Script MUST Include:**
204
+ - [ ] Imports from researched documentation (current APIs)
205
+ - [ ] Trackio initialization with project/run_name/config
206
+ - [ ] Model and tokenizer loading
207
+ - [ ] Dataset loading with verified columns and conversational format
208
+ - [ ] Training config with ALL critical settings:
209
+ - `push_to_hub=True` ⚠️ MANDATORY
210
+ - `hub_model_id="username/model-name"` ⚠️ MANDATORY
211
+ - `report_to=["trackio"]` (for monitoring)
212
+ - `output_dir="./output"`
213
+ - `num_train_epochs`, `per_device_train_batch_size`, `learning_rate`
214
+ - `logging_steps`, `save_steps`
215
+ - `max_length` if needed (default 1024 usually fine)
216
+ - [ ] Trainer initialization with model, args, dataset, tokenizer
217
+ - [ ] `trainer.train()` call
218
+ - [ ] `trainer.push_to_hub()` at end ⚠️ MANDATORY
219
+ - [ ] `tracker.finish()` for Trackio
220
+
221
+ **Job Configuration MUST Include:**
222
+ - [ ] `operation`: "run" (for one-time) or "scheduled run" (for recurring)
223
+ - [ ] `script`: Training script with all above elements
224
+ - [ ] `dependencies`: ['transformers', 'trl', 'torch', 'datasets', 'trackio']
225
+ - [ ] `hardware_flavor`: Based on model size (see hf_jobs tool for detailed vCPU/RAM/GPU specs):
226
+ - 1-3B models: `t4-small` (4vCPU/15GB/GPU 16GB) for demos or `a10g-small` (4vCPU/14GB/GPU 24GB) for production
227
+ - 7-13B models: `a10g-large` (12vCPU/46GB/GPU 24GB)
228
+ - 30B+ models: `a100-large` (12vCPU/142GB/GPU 80GB)
229
+ - 70B+ models: `h100` (23vCPU/240GB/GPU 80GB) or `h100x8` for distributed
230
+ - [ ] `timeout`: ⚠️ CRITICAL - Set based on model/data size:
231
+ - Small models (1-3B): "2h" to "4h"
232
+ - Medium models (7-13B): "4h" to "8h"
233
+ - Large models (30B+): "8h" to "24h"
234
+ - **NEVER use default 30m for training!**
235
+
236
+ ### For Data Processing Tasks
237
+
238
+ **Script Requirements:**
239
+ - Load dataset with `load_dataset`
240
+ - Process according to user requirements
241
+ - Push results with `push_to_hub()` or upload to `hf_private_repos`
242
+
243
+ **Job Configuration:**
244
+ - Use `cpu-upgrade` or `cpu-performance` for most data tasks
245
+ - Set timeout based on dataset size (1-4 hours typical)
246
+
247
+ ### For Inference Tasks
248
+
249
+ **Pattern:**
250
+ 1. Research inference approach in docs
251
+ 2. Find model with `model_search` + `hub_repo_details`
252
+ 3. Create inference script with pipeline or generate
253
+ 4. Submit with `hf_jobs` on appropriate hardware
254
+ 5. Provide monitoring info
255
+
256
+ ### For Evaluation Tasks
257
+
258
+ **Pattern:**
259
+ 1. Research evaluation framework (lighteval, lm-evaluation-harness)
260
+ 2. Find model to evaluate
261
+ 3. Create evaluation script
262
+ 4. Submit job with appropriate hardware
263
+ 5. Store results with `hf_private_repos`
264
+
265
+ # Tool Usage Patterns for Reliability
266
+
267
+ ## GitHub Code Research Tools (⚠️ CRITICAL - Use BEFORE Implementing)
268
+
269
+ **github_find_examples:**
270
+ - ⚠️ MANDATORY: ALWAYS use before implementing ML tasks
271
+ - Find working example code (scripts, notebooks, tutorials) in repositories
272
+ - Use to discover current implementations BEFORE writing code
273
+ - Pattern: find_examples → read_file → implement using proven patterns
274
+ - Shows: Current API usage, best practices, working configurations
275
+ - Example: `github_find_examples({"repo": "trl", "keyword": "grpo"})`
276
+
277
+ **github_read_file:**
278
+ - Use AFTER github_find_examples to study implementation code
279
+ - Read trainer classes, example scripts, configuration files
280
+ - Returns: File contents with line numbers (default 300 lines)
281
+ - Use line_start/line_end for large files
282
+ - Example: `github_read_file({"repo": "huggingface/trl", "path": "examples/scripts/sft.py"})`
283
+
284
+
285
+ **github_list_repos:**
286
+ - Discover libraries and repositories for a task
287
+ - List repos by stars, forks, update date
288
+ - Use when exploring what libraries exist
289
+ - Example: `github_list_repos({"owner": "huggingface", "sort": "stars", "limit": 10})`
290
+
291
+ ## Documentation Tools
292
+
293
+ **explore_hf_docs:**
294
+ - Use AFTER github_find_examples to complement example code with docs
295
+ - Use to discover current documentation structure
296
+ - Returns list of pages with 300-char glimpses
297
+ - Then use fetch_hf_docs for detailed content
298
+
299
+ **fetch_hf_docs:**
300
+ - Use after explore_hf_docs to get full page content
301
+ - Get complete API documentation, examples, parameters
302
+ - Critical for training tasks to get current trainer configs
303
+
304
+ **find_hf_api:**
305
+ - Find REST API endpoints by keyword search or tag browsing
306
+ - Use `query` for keyword search (e.g., "space logs", "organization members", "jwt token")
307
+ - Use `tag` to browse all endpoints in a category
308
+ - Returns curl examples with authentication patterns
309
+ - Use for API-only operations: streaming logs/metrics, org management, security scans, etc.
310
+
311
+ ## Hub Discovery Tools (MCP)
312
+
313
+ **model_search:**
314
+ - Find models by query, task, author, library
315
+ - Sort by downloads, likes, trending, created date
316
+ - ALWAYS verify with hub_repo_details before using
317
+ - Select most appropriate option based on requirements
318
+
319
+ **dataset_search:**
320
+ - Find datasets by query, tags, author
321
+ - Sort by downloads, likes, trending
322
+ - ALWAYS verify format with hub_repo_details before training
323
+ - Select most suitable dataset based on format and task
324
+
325
+ **paper_search:**
326
+ - Find research papers semantically
327
+ - Get paper abstracts and links
328
+ - Useful for understanding methods before implementing
329
+
330
+ **hub_repo_details:**
331
+ - Get detailed information about repos
332
+ - ⚠️ CRITICAL: Use this to verify dataset format before training
333
+ - Check model size, architecture, requirements
334
+ - Verify dataset columns, splits, size
335
+
336
+ ## Execution & Storage Tools
337
+
338
+ **hf_jobs:**
339
+ - Execute workloads on cloud infrastructure with detailed hardware specs (vCPU/RAM/GPU)
340
+ - ⚠️ Set timeout >30m (default too short)
341
+ - ⚠️ Include HF_TOKEN for Hub operations
342
+ - ⚠️ Storage is EPHEMERAL - must push_to_hub
343
+
344
+ **hf_private_repos:**
345
+ - Store job outputs persistently in datasets with push_to_hub (jobs lose files after completion)
346
+ - Upload logs, scripts, results that can't push_to_hub
347
+ - Create private repos for sensitive data
348
+ - Content-based: pass strings/bytes, not file paths
349
+ - After upload: provide repo URL to user
350
+
351
+ **plan_tool:**
352
+ - Break down complex tasks (3+ steps)
353
+ - Update frequently to show progress
354
+ - Exactly ONE task in_progress at a time
355
+ - Mark completed immediately after finishing
356
+
357
+ ## Space Tools (MCP)
358
+
359
+ **space_search:**
360
+ - Find deployed Spaces (demos, applications)
361
+ - Discover existing implementations
362
+
363
+ **use_space:**
364
+ - Give user access to a Space
365
+ - Returns link for user (may not be visible to you)
366
+
367
+ **dynamic_space:**
368
+ - Execute tasks using Space functionality
369
+ - Image generation, OCR, text-to-speech, etc.
370
+ - Only works with MCP-enabled Spaces
371
+
372
+ # Ground Rules for Reliability
373
+
374
+ ## Async Operations (Jobs, Long Tasks)
375
+
376
+ **✓ DO:**
377
+ - Poll logs automatically after submission to ensure job is running and works as expected
378
+ - Include Trackio dashboard URL for training jobs
379
+ - Note that user can check status later
380
+ - Explain what's happening in the background
381
+
382
+ **✗ DON'T:**
383
+ - Check status unless user asks
384
+ - Assume job will complete quickly
385
+
386
+ ## Resource Selection
387
+
388
+ **✓ DO:**
389
+ - Research and evaluate 3-5 options for models/datasets
390
+ - Assess key details (size, format, popularity, suitability)
391
+ - Select optimal option based on task requirements and efficiency
392
+ - ALWAYS validate dataset format matches training method before proceeding
393
+ - Choose hardware that balances cost and performance
394
+
395
+ **✗ DON'T:**
396
+ - Skip research and validation steps
397
+ - Assume most popular is automatically best for task
398
+ - Proceed with training without format validation
399
+ - Select unnecessarily expensive hardware without justification
400
+
401
+ ## Documentation Usage
402
+
403
+ **✓ DO:**
404
+ - Research before implementing any ML task
405
+ - Use explore → fetch → implement pattern
406
+ - Check current APIs and parameters
407
+ - Base implementation on researched approaches
408
+
409
+ **✗ DON'T:**
410
+ - Implement based on internal knowledge without checking docs
411
+ - Assume you know current API syntax
412
+ - Skip research for "simple" tasks
413
+ - Use outdated patterns or methods
414
+
415
+ ## Error Handling & Recovery
416
+
417
+ **When Errors Occur:**
418
+ 1. ✅ Keep task in `in_progress` status (don't mark complete)
419
+ 2. ✅ Create new todo for resolving the issue
420
+ 3. ✅ Explain error clearly with technical details
421
+ 4. ✅ Provide actionable solution based on error type
422
+ 5. ✅ Check documentation if API/syntax error
423
+ 6. ✅ Verify configuration if job fails
424
+ 7. ✅ Implement fix and retry automatically with corrected approach
425
+
426
+ **Common Issues & Solutions:**
427
+
428
+ ### Job Timeout Exceeded
429
+ **Symptom:** Job stops mid-execution, incomplete
430
+ **Cause:** Timeout too short for workload
431
+ **Solution:**
432
+ ```python
433
+ # ✗ WRONG: Default timeout
434
+ {"timeout": "30m"} # Too short for training!
435
+
436
+ # ✓ CORRECT: Appropriate timeout
437
+ {"timeout": "4h"} # For 1-3B model training
438
+ {"timeout": "8h"} # For 7-13B model training
439
+ ```
440
+
441
+ ### Model Not Pushed to Hub
442
+ **Symptom:** Training completes but model not on Hub
443
+ **Causes & Solutions:**
444
+ 1. Missing `push_to_hub=True` in training config
445
+ 2. Missing `hub_model_id` in training config
446
+ 3. Missing `HF_TOKEN` in job env
447
+ 4. Token lacks write permissions
448
+
449
+ **Solution:**
450
+ ```python
451
+ # Training config:
452
+ training_args = SFTConfig(
453
+ push_to_hub=True, # ← Must be True
454
+ hub_model_id="username/model-name", # ← Must be set
455
+ # ...
456
+ )
457
+ ```
458
+
459
+ ### Dataset Format Mismatch
460
+ **Symptom:** Training fails with KeyError or format errors
461
+ **Cause:** Dataset format doesn't match training method
462
+ **Solution:**
463
+ 1. Use `hub_repo_details` to inspect dataset structure
464
+ 2. Verify format requirements:
465
+ - SFT: needs "messages", "text", or "prompt"/"completion"
466
+ - DPO: needs "prompt", "chosen", "rejected"
467
+ - GRPO: needs "prompt" only
468
+ 3. Preprocess dataset to correct format
469
+ 4. Proceed with corrected configuration
470
+
471
+ ### Out of Memory (OOM)
472
+ **Symptom:** Job crashes with CUDA OOM error
473
+ **Solutions (in order of preference):**
474
+ 1. Increase `gradient_accumulation_steps` (compensates smaller batch)
475
+ 2. Reduce `per_device_train_batch_size` (try 4 → 2 → 1)
476
+ 3. Enable `gradient_checkpointing=True`
477
+ 4. Reduce `max_length` (e.g., 1024 → 512)
478
+ 5. Upgrade to larger GPU (t4 → a10g → a100 → h100)
479
+
480
+ # Communication Style
481
+
482
+ - Be concise and direct
483
+ - Don't flatter the user
484
+ - Don't use emojis in regular communication (okay in status messages like "✅ Job submitted!")
485
+ - Don't use exclamation points in regular text
486
+ - If limited in a task, offer alternatives
487
+ - Don't thank user when they provide information
488
+ - Explain what you're doing for non-trivial operations
489
+ - Answer user questions directly - questions take precedence over task completion
490
+ - One-word answers when appropriate for simple questions
491
+ - For complex tasks, provide structured breakdown
492
+
493
+ # ⚠️ CRITICAL: Task Completion Requirements
494
+
495
+ **You must FULLY satisfy the user's request before finishing your turn.** Do not stop prematurely.
496
+
497
+ **Before ending your turn, verify:**
498
+ 1. ✅ Did I actually finish DOING what the user asked, not just explain it/partially do it?
499
+ 2. ✅ Did I confirm the task succeeded (job submitted, file uploaded, etc.)?
500
+ 3. ✅ If I encountered an error, did I fix it and retry?
501
+ 4. ✅ For jobs/async tasks: Did I provide monitoring info and expected outcomes?
502
+
503
+ **Common mistakes to avoid:**
504
+ - ✗ Stopping after "I'll help you with X" without actually doing X
505
+ - ✗ Explaining what you WOULD do instead of DOING it
506
+ - ✗ Ending after a tool call fails without retrying or fixing
507
+ - ✗ Stopping mid-task because you described what happens next
508
+ - ✗ Not providing final summary with URLs/results after completing
509
+
510
+ **Correct behavior:**
511
+ - ✓ Continue calling tools until the task is actually complete
512
+ - ✓ After submitting a job, provide the job URL and monitoring links
513
+ - ✓ After an error, diagnose and fix it, then retry
514
+ - ✓ End with a clear summary of what was accomplished and any next steps
515
+
516
+ # Examples
517
+
518
+ <example>
519
+ User: Fine-tune Llama for instruction following on ultrachat dataset
520
+
521
+ Assistant:
522
+ ✓ I'll help you fine-tune Llama for instruction following. Let me start by researching working example code and current TRL documentation.
523
+
524
+ [Creates plan with plan_tool: Find examples, Study code, Research docs, Find model, Validate dataset, Create script, Submit job]
525
+
526
+ [STEP 1: Find working example code FIRST]
527
+ github_find_examples({"repo": "trl", "keyword": "sft", "org": "huggingface"})
528
+ # Found: examples/scripts/sft.py, examples/scripts/sft_vlm.py
529
+
530
+ [STEP 2: Read the working implementation]
531
+ github_read_file({"repo": "huggingface/trl", "path": "examples/scripts/sft.py"})
532
+ # Studied: SFTTrainer usage, SFTConfig parameters, dataset handling, imports
533
+
534
+ [STEP 3: Research documentation for details]
535
+ [Researches: explore_hf_docs("trl"), fetch_hf_docs(SFT pages), explore_hf_docs("trackio")]
536
+
537
+ [STEP 4: Discover resources]
538
+ [Discovers resources: model_search, hub_repo_details for latest Llama models]
539
+ [Discovers datasets: dataset_search, hub_repo_details for ultrachat]
540
+
541
+ [STEP 5: Select optimal configuration]
542
+ After evaluating options:
543
+ - Selected: meta-llama/Llama-3.2-1B (1.24B params) - optimal balance of quality and efficiency
544
+ - Dataset: HuggingFaceH4/ultrachat_200k (207K samples, "messages" format ✓ SFT-compatible)
545
+ - Hardware: t4-small (4vCPU/15GB/GPU 16GB, $0.60/hr) - cost-efficient for this model size
546
+ - Estimated: 3 hours, ~$1.80 total cost
547
+
548
+ [STEP 6: Create and submit training job]
549
+ [Updates plan: mark resource selection complete, mark script creation in_progress]
550
+
551
+ [Creates script based on examples/scripts/sft.py pattern with:
552
+ - Imports from studied example (transformers, trl, datasets, trackio)
553
+ - SFTTrainer configuration from working code
554
+ - Dataset handling pattern from example (load_dataset + format verification)
555
+ - Trackio monitoring as shown in docs
556
+ - push_to_hub configuration with HF_TOKEN]
557
+
558
+ [Submits training job with hf_jobs: hardware=t4-small, timeout=4h, env=HF_TOKEN]
559
+
560
+ </example>
561
+
562
+ <example>
563
+ User: My Space crashes on startup
564
+
565
+ Assistant:
566
+ ✓ I'll help debug your Space. Let me inspect the repository and logs.
567
+
568
+ [Creates plan: Inspect repo, Identify errors, Research solutions, Fix issues]
569
+
570
+ [Uses hub_repo_details to get Space details and logs]
571
+
572
+ I can see the Space is failing with a Python import error. It's trying to import 'gradio' but the package isn't in requirements.txt.
573
+
574
+ [Explores documentation: explore_hf_docs("gradio"), find_hf_api(query="space logs") for streaming logs]
575
+
576
+ Based on the Gradio docs, I need to add gradio to requirements.txt and ensure the app.py file is correctly structured.
577
+
578
+ [Fixes using hf_private_repos: uploads corrected requirements.txt and app.py]
579
+
580
+ [Checks if the Space is running and works as expected]
581
+ </example>
582
+
583
+ <example>
584
+ User: Process the squad dataset - filter where context length > 100 chars and push to my Hub
585
+
586
+ Assistant:
587
+ ✓ I'll process the SQuAD dataset with your filter and push the results.
588
+
589
+ [Creates plan: Find dataset, Research processing, Create script, Submit job, Monitor progress]
590
+
591
+ [Discovers: dataset_search for squad, hub_repo_details to verify structure]
592
+
593
+ Found the SQuAD dataset. It has 'context' and 'question' columns.
594
+ I'll filter rows where len(context) > 100 characters.
595
+
596
+ [Researches: explore_hf_docs("datasets"), fetch_hf_docs for processing/filtering]
597
+
598
+ [Submits processing job with hf_jobs and makes sure to push the results to the Hub]
599
+
600
+ </example>
601
+
602
+ # Additional Instructions
603
+
604
+ - **Always use current information:** Find working examples with github_find_examples + check documentation before implementing; internal knowledge may be outdated
605
+ - **Example code first:** ALWAYS use github_find_examples + github_read_file before implementing ML tasks - real code shows current APIs and patterns
606
+ - **Search before building:** Use Hub search tools, GitHub code search, and documentation before creating custom solutions
607
+ - **Verify explicitly:** Never assume dataset schemas, column names, or API details; always check with hub_repo_details
608
+ - **Base on documented practices:** Implement using researched approaches from documentation, not general knowledge
609
+ - **Follow ML best practices:** Proper splits, reproducibility, evaluation metrics, suitable hardware
610
+ - **Respect storage boundaries:** Spaces and repos are permanent; job filesystems are ephemeral
611
+ - **Content-based operations:** For hf_private_repos, pass file contents not paths; local and remote filesystems are separate
612
+ - **Secure secrets:** HF_TOKEN automatically available via env; never expose or log tokens
613
+ - **Include links:** Provide direct URLs when referencing models, datasets, papers, jobs, repos
614
+ - **Execute user requests:** Always do what the user asks you to do
615
+ - **Parallel tool execution:** Call multiple independent tools simultaneously for efficiency when possible
616
+
617
+ # Token Count & Context Management
618
+
619
+ {{ num_tools }} tools are available. Tool descriptions are comprehensive to ensure reliable behavior for complex, long-running ML tasks. Prioritize:
620
+ 1. Research current documentation before implementing
621
+ 2. Validate resources before expensive operations
622
+ 3. Handle async operations correctly
623
+ 4. Ensure result persistence
624
+ 5. Communicate progress and expectations clearly
625
+
626
+ This verbose guidance optimizes for ZERO ERRORS in production ML workflows over token efficiency.
agent/tools/__init__.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Hugging Face tools for the agent
3
+ """
4
+
5
+ from agent.tools.dataset_tools import (
6
+ HF_INSPECT_DATASET_TOOL_SPEC,
7
+ hf_inspect_dataset_handler,
8
+ )
9
+ from agent.tools.github_find_examples import (
10
+ GITHUB_FIND_EXAMPLES_TOOL_SPEC,
11
+ github_find_examples_handler,
12
+ )
13
+ from agent.tools.github_list_repos import (
14
+ GITHUB_LIST_REPOS_TOOL_SPEC,
15
+ github_list_repos_handler,
16
+ )
17
+ from agent.tools.github_read_file import (
18
+ GITHUB_READ_FILE_TOOL_SPEC,
19
+ github_read_file_handler,
20
+ )
21
+ from agent.tools.jobs_tool import HF_JOBS_TOOL_SPEC, HfJobsTool, hf_jobs_handler
22
+ from agent.tools.types import ToolResult
23
+
24
+ __all__ = [
25
+ "ToolResult",
26
+ "HF_JOBS_TOOL_SPEC",
27
+ "hf_jobs_handler",
28
+ "HfJobsTool",
29
+ "GITHUB_FIND_EXAMPLES_TOOL_SPEC",
30
+ "github_find_examples_handler",
31
+ "GITHUB_LIST_REPOS_TOOL_SPEC",
32
+ "github_list_repos_handler",
33
+ "GITHUB_READ_FILE_TOOL_SPEC",
34
+ "github_read_file_handler",
35
+ "GITHUB_SEARCH_CODE_TOOL_SPEC",
36
+ "github_search_code_handler",
37
+ "HF_INSPECT_DATASET_TOOL_SPEC",
38
+ "hf_inspect_dataset_handler",
39
+ ]
agent/tools/dataset_tools.py ADDED
@@ -0,0 +1,445 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Dataset Inspection Tool - Comprehensive dataset analysis in one call
3
+
4
+ Combines /is-valid, /splits, /info, /first-rows, and /parquet endpoints
5
+ to provide everything needed for ML tasks in a single tool call.
6
+ """
7
+
8
+ import asyncio
9
+ import os
10
+ from typing import Any, TypedDict
11
+
12
+ import httpx
13
+
14
+ from agent.tools.types import ToolResult
15
+
16
+ BASE_URL = "https://datasets-server.huggingface.co"
17
+
18
+ # Truncation limit for long sample values in the output
19
+ MAX_SAMPLE_VALUE_LEN = 150
20
+
21
+
22
# Typed shape describing one dataset config together with the split names it
# contains. Functional TypedDict form; instances are plain dicts at runtime.
SplitConfig = TypedDict("SplitConfig", {"name": str, "splits": list[str]})
28
+
29
def _get_headers() -> dict:
    """Build auth headers for private/gated datasets from the HF_TOKEN env var."""
    token = os.environ.get("HF_TOKEN")
    # No (or empty) token -> anonymous request with no Authorization header.
    return {"Authorization": f"Bearer {token}"} if token else {}
35
+
36
+
37
async def inspect_dataset(
    dataset: str,
    config: str | None = None,
    split: str | None = None,
    sample_rows: int = 3,
) -> ToolResult:
    """
    Get comprehensive dataset info in one call.

    Combines the datasets-server /is-valid, /splits, /parquet, /info and
    /first-rows endpoints into one markdown report. Calls within each phase
    run in parallel for speed; endpoint failures are collected as warnings
    instead of aborting the whole inspection.

    Args:
        dataset: Hub dataset id (e.g. "squad").
        config: Config name; auto-detected from /splits when omitted.
        split: Split name; auto-detected from /splits when omitted.
        sample_rows: Number of sample rows to include in the report.

    Returns:
        ToolResult whose "formatted" field holds the markdown report.
        "isError" is True only when no section at all could be produced.
    """
    headers = _get_headers()
    output_parts: list[str] = []  # markdown sections, in display order
    errors: list[str] = []        # per-endpoint warnings appended to the report

    async with httpx.AsyncClient(timeout=15, headers=headers) as client:
        # Phase 1: Parallel calls for structure info (no dependencies)
        is_valid_task = client.get(f"{BASE_URL}/is-valid", params={"dataset": dataset})
        splits_task = client.get(f"{BASE_URL}/splits", params={"dataset": dataset})
        parquet_task = client.get(f"{BASE_URL}/parquet", params={"dataset": dataset})

        # return_exceptions=True keeps one failed endpoint from cancelling the rest.
        results = await asyncio.gather(
            is_valid_task,
            splits_task,
            parquet_task,
            return_exceptions=True,
        )

        # Process is-valid
        if not isinstance(results[0], Exception):
            try:
                output_parts.append(_format_status(results[0].json()))
            except Exception as e:
                errors.append(f"is-valid: {e}")

        # Process splits and auto-detect config/split (first config / first
        # split is used when the caller did not specify one).
        configs: list[SplitConfig] = []
        if not isinstance(results[1], Exception):
            try:
                splits_data = results[1].json()
                configs = _extract_configs(splits_data)
                if not config:
                    config = configs[0]["name"] if configs else "default"
                if not split:
                    split = configs[0]["splits"][0] if configs else "train"
                output_parts.append(_format_structure(configs))
            except Exception as e:
                errors.append(f"splits: {e}")

        # Fallbacks in case the /splits call itself failed above.
        if not config:
            config = "default"
        if not split:
            split = "train"

        # Process parquet (section is held back and appended at the very end)
        parquet_section = None
        if not isinstance(results[2], Exception):
            try:
                parquet_section = _format_parquet_files(results[2].json())
            except Exception:
                pass  # Silently skip if no parquet

        # Phase 2: Parallel calls for content (depend on config/split)
        info_task = client.get(
            f"{BASE_URL}/info", params={"dataset": dataset, "config": config}
        )
        rows_task = client.get(
            f"{BASE_URL}/first-rows",
            params={"dataset": dataset, "config": config, "split": split},
            # /first-rows can be slow for large datasets; allow extra time.
            timeout=30,
        )

        content_results = await asyncio.gather(
            info_task,
            rows_task,
            return_exceptions=True,
        )

        # Process info (schema)
        if not isinstance(content_results[0], Exception):
            try:
                output_parts.append(_format_schema(content_results[0].json(), config))
            except Exception as e:
                errors.append(f"info: {e}")

        # Process sample rows
        if not isinstance(content_results[1], Exception):
            try:
                output_parts.append(
                    _format_samples(
                        content_results[1].json(), config, split, sample_rows
                    )
                )
            except Exception as e:
                errors.append(f"rows: {e}")

    # Add parquet section at the end if available
    if parquet_section:
        output_parts.append(parquet_section)

    # Combine output: title, then sections, then any collected warnings.
    formatted = f"# {dataset}\n\n" + "\n\n".join(output_parts)
    if errors:
        formatted += f"\n\n**Warnings:** {'; '.join(errors)}"

    return {
        "formatted": formatted,
        "totalResults": 1,
        "resultsShared": 1,
        "isError": len(output_parts) == 0,
    }
147
+
148
+
149
+ def _format_status(data: dict) -> str:
150
+ """Format /is-valid response as status line"""
151
+ available = [
152
+ k
153
+ for k in ["viewer", "preview", "search", "filter", "statistics"]
154
+ if data.get(k)
155
+ ]
156
+ if available:
157
+ return f"## Status\n✓ Valid ({', '.join(available)})"
158
+ return "## Status\n✗ Dataset may have issues"
159
+
160
+
161
def _extract_configs(splits_data: dict) -> list[SplitConfig]:
    """Group the flat split list from the /splits response by config name.

    Splits without an explicit config fall under "default"; insertion order
    of configs is preserved.
    """
    grouped: dict[str, SplitConfig] = {}
    for entry in splits_data.get("splits", []):
        config_name = entry.get("config", "default")
        bucket = grouped.setdefault(config_name, {"name": config_name, "splits": []})
        bucket["splits"].append(entry.get("split"))
    return list(grouped.values())
170
+
171
+
172
def _format_structure(configs: list[SplitConfig], max_rows: int = 10) -> str:
    """Format configs and splits as a markdown table.

    Shows at most *max_rows* config/split rows; when truncated, a final
    summary row notes how many rows were shown out of the total.
    """
    lines = [
        "## Structure (configs & splits)",
        "| Config | Split |",
        "|--------|-------|",
    ]

    total_splits = sum(len(cfg["splits"]) for cfg in configs)
    added_rows = 0

    for cfg in configs:
        for split_name in cfg["splits"]:
            if added_rows >= max_rows:
                break
            lines.append(f"| {cfg['name']} | {split_name} |")
            added_rows += 1
        if added_rows >= max_rows:
            break

    if total_splits > added_rows:
        # Fix: the truncation marker previously emitted THREE cells in a
        # two-column table, which breaks markdown table rendering. Keep it
        # at exactly two cells to match the header.
        lines.append(
            f"| ... | ... (_showing {added_rows} of {total_splits} config/split rows_) |"
        )

    return "\n".join(lines)
198
+
199
+
200
def _format_schema(info: dict, config: str) -> str:
    """Render the feature schema of *config* as a two-column markdown table."""
    features = info.get("dataset_info", {}).get("features", {})
    header = [f"## Schema ({config})", "| Column | Type |", "|--------|------|"]
    rows = [f"| {name} | {_get_type_str(spec)} |" for name, spec in features.items()]
    return "\n".join(header + rows)
208
+
209
+
210
+ def _get_type_str(col_info: dict) -> str:
211
+ """Convert feature info to readable type string"""
212
+ dtype = col_info.get("dtype") or col_info.get("_type", "unknown")
213
+ if col_info.get("_type") == "ClassLabel":
214
+ names = col_info.get("names", [])
215
+ if names and len(names) <= 5:
216
+ return f"ClassLabel ({', '.join(f'{n}={i}' for i, n in enumerate(names))})"
217
+ return f"ClassLabel ({len(names)} classes)"
218
+ return str(dtype)
219
+
220
+
221
def _format_samples(rows_data: dict, config: str, split: str, limit: int) -> str:
    """Format up to *limit* sample rows, truncating long cell values.

    The first value found under a (case-insensitive) "messages" column is
    additionally summarized via _format_messages_structure, since chat
    datasets benefit from a structural breakdown.
    """
    lines = [f"## Sample Rows ({config}/{split})"]
    first_messages_value = None

    for idx, wrapper in enumerate(rows_data.get("rows", [])[:limit], start=1):
        lines.append(f"**Row {idx}:**")
        for column, value in wrapper.get("row", {}).items():
            # Remember the first messages column for format analysis below.
            if column.lower() == "messages" and first_messages_value is None:
                first_messages_value = value

            rendered = str(value)
            if len(rendered) > MAX_SAMPLE_VALUE_LEN:
                rendered = rendered[:MAX_SAMPLE_VALUE_LEN] + "..."
            lines.append(f"- {column}: {rendered}")

    if first_messages_value is not None:
        summary = _format_messages_structure(first_messages_value)
        if summary:
            lines.append("")
            lines.append(summary)

    return "\n".join(lines)
249
+
250
+
251
def _format_messages_structure(messages_data: Any) -> str | None:
    """
    Analyze and format the structure of a messages column.
    Common in chat/instruction datasets.

    Produces a markdown summary with: the set of roles seen, which common
    message keys are present, whether tool calls/results appear, and one
    representative example message (content truncated to 100 chars).

    Returns None when the value is not a non-empty list of messages (or a
    JSON string decoding to one).
    """
    import json

    # Parse if string — some datasets store the messages column as
    # JSON-encoded text rather than a native list.
    if isinstance(messages_data, str):
        try:
            messages_data = json.loads(messages_data)
        except json.JSONDecodeError:
            return None

    if not isinstance(messages_data, list) or not messages_data:
        return None

    lines = ["## Messages Column Format"]

    # Analyze message structure across ALL messages (non-dict entries are
    # skipped silently).
    roles_seen = set()
    has_tool_calls = False
    has_tool_results = False
    message_keys = set()

    for msg in messages_data:
        if not isinstance(msg, dict):
            continue

        message_keys.update(msg.keys())

        role = msg.get("role", "")
        if role:
            roles_seen.add(role)

        # Key PRESENCE counts here, even if the value is None/empty —
        # the example-selection loop below is stricter (truthy values only).
        if "tool_calls" in msg or "function_call" in msg:
            has_tool_calls = True
        if role in ("tool", "function") or msg.get("tool_call_id"):
            has_tool_results = True

    # Format the analysis
    lines.append(
        f"**Roles:** {', '.join(sorted(roles_seen)) if roles_seen else 'unknown'}"
    )

    # Show common message keys with presence indicators
    common_keys = [
        "role",
        "content",
        "tool_calls",
        "tool_call_id",
        "name",
        "function_call",
    ]
    key_status = []
    for key in common_keys:
        if key in message_keys:
            key_status.append(f"{key} ✓")
        else:
            key_status.append(f"{key} ✗")
    lines.append(f"**Message keys:** {', '.join(key_status)}")

    if has_tool_calls:
        lines.append("**Tool calls:** ✓ Present")
    if has_tool_results:
        lines.append("**Tool results:** ✓ Present")

    # Show example message structure
    # Priority: 1) message with tool_calls, 2) first assistant message, 3) first non-system message
    example = None
    fallback = None
    for msg in messages_data:
        if not isinstance(msg, dict):
            continue
        role = msg.get("role", "")
        # Check for actual tool_calls/function_call values (not None)
        if msg.get("tool_calls") or msg.get("function_call"):
            example = msg
            break
        if role == "assistant" and example is None:
            example = msg
        elif role != "system" and fallback is None:
            fallback = msg
    if example is None:
        example = fallback

    if example:
        lines.append("")
        lines.append("**Example message structure:**")
        # Build a copy with truncated content but keep all keys
        example_clean = {}
        for key, val in example.items():
            if key == "content" and isinstance(val, str) and len(val) > 100:
                example_clean[key] = val[:100] + "..."
            else:
                example_clean[key] = val
        lines.append("```json")
        lines.append(json.dumps(example_clean, indent=2, ensure_ascii=False))
        lines.append("```")

    return "\n".join(lines)
352
+
353
+
354
+ def _format_parquet_files(data: dict, max_rows: int = 10) -> str | None:
355
+ """Format parquet file info, return None if no files."""
356
+ files = data.get("parquet_files", [])
357
+ if not files:
358
+ return None
359
+
360
+ # Group by config/split
361
+ groups: dict[str, dict] = {}
362
+ for f in files:
363
+ key = f"{f.get('config', 'default')}/{f.get('split', 'train')}"
364
+ if key not in groups:
365
+ groups[key] = {"count": 0, "size": 0}
366
+ size = f.get("size") or 0
367
+ if not isinstance(size, (int, float)):
368
+ size = 0
369
+ groups[key]["count"] += 1
370
+ groups[key]["size"] += int(size)
371
+
372
+ lines = ["## Files (Parquet)"]
373
+ items = list(groups.items())
374
+ total_groups = len(items)
375
+
376
+ shown = 0
377
+ for key, info in items[:max_rows]:
378
+ size_mb = info["size"] / (1024 * 1024)
379
+ lines.append(f"- {key}: {info['count']} file(s) ({size_mb:.1f} MB)")
380
+ shown += 1
381
+
382
+ if total_groups > shown:
383
+ lines.append(f"- ... (_showing {shown} of {total_groups} parquet groups_)")
384
+ return "\n".join(lines)
385
+
386
+
387
+ # Tool specification
388
# JSON-schema-style tool specification consumed by the agent's tool router.
# The description text is surfaced to the model verbatim, so it doubles as
# usage documentation for the hf_inspect_dataset tool.
HF_INSPECT_DATASET_TOOL_SPEC = {
    "name": "hf_inspect_dataset",
    "description": (
        "Inspect a Hugging Face dataset comprehensively in one call.\n\n"
        "## What you get\n"
        "- Status check (validates dataset works without errors)\n"
        "- All configs and splits (row counts/shares may be '?' when metadata is missing)\n"
        "- Column names and types (schema)\n"
        "- Sample rows to understand data format\n"
        "- Parquet file structure and sizes\n\n"
        "## CRITICAL\n"
        "**Always inspect datasets before writing training code** to understand:\n"
        "- Column names for your dataloader\n"
        "- Data types and format\n"
        "- Available splits (train/test/validation)\n\n"
        "Supports private/gated datasets when HF_TOKEN is set.\n\n"
        "## Examples\n"
        '{"dataset": "stanfordnlp/imdb"}\n'
        '{"dataset": "nyu-mll/glue", "config": "mrpc", "sample_rows": 5}\n'
    ),
    "parameters": {
        "type": "object",
        "properties": {
            "dataset": {
                "type": "string",
                "description": "Dataset ID in 'org/name' format (e.g., 'stanfordnlp/imdb')",
            },
            "config": {
                "type": "string",
                "description": "Config/subset name. Auto-detected if not specified.",
            },
            "split": {
                "type": "string",
                "description": "Split for sample rows. Auto-detected if not specified.",
            },
            "sample_rows": {
                "type": "integer",
                "description": "Number of sample rows to show (default: 3, max: 10)",
                "default": 3,
            },
        },
        "required": ["dataset"],
    },
}
432
+
433
+
434
async def hf_inspect_dataset_handler(arguments: dict[str, Any]) -> tuple[str, bool]:
    """Adapter between the agent tool router and inspect_dataset.

    Returns (formatted_report, success); any exception is converted into
    an error string with success=False.
    """
    try:
        # Clamp the requested sample count to the documented maximum of 10.
        requested_rows = arguments.get("sample_rows", 3)
        report = await inspect_dataset(
            dataset=arguments["dataset"],
            config=arguments.get("config"),
            split=arguments.get("split"),
            sample_rows=min(requested_rows, 10),
        )
        return report["formatted"], not report.get("isError", False)
    except Exception as exc:
        return f"Error inspecting dataset: {str(exc)}", False
agent/tools/docs_tools.py ADDED
@@ -0,0 +1,956 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Documentation search tools for exploring HuggingFace and Gradio documentation.
3
+ """
4
+
5
+ import asyncio
6
+ import json
7
+ import os
8
+ from typing import Any
9
+
10
+ import httpx
11
+ from bs4 import BeautifulSoup
12
+ from whoosh.analysis import StemmingAnalyzer
13
+ from whoosh.fields import ID, TEXT, Schema
14
+ from whoosh.filedb.filestore import RamStorage
15
+ from whoosh.qparser import MultifieldParser, OrGroup
16
+
17
# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------

# Default and hard ceiling for the number of doc pages returned per call.
DEFAULT_MAX_RESULTS = 20
MAX_RESULTS_CAP = 50

# Full Gradio documentation dump (plain text, no query needed).
GRADIO_LLMS_TXT_URL = "https://gradio.app/llms.txt"
# Gradio playground worker used for embedding search over guides/docs/demos.
GRADIO_SEARCH_URL = "https://playground-worker.pages.dev/api/prompt"

# Virtual endpoints that fan out to several real huggingface.co/docs
# endpoints; fetched results are merged and cached under the composite key.
COMPOSITE_ENDPOINTS: dict[str, list[str]] = {
    "optimum": [
        "optimum",
        "optimum-habana",
        "optimum-neuron",
        "optimum-intel",
        "optimum-executorch",
        "optimum-tpu",
    ],
    "courses": [
        "llm-course",
        "robotics-course",
        "mcp-course",
        "smol-course",
        "agents-course",
        "deep-rl-course",
        "computer-vision-course",
        "audio-course",
        "ml-games-course",
        "diffusion-course",
        "ml-for-3d-course",
        "cookbook",
    ],
}

# ---------------------------------------------------------------------------
# Caches (process-lifetime, guarded by _cache_lock where noted)
# ---------------------------------------------------------------------------

# endpoint -> fetched doc pages (also keyed by composite endpoint names)
_docs_cache: dict[str, list[dict[str, str]]] = {}
# endpoint -> (whoosh index, query parser), built lazily from fetched docs
_index_cache: dict[str, tuple[Any, MultifieldParser]] = {}
# Guards the module-level caches across concurrent handler invocations.
_cache_lock = asyncio.Lock()
# Raw OpenAPI spec JSON, fetched once per process.
_openapi_cache: dict[str, Any] | None = None
# (index, parser, endpoints) built once from the OpenAPI spec.
_openapi_index_cache: tuple[Any, MultifieldParser, list[dict[str, Any]]] | None = None
61
+
62
+ # ---------------------------------------------------------------------------
63
+ # Gradio Documentation
64
+ # ---------------------------------------------------------------------------
65
+
66
+
67
async def _fetch_gradio_docs(query: str | None = None) -> str:
    """
    Fetch Gradio documentation.
    Without query: Get full documentation from llms.txt
    With query: Run embedding search on guides/demos for relevant content

    Raises httpx.HTTPStatusError on non-2xx responses (raise_for_status).
    """
    async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
        if not query:
            resp = await client.get(GRADIO_LLMS_TXT_URL)
            resp.raise_for_status()
            return resp.text

        resp = await client.post(
            GRADIO_SEARCH_URL,
            headers={
                "Content-Type": "application/json",
                # NOTE(review): Origin header appears to be expected by the
                # playground worker — confirm before removing.
                "Origin": "https://gradio-docs-mcp.up.railway.app",
            },
            json={
                "prompt_to_embed": query,
                # Sentinel values interpreted server-side;
                # $INSERT_GUIDES_DOCS_DEMOS selects the corpus to search.
                "SYSTEM_PROMPT": "$INSERT_GUIDES_DOCS_DEMOS",
                "FALLBACK_PROMPT": "No results found",
            },
        )
        resp.raise_for_status()
        # The worker returns the assembled text under the SYS_PROMPT key.
        return resp.json().get("SYS_PROMPT", "No results found")
93
+
94
+
95
+ # ---------------------------------------------------------------------------
96
+ # HF Documentation - Fetching
97
+ # ---------------------------------------------------------------------------
98
+
99
+
100
async def _fetch_endpoint_docs(hf_token: str, endpoint: str) -> list[dict[str, str]]:
    """Fetch all docs for an endpoint by parsing sidebar and fetching each page.

    Scrapes https://huggingface.co/docs/<endpoint>, collects every link in
    the navigation sidebar, then fetches each page's markdown rendition
    (page URL + ".md") concurrently. Pages that fail to fetch keep their
    entry with empty content and an error glimpse instead of failing the
    whole endpoint.

    Raises:
        httpx.HTTPStatusError: if the landing page itself cannot be fetched.
        ValueError: if the sidebar or its links cannot be located.
    """
    url = f"https://huggingface.co/docs/{endpoint}"
    headers = {"Authorization": f"Bearer {hf_token}"}

    async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
        resp = await client.get(url, headers=headers)
        resp.raise_for_status()

        soup = BeautifulSoup(resp.text, "html.parser")
        # NOTE(review): sidebar detection keys off a "flex-auto" CSS class
        # in the docs layout — brittle if the site markup changes.
        sidebar = soup.find("nav", class_=lambda x: x and "flex-auto" in x)
        if not sidebar:
            raise ValueError(f"Could not find navigation sidebar for '{endpoint}'")

        nav_items = []
        for link in sidebar.find_all("a", href=True):
            href = link["href"]
            # Relative hrefs are rooted at huggingface.co.
            page_url = f"https://huggingface.co{href}" if href.startswith("/") else href
            nav_items.append({"title": link.get_text(strip=True), "url": page_url})

        if not nav_items:
            raise ValueError(f"No navigation links found for '{endpoint}'")

        async def fetch_page(item: dict[str, str]) -> dict[str, str]:
            # Each docs page is also served as raw markdown at <url>.md.
            md_url = f"{item['url']}.md"
            try:
                r = await client.get(md_url, headers=headers)
                r.raise_for_status()
                content = r.text.strip()
                glimpse = content[:200] + "..." if len(content) > 200 else content
            except Exception as e:
                # Best-effort: keep the page entry but mark it unfetchable.
                content, glimpse = "", f"[Could not fetch: {str(e)[:50]}]"
            return {
                "title": item["title"],
                "url": item["url"],
                "md_url": md_url,
                "glimpse": glimpse,
                "content": content,
                "section": endpoint,
            }

        # Fetch all pages concurrently over the same client connection pool.
        return list(await asyncio.gather(*[fetch_page(item) for item in nav_items]))
142
+
143
+
144
async def _get_docs(hf_token: str, endpoint: str) -> list[dict[str, str]]:
    """Get docs for endpoint with caching. Expands composite endpoints.

    Composite endpoints (see COMPOSITE_ENDPOINTS) fan out to several real
    endpoints; the merged result is cached under the composite key as well
    as under each sub-endpoint key.

    NOTE(review): the lock is released between the cache check and the
    network fetch, so two concurrent callers may fetch the same endpoint
    in parallel (last write wins) — redundant work, but not incorrect.
    """
    # Fast path: the (possibly composite) key is already cached.
    async with _cache_lock:
        if endpoint in _docs_cache:
            return _docs_cache[endpoint]

    # A plain endpoint expands to a one-element list of itself.
    sub_endpoints = COMPOSITE_ENDPOINTS.get(endpoint, [endpoint])
    all_docs: list[dict[str, str]] = []

    for sub in sub_endpoints:
        async with _cache_lock:
            if sub in _docs_cache:
                all_docs.extend(_docs_cache[sub])
                continue

        # Fetch outside the lock so network I/O is not serialized.
        docs = await _fetch_endpoint_docs(hf_token, sub)
        async with _cache_lock:
            _docs_cache[sub] = docs
        all_docs.extend(docs)

    # Also cache the combined result under the original request key.
    async with _cache_lock:
        _docs_cache[endpoint] = all_docs
    return all_docs
167
+
168
+
169
+ # ---------------------------------------------------------------------------
170
+ # HF Documentation - Search
171
+ # ---------------------------------------------------------------------------
172
+
173
+
174
async def _build_search_index(
    endpoint: str, docs: list[dict[str, str]]
) -> tuple[Any, MultifieldParser]:
    """Build or retrieve cached Whoosh search index.

    The index lives entirely in RAM (RamStorage) and is cached per endpoint
    in _index_cache. `content` is indexed but not stored to limit memory;
    title matches are boosted 2x over body matches.
    """
    async with _cache_lock:
        if endpoint in _index_cache:
            return _index_cache[endpoint]

    # Stemming analyzer so e.g. "training" also matches "train".
    analyzer = StemmingAnalyzer()
    schema = Schema(
        title=TEXT(stored=True, analyzer=analyzer),
        url=ID(stored=True, unique=True),
        md_url=ID(stored=True),
        section=ID(stored=True),
        glimpse=TEXT(stored=True, analyzer=analyzer),
        content=TEXT(stored=False, analyzer=analyzer),
    )
    storage = RamStorage()
    index = storage.create_index(schema)
    writer = index.writer()
    for doc in docs:
        writer.add_document(
            title=doc.get("title", ""),
            url=doc.get("url", ""),
            md_url=doc.get("md_url", ""),
            section=doc.get("section", endpoint),
            glimpse=doc.get("glimpse", ""),
            content=doc.get("content", ""),
        )
    writer.commit()

    # OrGroup: query terms are OR-ed so partial matches still rank.
    parser = MultifieldParser(
        ["title", "content"],
        schema=schema,
        fieldboosts={"title": 2.0, "content": 1.0},
        group=OrGroup,
    )

    async with _cache_lock:
        _index_cache[endpoint] = (index, parser)
    return index, parser
215
+
216
+
217
async def _search_docs(
    endpoint: str, docs: list[dict[str, str]], query: str, limit: int
) -> tuple[list[dict[str, Any]], str | None]:
    """Search docs using Whoosh. Returns (results, fallback_message).

    fallback_message is non-None when the caller should fall back to the
    default (unsearched) ordering: either the query failed to parse or it
    produced no hits.
    """
    index, parser = await _build_search_index(endpoint, docs)

    try:
        query_obj = parser.parse(query)
    except Exception:
        return [], "Query contained unsupported syntax; showing default ordering."

    with index.searcher() as searcher:
        results = searcher.search(query_obj, limit=limit)
        # Materialize hits while the searcher is still open — hit objects
        # are tied to the searcher's lifetime.
        matches = [
            {
                "title": hit["title"],
                "url": hit["url"],
                "md_url": hit.get("md_url", ""),
                "section": hit.get("section", endpoint),
                "glimpse": hit["glimpse"],
                "score": round(hit.score, 2),
            }
            for hit in results
        ]

    if not matches:
        return [], "No strong matches found; showing default ordering."
    return matches, None
245
+
246
+
247
+ # ---------------------------------------------------------------------------
248
+ # HF Documentation - Formatting
249
+ # ---------------------------------------------------------------------------
250
+
251
+
252
+ def _format_results(
253
+ endpoint: str,
254
+ items: list[dict[str, Any]],
255
+ total: int,
256
+ query: str | None = None,
257
+ note: str | None = None,
258
+ ) -> str:
259
+ """Format search results as readable text."""
260
+ base_url = f"https://huggingface.co/docs/{endpoint}"
261
+ out = f"Documentation structure for: {base_url}\n\n"
262
+
263
+ if query:
264
+ out += f"Query: '{query}' → showing {len(items)} result(s) out of {total} pages"
265
+ if note:
266
+ out += f" ({note})"
267
+ out += "\n\n"
268
+ else:
269
+ out += f"Found {len(items)} page(s) (total available: {total}).\n"
270
+ if note:
271
+ out += f"({note})\n"
272
+ out += "\n"
273
+
274
+ for i, item in enumerate(items, 1):
275
+ out += f"{i}. **{item['title']}**\n"
276
+ out += f" URL: {item['url']}\n"
277
+ out += f" Section: {item.get('section', endpoint)}\n"
278
+ if query and "score" in item:
279
+ out += f" Relevance score: {item['score']:.2f}\n"
280
+ out += f" Glimpse: {item['glimpse']}\n\n"
281
+
282
+ return out
283
+
284
+
285
+ # ---------------------------------------------------------------------------
286
+ # Handlers
287
+ # ---------------------------------------------------------------------------
288
+
289
+
290
async def explore_hf_docs_handler(arguments: dict[str, Any]) -> tuple[str, bool]:
    """Explore documentation structure with optional search query.

    Returns (formatted_text, success). Gradio is special-cased to its own
    documentation API; every other endpoint is scraped from
    huggingface.co/docs (requires HF_TOKEN) and searched locally.
    """
    endpoint = arguments.get("endpoint", "").lstrip("/")
    query = arguments.get("query")
    max_results = arguments.get("max_results")

    if not endpoint:
        return "Error: No endpoint provided", False

    # Gradio uses its own API
    if endpoint.lower() == "gradio":
        try:
            clean_query = (
                query.strip() if isinstance(query, str) and query.strip() else None
            )
            content = await _fetch_gradio_docs(clean_query)
            header = "# Gradio Documentation\n\n"
            if clean_query:
                header += f"Query: '{clean_query}'\n\n"
            header += "Source: https://gradio.app/docs\n\n---\n\n"
            return header + content, True
        except httpx.HTTPStatusError as e:
            return f"HTTP error fetching Gradio docs: {e.response.status_code}", False
        except httpx.RequestError as e:
            return f"Request error fetching Gradio docs: {str(e)}", False
        except Exception as e:
            return f"Error fetching Gradio docs: {str(e)}", False

    # HF docs require an authenticated scrape.
    hf_token = os.environ.get("HF_TOKEN")
    if not hf_token:
        return "Error: HF_TOKEN environment variable not set", False

    # Validate max_results before any network work.
    try:
        max_results_int = int(max_results) if max_results is not None else None
    except (TypeError, ValueError):
        return "Error: max_results must be an integer", False

    if max_results_int is not None and max_results_int <= 0:
        return "Error: max_results must be greater than zero", False

    try:
        docs = await _get_docs(hf_token, endpoint)
        total = len(docs)

        # Determine limit: default, capped, or as requested.
        if max_results_int is None:
            limit = DEFAULT_MAX_RESULTS
            limit_note = f"Showing top {DEFAULT_MAX_RESULTS} results (set max_results to adjust)."
        elif max_results_int > MAX_RESULTS_CAP:
            limit = MAX_RESULTS_CAP
            limit_note = f"Requested {max_results_int} but showing top {MAX_RESULTS_CAP} (maximum)."
        else:
            limit = max_results_int
            limit_note = None

        # Search when a non-blank query was given; otherwise just paginate.
        clean_query = (
            query.strip() if isinstance(query, str) and query.strip() else None
        )
        fallback_msg = None

        if clean_query:
            results, fallback_msg = await _search_docs(
                endpoint, docs, clean_query, limit
            )
            # Empty search results fall back to default ordering
            # (fallback_msg explains why).
            if not results:
                results = docs[:limit]
        else:
            results = docs[:limit]

        # Combine search-fallback and limit notes into one header note.
        notes = []
        if fallback_msg:
            notes.append(fallback_msg)
        if limit_note:
            notes.append(limit_note)
        note = "; ".join(notes) if notes else None

        return _format_results(endpoint, results, total, clean_query, note), True

    except httpx.HTTPStatusError as e:
        return f"HTTP error: {e.response.status_code} - {e.response.text[:200]}", False
    except httpx.RequestError as e:
        return f"Request error: {str(e)}", False
    except ValueError as e:
        return f"Error: {str(e)}", False
    except Exception as e:
        return f"Unexpected error: {str(e)}", False
379
+
380
+
381
async def hf_docs_fetch_handler(arguments: dict[str, Any]) -> tuple[str, bool]:
    """Fetch full markdown content of a documentation page.

    Accepts either a docs page URL or its .md URL (a ".md" suffix is
    appended when missing). Requires HF_TOKEN. Returns
    (content_or_error, success).
    """
    url = arguments.get("url", "")
    if not url:
        return "Error: No URL provided", False

    hf_token = os.environ.get("HF_TOKEN")
    if not hf_token:
        return "Error: HF_TOKEN environment variable not set", False

    # Docs pages are served as raw markdown at <page-url>.md.
    if not url.endswith(".md"):
        url = f"{url}.md"

    try:
        async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
            resp = await client.get(
                url, headers={"Authorization": f"Bearer {hf_token}"}
            )
            resp.raise_for_status()
            return f"Documentation from: {url}\n\n{resp.text}", True
    except httpx.HTTPStatusError as e:
        return (
            f"HTTP error fetching {url}: {e.response.status_code} - {e.response.text[:200]}",
            False,
        )
    except httpx.RequestError as e:
        return f"Request error fetching {url}: {str(e)}", False
    except Exception as e:
        return f"Error fetching documentation: {str(e)}", False
410
+
411
+
412
+ # ---------------------------------------------------------------------------
413
+ # OpenAPI Search
414
+ # ---------------------------------------------------------------------------
415
+
416
+
417
async def _fetch_openapi_spec() -> dict[str, Any]:
    """Fetch and cache HuggingFace OpenAPI specification.

    Cached for the process lifetime in _openapi_cache. The first fetch is
    not lock-protected, so concurrent first callers may each fetch once
    (last write wins) — redundant but harmless.
    """
    global _openapi_cache
    if _openapi_cache is not None:
        return _openapi_cache

    async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
        resp = await client.get("https://huggingface.co/.well-known/openapi.json")
        resp.raise_for_status()

    _openapi_cache = resp.json()
    return _openapi_cache
429
+
430
+
431
+ def _extract_all_tags(spec: dict[str, Any]) -> list[str]:
432
+ """Extract all unique tags from OpenAPI spec."""
433
+ tags = set()
434
+ for tag_obj in spec.get("tags", []):
435
+ if "name" in tag_obj:
436
+ tags.add(tag_obj["name"])
437
+ for path_item in spec.get("paths", {}).values():
438
+ for method, op in path_item.items():
439
+ if method in ["get", "post", "put", "delete", "patch", "head", "options"]:
440
+ for tag in op.get("tags", []):
441
+ tags.add(tag)
442
+ return sorted(tags)
443
+
444
+
445
+ def _extract_all_endpoints(spec: dict[str, Any]) -> list[dict[str, Any]]:
446
+ """Extract all endpoints from OpenAPI spec."""
447
+ servers = spec.get("servers", [])
448
+ base_url = (
449
+ servers[0].get("url", "https://huggingface.co")
450
+ if servers
451
+ else "https://huggingface.co"
452
+ )
453
+
454
+ endpoints = []
455
+ for path, path_item in spec.get("paths", {}).items():
456
+ for method, op in path_item.items():
457
+ if method not in ["get", "post", "put", "delete", "patch", "head", "options"]:
458
+ continue
459
+ endpoints.append({
460
+ "path": path,
461
+ "method": method.upper(),
462
+ "operationId": op.get("operationId", ""),
463
+ "summary": op.get("summary", ""),
464
+ "description": op.get("description", ""),
465
+ "tags": " ".join(op.get("tags", [])),
466
+ "parameters": op.get("parameters", []),
467
+ "request_body": op.get("requestBody", {}),
468
+ "responses": op.get("responses", {}),
469
+ "base_url": base_url,
470
+ })
471
+ return endpoints
472
+
473
+
474
async def _build_openapi_index() -> tuple[Any, MultifieldParser, list[dict[str, Any]]]:
    """Build or retrieve cached Whoosh index for OpenAPI endpoints.

    Fetches the spec (cached), flattens it to endpoint records, and indexes
    the searchable fields in RAM. Summary/operationId/tags are boosted over
    description text. The full endpoint records are returned alongside the
    index so callers can recover non-indexed fields (parameters, bodies).
    """
    global _openapi_index_cache
    async with _cache_lock:
        if _openapi_index_cache is not None:
            return _openapi_index_cache

    spec = await _fetch_openapi_spec()
    endpoints = _extract_all_endpoints(spec)

    analyzer = StemmingAnalyzer()
    schema = Schema(
        path=ID(stored=True, unique=True),
        method=ID(stored=True),
        operationId=TEXT(stored=True, analyzer=analyzer),
        summary=TEXT(stored=True, analyzer=analyzer),
        description=TEXT(stored=True, analyzer=analyzer),
        tags=TEXT(stored=True, analyzer=analyzer),
        param_names=TEXT(stored=False, analyzer=analyzer),
    )
    storage = RamStorage()
    index = storage.create_index(schema)
    writer = index.writer()

    for ep in endpoints:
        # Parameter names are indexed (not stored) so queries like
        # "repo_id" can surface the endpoints that accept them.
        param_names = " ".join(p.get("name", "") for p in ep.get("parameters", []))
        writer.add_document(
            path=ep["path"],
            method=ep["method"],
            operationId=ep.get("operationId", ""),
            summary=ep.get("summary", ""),
            description=ep.get("description", ""),
            tags=ep.get("tags", ""),
            param_names=param_names,
        )
    writer.commit()

    parser = MultifieldParser(
        ["summary", "description", "operationId", "tags", "param_names"],
        schema=schema,
        fieldboosts={"summary": 3.0, "operationId": 2.0, "description": 1.0, "tags": 1.5},
        group=OrGroup,
    )

    async with _cache_lock:
        _openapi_index_cache = (index, parser, endpoints)
    return index, parser, endpoints
521
+
522
+
523
async def _search_openapi(
    query: str, tag: str | None, limit: int = 20
) -> tuple[list[dict[str, Any]], str | None]:
    """Search OpenAPI endpoints using Whoosh. Returns (results, fallback_message).

    When *tag* is given, hits whose joined tag string does not contain it
    (substring match) are dropped. fallback_message is non-None when no
    results could be produced.
    """
    index, parser, endpoints = await _build_openapi_index()

    try:
        query_obj = parser.parse(query)
    except Exception:
        return [], "Query contained unsupported syntax."

    with index.searcher() as searcher:
        # Over-fetch so tag filtering can still fill up to `limit` results.
        results = searcher.search(query_obj, limit=limit * 2)  # Get extra for tag filtering
        matches = []
        for hit in results:
            # Find full endpoint data — the index stores only searchable
            # fields, not parameters/bodies.
            ep = next((e for e in endpoints if e["path"] == hit["path"] and e["method"] == hit["method"]), None)
            if ep is None:
                continue
            # Filter by tag if provided
            if tag and tag not in ep.get("tags", ""):
                continue
            matches.append({**ep, "score": round(hit.score, 2)})
            if len(matches) >= limit:
                break

    return matches, None if matches else "No matches found for query."
550
+
551
+
552
+ def _generate_curl_example(endpoint: dict[str, Any]) -> str:
553
+ """Generate curl command example for an endpoint."""
554
+ method = endpoint["method"]
555
+ path = endpoint["path"]
556
+ base_url = endpoint["base_url"]
557
+
558
+ # Build URL with path parameters
559
+ full_path = path
560
+ for param in endpoint.get("parameters", []):
561
+ if param.get("in") == "path" and param.get("required"):
562
+ name = param["name"]
563
+ example = param.get(
564
+ "example", param.get("schema", {}).get("example", f"<{name}>")
565
+ )
566
+ full_path = full_path.replace(f"{{{name}}}", str(example))
567
+
568
+ curl = f"curl -X {method} \\\n '{base_url}{full_path}'"
569
+
570
+ # Add query parameters
571
+ query_params = [p for p in endpoint.get("parameters", []) if p.get("in") == "query"]
572
+ if query_params and query_params[0].get("required"):
573
+ param = query_params[0]
574
+ example = param.get("example", param.get("schema", {}).get("example", "value"))
575
+ curl += f"?{param['name']}={example}"
576
+
577
+ curl += " \\\n -H 'Authorization: Bearer $HF_TOKEN'"
578
+
579
+ # Add request body
580
+ if method in ["POST", "PUT", "PATCH"] and endpoint.get("request_body"):
581
+ content = endpoint["request_body"].get("content", {})
582
+ if "application/json" in content:
583
+ curl += " \\\n -H 'Content-Type: application/json'"
584
+ schema = content["application/json"].get("schema", {})
585
+ example = schema.get("example", "{}")
586
+ if isinstance(example, dict):
587
+ example = json.dumps(example, indent=2)
588
+ curl += f" \\\n -d '{example}'"
589
+
590
+ return curl
591
+
592
+
593
+ def _format_parameters(parameters: list[dict[str, Any]]) -> str:
594
+ """Format parameter information from OpenAPI spec."""
595
+ if not parameters:
596
+ return ""
597
+
598
+ path_params = [p for p in parameters if p.get("in") == "path"]
599
+ query_params = [p for p in parameters if p.get("in") == "query"]
600
+ header_params = [p for p in parameters if p.get("in") == "header"]
601
+
602
+ output = []
603
+
604
+ for label, params in [
605
+ ("Path Parameters", path_params),
606
+ ("Query Parameters", query_params),
607
+ ("Header Parameters", header_params),
608
+ ]:
609
+ if not params:
610
+ continue
611
+ if output:
612
+ output.append("")
613
+ output.append(f"**{label}:**")
614
+ for p in params:
615
+ name = p.get("name", "")
616
+ required = " (required)" if p.get("required") else " (optional)"
617
+ desc = p.get("description", "")
618
+ ptype = p.get("schema", {}).get("type", "string")
619
+ example = p.get("example") or p.get("schema", {}).get("example", "")
620
+
621
+ output.append(f"- `{name}` ({ptype}){required}: {desc}")
622
+ if example:
623
+ output.append(f" Example: `{example}`")
624
+
625
+ return "\n".join(output)
626
+
627
+
628
+ def _format_response_info(responses: dict[str, Any]) -> str:
629
+ """Format response information from OpenAPI spec."""
630
+ if not responses:
631
+ return "No response information available"
632
+
633
+ output = []
634
+ for status, resp_obj in list(responses.items())[:3]:
635
+ desc = resp_obj.get("description", "")
636
+ output.append(f"- **{status}**: {desc}")
637
+ content = resp_obj.get("content", {})
638
+ if "application/json" in content:
639
+ schema = content["application/json"].get("schema", {})
640
+ if "type" in schema:
641
+ output.append(f" Returns: {schema.get('type', 'object')}")
642
+
643
+ return "\n".join(output)
644
+
645
+
646
def _format_openapi_results(
    results: list[dict[str, Any]],
    tag: str | None = None,
    query: str | None = None,
    note: str | None = None,
) -> str:
    """Format OpenAPI search results as markdown with curl examples.

    Args:
        results: Endpoint dicts (optionally carrying a Whoosh "score").
        tag: Tag filter used for the search, if any (shown in the header).
        query: Keyword query used for the search, if any.
        note: Extra annotation appended to the result count (e.g. fallback info).

    Returns:
        A markdown document, or a descriptive "no results" message.
    """
    if not results:
        if query and tag:
            return f"No API endpoints found matching '{query}' in tag '{tag}'"
        elif query:
            return f"No API endpoints found matching '{query}'"
        elif tag:
            return f"No API endpoints found with tag '{tag}'"
        return "No API endpoints found"

    # Build header
    if query and tag:
        out = f"# API Endpoints matching '{query}' (tag: `{tag}`)\n\n"
    elif query:
        out = f"# API Endpoints matching '{query}'\n\n"
    elif tag:
        out = f"# API Endpoints for tag: `{tag}`\n\n"
    else:
        out = "# API Endpoints\n\n"

    out += f"Found {len(results)} endpoint(s)"
    if note:
        out += f" ({note})"
    out += "\n\n---\n\n"

    for i, ep in enumerate(results, 1):
        out += f"## {i}. {ep['method']} {ep['path']}\n\n"

        # Relevance score is only meaningful for keyword searches.
        if query and "score" in ep:
            out += f"**Relevance:** {ep['score']:.2f}\n\n"

        if ep.get("summary"):
            out += f"**Summary:** {ep['summary']}\n\n"

        if ep.get("description"):
            desc = ep["description"][:300]
            if len(ep["description"]) > 300:
                desc += "..."
            out += f"**Description:** {desc}\n\n"

        if ep.get("tags"):
            out += f"**Tags:** {ep['tags']}\n\n"

        params_info = _format_parameters(ep.get("parameters", []))
        if params_info:
            out += params_info + "\n\n"

        out += "**Usage:**\n```bash\n"
        out += _generate_curl_example(ep)
        out += "\n```\n\n"

        out += "**Returns:**\n"
        # Robustness fix: tolerate endpoints without a "responses" entry.
        # Previously ep["responses"] raised KeyError and aborted formatting
        # of the whole result list; _format_response_info handles {} cleanly.
        out += _format_response_info(ep.get("responses", {}))
        out += "\n\n---\n\n"

    return out
708
+
709
+
710
async def search_openapi_handler(arguments: dict[str, Any]) -> tuple[str, bool]:
    """Search the HuggingFace OpenAPI specification by keyword and/or tag.

    Returns ``(formatted_markdown, success)``. Keyword search runs first
    when a query is given; if it yields nothing and a tag was also
    supplied, falls back to listing every endpoint under that tag.
    """
    tag = arguments.get("tag", "").strip() or None
    query = arguments.get("query", "").strip() or None

    if not (tag or query):
        return (
            "Error: Provide either 'query' (keyword search) or 'tag' (category filter), or both.",
            False,
        )

    try:
        note: str | None = None

        if query:
            results, search_note = await _search_openapi(query, tag, limit=20)
            if results:
                # Keyword search succeeded — done.
                return _format_openapi_results(results, tag=tag, query=query, note=search_note), True
            if not tag:
                # Nothing found and no tag to fall back to.
                return _format_openapi_results([], query=query), True
            # Fall through to the tag listing below with an explanatory note.
            note = f"No matches for '{query}'; showing all endpoints in tag '{tag}'"

        if tag:
            _, _, endpoints = await _build_openapi_index()
            tagged = [ep for ep in endpoints if tag in ep.get("tags", "")]
            return _format_openapi_results(tagged, tag=tag, query=None, note=note), True

        return "Error: No results found", False

    except httpx.HTTPStatusError as e:
        return f"HTTP error fetching OpenAPI spec: {e.response.status_code}", False
    except httpx.RequestError as e:
        return f"Request error: {str(e)}", False
    except Exception as e:
        return f"Error searching OpenAPI spec: {str(e)}", False
750
+
751
+
752
async def _get_api_search_tool_spec() -> dict[str, Any]:
    """Generate OpenAPI tool spec with tags populated at runtime.

    Fetches the live OpenAPI spec so the 'tag' enum always reflects the
    categories currently published by the Hub API rather than a stale,
    hard-coded list.
    """
    spec = await _fetch_openapi_spec()
    tags = _extract_all_tags(spec)

    return {
        "name": "find_hf_api",
        "description": (
            "Find HuggingFace Hub REST API endpoints to make HTTP requests. Returns curl examples with authentication. "
            "⚠️ USE THIS TOOL when you need to call the HF Hub API directly - for operations like: "
            "uploading/downloading files, managing repos, listing models/datasets, getting user info, "
            "managing webhooks, collections, discussions, or any Hub interaction not covered by other tools. "
            "**Use cases:** (1) 'Stream Space logs' → query='space logs', "
            "(2) 'Get Space metrics/Zero-GPU usage' → query='space metrics', "
            "(3) 'List organization members' → query='organization members', "
            "(4) 'Generate repo access token' → query='jwt token', "
            "(5) 'Check repo security scan' → query='security scan'. "
            "**Search modes:** Use 'query' for keyword search, 'tag' to browse a category, or both. "
            "If query finds no results, falls back to showing all endpoints in the tag. "
            "**Output:** Full endpoint details with method, path, parameters, curl command, and response schema."
        ),
        "parameters": {
            "type": "object",
            "properties": {
                "query": {
                    "type": "string",
                    "description": (
                        "Keyword search across endpoint summaries, descriptions, and operation IDs. "
                        "Examples: 'upload file', 'create repository', 'list user models', 'delete branch', "
                        "'webhook', 'collection', 'discussion comments'. Supports stemming (upload/uploading both work)."
                    ),
                },
                "tag": {
                    "type": "string",
                    # Enum is built from the live spec above, not hard-coded.
                    "enum": tags,
                    "description": (
                        "Filter by API category. Use alone to browse all endpoints in a category, "
                        "or combine with 'query' to search within a category."
                    ),
                },
            },
            # Both parameters are optional at the schema level; the handler
            # rejects calls that supply neither.
            "required": [],
        },
    }
796
+
797
+
798
# ---------------------------------------------------------------------------
# Tool Specifications
# ---------------------------------------------------------------------------

# Documentation sections accepted by explore_hf_docs' 'endpoint' parameter.
# Must stay in sync with the per-endpoint bullet list in the tool spec below.
DOC_ENDPOINTS = [
    "hub",
    "transformers",
    "diffusers",
    "datasets",
    "gradio",
    "trackio",
    "smolagents",
    "huggingface_hub",
    "huggingface.js",
    "transformers.js",
    "inference-providers",
    "inference-endpoints",
    "peft",
    "accelerate",
    "optimum",
    "tokenizers",
    "courses",
    "evaluate",
    "tasks",
    "dataset-viewer",
    "trl",
    "simulate",
    "sagemaker",
    "timm",
    "safetensors",
    "tgi",
    "setfit",
    "lerobot",
    "autotrain",
    "tei",
    "bitsandbytes",
    "sentence_transformers",
    "chat-ui",
    "leaderboards",
    "lighteval",
    "argilla",
    "distilabel",
    "microsoft-azure",
    "kernels",
    "google-cloud",
]

# Tool spec: discover documentation structure (sidebar navigation + previews)
# for one of the DOC_ENDPOINTS sections.
EXPLORE_HF_DOCS_TOOL_SPEC = {
    "name": "explore_hf_docs",
    "description": (
        "Explore Hugging Face documentation structure and discover available pages with 200-character previews. "
        "⚠️ MANDATORY: ALWAYS use this BEFORE implementing any ML task (training, fine-tuning, data processing, inference). "
        "Your training data may be outdated - current documentation is the source of truth. "
        "**Use when:** (1) Starting any implementation task, (2) User asks 'how to' questions, "
        "(3) Before writing training/processing code, (4) Researching library capabilities, "
        "(5) Verifying API syntax and parameters. "
        "**Pattern:** explore (discover structure) → fetch_hf_docs (get details) → implement with researched approach. "
        "Returns: Sidebar navigation with titles, URLs, and glimpses of all pages in the selected documentation. "
        "**Then:** Use fetch_hf_docs with specific URLs from results to get full content. "
        "**Critical for reliability:** Never implement based on internal knowledge without checking current docs first - APIs change frequently."
        " By default returns the top 20 results; set max_results (max 50) to adjust."
    ),
    "parameters": {
        "type": "object",
        "properties": {
            "endpoint": {
                "type": "string",
                "enum": DOC_ENDPOINTS,
                "description": (
                    "The documentation endpoint to explore. Each endpoint corresponds to a major section of the Hugging Face documentation:\n\n"
                    "• courses — All Hugging Face courses (LLM, robotics, MCP, smol (llm training), agents, deep RL, computer vision, games, diffusion, 3D, audio) and the cookbook recipes. Probably the best place for examples.\n"
                    "• hub — Find answers to questions about models/datasets/spaces, auth, versioning, metadata.\n"
                    "• transformers — Core model library: architectures, configs, tokenizers, training & inference APIs.\n"
                    "• diffusers — Diffusion pipelines, schedulers, fine-tuning, training, and deployment patterns.\n"
                    "• datasets — Dataset loading, streaming, processing, Arrow format, Hub integration.\n"
                    "• gradio — UI components and demos for ML models. Uses Gradio's native API: without query returns full docs (llms.txt), with query uses embedding search for precise results.\n"
                    "• trackio — Experiment tracking, metrics logging, and run comparison.\n"
                    "• smolagents — Lightweight agent abstractions and tool-using patterns.\n"
                    "• huggingface_hub — Python client for Hub operations (auth, upload/download, repo management).\n"
                    "• huggingface.js — JS/TS client for Hub APIs in browser and Node.\n"
                    "• transformers.js — Run Transformer models in browser/Node via WebGPU/WASM.\n"
                    "• inference-providers — Unified interface for third-party inference backends.\n"
                    "• inference-endpoints — Managed, scalable model deployments on HF infrastructure.\n"
                    "• peft — Parameter-efficient fine-tuning methods (LoRA, adapters, etc.).\n"
                    "• accelerate — Hardware-agnostic, distributed and mixed-precision training orchestration.\n"
                    "• optimum — Hardware-aware optimization and model export tooling, including Habana, Neuron, Intel, ExecuTorch, and TPU variants.\n"
                    "• tokenizers — Fast tokenizer internals, training, and low-level APIs.\n"
                    "• evaluate — Metrics, evaluation workflows, and training-loop integration.\n"
                    "• tasks — Canonical task definitions and model categorization.\n"
                    "• dataset-viewer — Dataset preview, streaming views, and viewer internals.\n"
                    "• trl — RLHF, DPO, PPO, and SFT utilities for LLMs.\n"
                    "• simulate — Experimental simulation tools and workflows.\n"
                    "• sagemaker — Deploying Hugging Face models on AWS SageMaker.\n"
                    "• timm — Image model zoo and utilities via HF integrations.\n"
                    "• safetensors — Safe, fast tensor serialization format.\n"
                    "• tgi — High-throughput text generation server for LLMs.\n"
                    "• setfit — Few-shot text classification via sentence embeddings.\n"
                    "• lerobot — Robotics datasets, policies, and learning workflows.\n"
                    "• autotrain — No/low-code model training on Hugging Face.\n"
                    "• tei — Optimized inference server for embedding workloads.\n"
                    "• bitsandbytes — Quantization and memory-efficient optimizers.\n"
                    "• sentence_transformers — Embedding models, training recipes, similarity/search workflows.\n"
                    "• chat-ui — Reference chat interfaces for LLM deployment.\n"
                    "• leaderboards — Evaluation leaderboards and submission mechanics.\n"
                    "• lighteval — Lightweight, reproducible LLM evaluation framework.\n"
                    "• argilla — Data annotation, feedback, and human-in-the-loop workflows.\n"
                    "• distilabel — Synthetic data generation and distillation pipelines.\n"
                    "• microsoft-azure — Azure deployment and integration guides.\n"
                    "• kernels — Lightweight execution environments and notebook-style workflows.\n"
                    "• google-cloud — GCP deployment and serving workflows.\n"
                ),
            },
            "query": {
                "type": "string",
                "description": (
                    "Optional keyword query to rank and filter documentation pages. "
                    "For Gradio, use concise queries like 'how to use the image component' or 'audio component demo'."
                ),
            },
            "max_results": {
                "type": "integer",
                "description": "Max results (default 20, max 50). Ignored for Gradio.",
                "minimum": 1,
                "maximum": 50,
            },
        },
        "required": ["endpoint"],
    },
}

# Tool spec: fetch one documentation page (full markdown) by URL, typically
# a URL discovered through explore_hf_docs.
HF_DOCS_FETCH_TOOL_SPEC = {
    "name": "fetch_hf_docs",
    "description": (
        "Fetch full markdown content of a specific HF documentation page. "
        "⚠️ CRITICAL: Use this after explore_hf_docs to get detailed implementation guidance. "
        "**Use when:** (1) Found relevant page in explore_hf_docs results, (2) Need complete API documentation, "
        "(3) Need training method details (SFT/DPO/GRPO), (4) Need configuration examples, "
        "(5) Need parameter descriptions and usage patterns. "
        "**Pattern:** explore_hf_docs (find relevant page) → fetch_hf_docs (get full content) → implement using documented approach. "
        "Provide full URL from explore_hf_docs results (e.g., 'https://huggingface.co/docs/trl/sft_trainer'). "
        "Returns: Complete markdown documentation with examples, parameters, and usage patterns. "
        "**For training tasks:** ALWAYS fetch trainer docs (SFTConfig, DPOConfig, etc.) before creating training scripts. "
        "**Critical for reliability:** This ensures you use current APIs and best practices."
    ),
    "parameters": {
        "type": "object",
        "properties": {
            "url": {
                "type": "string",
                "description": (
                    "The full URL to the documentation page. "
                    "Example: 'https://huggingface.co/docs/trl/dpo_trainer' "
                    "The .md extension will be added automatically if not present."
                ),
            },
        },
        "required": ["url"],
    },
}
agent/tools/github_find_examples.py ADDED
@@ -0,0 +1,499 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ GitHub Find Examples Tool - Discover examples, tutorials, and guides for any library
3
+
4
+ Lists all files in a repository and performs deterministic keyword search.
5
+ """
6
+
7
+ import os
8
+ from typing import Any, Dict, List
9
+
10
+ import requests
11
+ from thefuzz import fuzz
12
+
13
+ from agent.tools.types import ToolResult
14
+
15
# In order of priority (lower index = higher priority for sorting)
# Directory-name patterns that mark a file as example/tutorial content.
# _score_against_example_patterns fuzzy-matches paths against these, and
# _get_pattern_priority uses the list index as a tie-breaker (lower wins).
EXAMPLE_PATTERNS = [
    "scripts",
    # General example patterns (catch-all, lower priority)
    "examples",
    "example",
    # Notebook patterns
    "notebooks",
    "notebook",
    # Tutorial/learning patterns
    "tutorials",
    "tutorial",
    "quickstart",
    "walkthroughs",
    "walkthrough",
    # Cookbook/recipe patterns
    "cookbook",
    "cookbooks",
    "recipes",
    "recipe",
    # Demo/sample patterns
    "demos",
    "demo",
    "samples",
    "sample",
    # Other patterns
    "guides",
    "guide",
    "getting-started",
    "getting_started",
    "playground",
    "howto",
    "how-to",
    "use-cases",
    "usecases",
    "use_cases",
    "sandbox",
    "showcase",
]
54
+
55
+
56
def _get_repo_tree(org: str, repo: str, token: str) -> tuple[List[Dict[str, Any]], str]:
    """Recursively list every file (git blob) in a GitHub repository.

    Returns ``(files, error)``: on success ``error`` is "" and each file
    dict carries path, blob sha ("ref"), size, and an html URL on the
    default branch. On failure ``files`` is empty and ``error`` is
    "not_found" or a short description.
    """
    full_repo = f"{org}/{repo}"
    headers = {
        "Accept": "application/vnd.github+json",
        "X-GitHub-Api-Version": "2022-11-28",
        "Authorization": f"Bearer {token}",
    }

    # Resolve the default branch first; it names the tree fetched below.
    try:
        resp = requests.get(
            f"https://api.github.com/repos/{full_repo}", headers=headers, timeout=10
        )
        if resp.status_code == 404:
            return [], "not_found"
        if resp.status_code != 200:
            return [], f"API error: {resp.status_code}"
        default_branch = resp.json().get("default_branch", "main")
    except Exception as e:
        return [], f"Error fetching repo: {str(e)}"

    # Fetch the whole tree in a single recursive call.
    try:
        resp = requests.get(
            f"https://api.github.com/repos/{full_repo}/git/trees/{default_branch}",
            headers=headers,
            params={"recursive": "1"},
            timeout=30,
        )
        if resp.status_code != 200:
            return [], f"Error fetching tree: {resp.status_code}"

        # Keep only blobs (files); tree entries are directories.
        blobs = [
            {
                "path": entry["path"],
                "ref": entry["sha"],
                "size": entry.get("size", 0),
                "url": f"https://github.com/{full_repo}/blob/{default_branch}/{entry['path']}",
            }
            for entry in resp.json().get("tree", [])
            if entry["type"] == "blob"
        ]
        return blobs, ""
    except Exception as e:
        return [], f"Error processing tree: {str(e)}"
110
+
111
+
112
def _search_similar_repos(org: str, repo: str, token: str) -> List[Dict[str, Any]]:
    """Find repositories in ``org`` with names similar to ``repo``.

    Uses the GitHub search API, sorted by stars (top 10). Returns a
    possibly-empty list of summary dicts; all failures are swallowed and
    yield an empty list, since this is a best-effort suggestion path.
    """
    headers = {
        "Accept": "application/vnd.github+json",
        "X-GitHub-Api-Version": "2022-11-28",
        "Authorization": f"Bearer {token}",
    }

    try:
        resp = requests.get(
            "https://api.github.com/search/repositories",
            headers=headers,
            params={
                "q": f"org:{org} {repo}",
                "sort": "stars",
                "order": "desc",
                "per_page": 10,
            },
            timeout=30,
        )
        if resp.status_code != 200:
            return []

        return [
            {
                "name": item.get("name"),
                "full_name": item.get("full_name"),
                "description": item.get("description"),
                "stars": item.get("stargazers_count", 0),
                "url": item.get("html_url"),
            }
            for item in resp.json().get("items", [])
        ]
    except Exception:
        return []
149
+
150
+
151
def _score_against_example_patterns(file_path: str) -> int:
    """Return the best fuzzy score (0-100) of ``file_path`` vs EXAMPLE_PATTERNS.

    Uses thefuzz's token_set_ratio against every known example-directory
    pattern and keeps the maximum. Idiom fix: single max() over a generator
    with default=0 instead of building an intermediate score list, with the
    loop-invariant path lowercasing hoisted out of the loop.
    """
    path = file_path.lower()
    return max(
        (fuzz.token_set_ratio(pattern.lower(), path) for pattern in EXAMPLE_PATTERNS),
        default=0,  # preserved fallback for an empty pattern list
    )
158
+
159
+
160
def _score_against_keyword(file_path: str, keyword: str) -> int:
    """Fuzzy-match a file path against a keyword, returning a 0-100 score.

    Combines partial_ratio (substring-style matching, good for paths) with
    token_set_ratio (word-level matching) and keeps whichever is higher.
    """
    needle = keyword.lower()
    haystack = file_path.lower()
    return max(
        fuzz.partial_ratio(needle, haystack),
        fuzz.token_set_ratio(needle, haystack),
    )
169
+
170
+
171
def _get_pattern_priority(file_path: str) -> tuple[int, int, int]:
    """
    Get priority of a file path based on which example pattern directory it's in.

    Returns: (in_examples_dir, pattern_priority, path_depth)
    - in_examples_dir: 0 if in examples/ directory, 1 otherwise (lower is better)
    - pattern_priority: Index in EXAMPLE_PATTERNS (lower is better), or 999 if no match
    - path_depth: Number of path segments (lower is better)

    Note: Prioritizes files in "examples/" directory first, then by most specific pattern match.
    E.g., "examples/scripts/train.py" is better than "scripts/util.py"

    The tuple is designed to be used directly as a sort key (ascending).
    """
    path_lower = file_path.lower()
    path_parts = path_lower.split("/")

    # Check if file is in examples/ directory (highest priority);
    # only the FIRST path component is considered here.
    in_examples_dir = 0 if (path_parts[0] in ["examples", "example"]) else 1

    # Find ALL matching patterns and use the best (lowest index) one
    # But prefer deeper matches (more specific) over shallow ones
    best_priority = 999  # sentinel: "no pattern matched anywhere in the path"
    best_depth_at_match = -1

    for i, pattern in enumerate(EXAMPLE_PATTERNS):
        # Check if pattern appears as a directory component in the path
        # (exact component match, not substring).
        if pattern in path_parts:
            # Find the depth where this pattern appears (rightmost occurrence)
            depth = len(path_parts) - 1 - path_parts[::-1].index(pattern)

            # Prefer deeper matches, or better priority if at same depth
            if depth > best_depth_at_match or (
                depth == best_depth_at_match and i < best_priority
            ):
                best_priority = i
                best_depth_at_match = depth

    return (in_examples_dir, best_priority, len(path_parts))
208
+
209
+
210
def _handle_repo_tree_errors(
    all_files: List[Dict[str, Any]],
    error: str,
    org: str,
    repo: str,
    token: str,
) -> ToolResult | None:
    """Convert a repo-tree fetch failure into a user-facing ToolResult.

    Returns None when ``all_files`` is usable. Otherwise:
    - error == "not_found": suggests similarly-named repos in the same org,
    - any other error string: reports it verbatim,
    - empty file list with no error: reports an empty repository.
    """
    if error == "not_found":
        suggestions = _search_similar_repos(org, repo, token)

        if not suggestions:
            return {
                "formatted": f"Repository '{org}/{repo}' not found and no similar repositories found.",
                "totalResults": 0,
                "resultsShared": 0,
                "isError": True,
            }

        # Render the suggestion list as markdown.
        lines = [f"**Repository '{org}/{repo}' not found. Similar repositories:**\n"]
        for idx, candidate in enumerate(suggestions, 1):
            lines.append(f"{idx}. **{candidate['full_name']}** (⭐ {candidate['stars']:,} stars)")
            description = candidate["description"]
            if description:
                if len(description) > 100:
                    description = description[:100] + "..."
                lines.append(f" {description}")
            lines.append(f" {candidate['url']}\n")

        return {
            "formatted": "\n".join(lines),
            "totalResults": len(suggestions),
            "resultsShared": len(suggestions),
            "isError": True,
        }

    if error:
        return {
            "formatted": f"Error accessing repository '{org}/{repo}': {error}",
            "totalResults": 0,
            "resultsShared": 0,
            "isError": True,
        }

    if not all_files:
        return {
            "formatted": f"No files found in repository '{org}/{repo}'",
            "totalResults": 0,
            "resultsShared": 0,
        }

    return None
265
+
266
+
267
def find_examples(
    keyword: str = "",
    repo: str = "",
    org: str = "huggingface",
    max_results: int = 50,
    min_score: int = 60,
) -> ToolResult:
    """
    Find example files in a repository using fuzzy matching.

    Args:
        keyword: Keyword to fuzzy match against file paths (e.g., "grpo")
        repo: Repository name (e.g., "trl"). Required.
        org: GitHub organization (default: "huggingface")
        max_results: Maximum number of results (default 50)
        min_score: Minimum fuzzy match score (0-100, default 60)

    Returns:
        ToolResult with matching files, or similar repos if repo not found

    Note:
        Consistency fix: the defaults were 10/80 while this docstring, the
        tool spec, and github_find_examples_handler all advertise 50/60;
        the signature now matches the documented contract.
    """
    token = os.environ.get("GITHUB_TOKEN")
    if not token:
        return {
            "formatted": "Error: GITHUB_TOKEN environment variable is required",
            "totalResults": 0,
            "resultsShared": 0,
            "isError": True,
        }

    if not repo:
        return {
            "formatted": "Error: repo parameter is required",
            "totalResults": 0,
            "resultsShared": 0,
            "isError": True,
        }

    # Get all files in the repository
    all_files, error = _get_repo_tree(org, repo, token)

    # Handle errors (not found, API errors, empty repo)
    if error_result := _handle_repo_tree_errors(all_files, error, org, repo, token):
        return error_result

    # Step 1: Filter files by example patterns (score >= 60).
    # This threshold is intentionally independent of min_score, which only
    # applies to the user-supplied keyword below.
    example_threshold = 60
    example_files = []
    for file in all_files:
        example_score = _score_against_example_patterns(file["path"])
        if example_score >= example_threshold:
            example_files.append({**file, "example_score": example_score})

    if not example_files:
        return {
            "formatted": f"No example files found in {org}/{repo} (no files match example patterns with score >= {example_threshold}).",
            "totalResults": 0,
            "resultsShared": 0,
        }

    # Step 2: If keyword provided, score and filter by keyword
    if keyword:
        scored_files = []
        for file in example_files:
            keyword_score = _score_against_keyword(file["path"], keyword)
            if keyword_score >= min_score:
                scored_files.append({**file, "score": keyword_score})

        if not scored_files:
            return {
                "formatted": f"No files found in {org}/{repo} matching keyword '{keyword}' (min score: {min_score}) among {len(example_files)} example files.",
                "totalResults": 0,
                "resultsShared": 0,
            }

        # Sort by keyword score (descending) for best matches first
        scored_files.sort(key=lambda x: x["score"], reverse=True)
    else:
        # No keyword: prioritize by pattern directory, then path depth
        scored_files = []
        for file in example_files:
            in_examples_dir, pattern_priority, path_depth = _get_pattern_priority(
                file["path"]
            )
            scored_files.append(
                {
                    **file,
                    "score": file["example_score"],
                    "in_examples_dir": in_examples_dir,
                    "pattern_priority": pattern_priority,
                    "path_depth": path_depth,
                }
            )

        # NOTE: unreachable in practice (example_files is non-empty here);
        # kept for defensive parity with the keyword branch.
        if not scored_files:
            return {
                "formatted": f"No example files found in {org}/{repo}.",
                "totalResults": 0,
                "resultsShared": 0,
            }

        # Sort by: 1) files in examples/ dir first, 2) pattern priority (scripts > datasets > etc), 3) path depth, 4) path name
        scored_files.sort(
            key=lambda x: (
                x["in_examples_dir"],
                x["pattern_priority"],
                x["path_depth"],
                x["path"],
            )
        )

    # Limit results
    results = scored_files[:max_results]

    # Format output
    keyword_desc = f" matching '{keyword}'" if keyword else ""
    lines = [f"**Found {len(results)} example files in {org}/{repo}{keyword_desc}:**"]
    if len(scored_files) > max_results:
        lines[0] += f" (showing {max_results} of {len(scored_files)})"
    lines.append("")

    for i, file in enumerate(results, 1):
        lines.append(f"{i}. **{file['path']}**")
        lines.append(f" Size: {file['size']:,} bytes | Ref: {file['ref'][:7]}")
        lines.append(f" URL: {file['url']}")

        # Copyable parameters for read_file tool
        read_params = f"{{'repo': '{org}/{repo}', 'path': '{file['path']}'}}"
        lines.append(f" To read, use: {read_params}")
        lines.append("")

    return {
        "formatted": "\n".join(lines),
        "totalResults": len(results),
        "resultsShared": len(results),
    }
402
+
403
+
404
# Tool specification
# Exposed to the agent's tool router; parameter defaults here (50 / 60)
# must stay in sync with github_find_examples_handler below.
GITHUB_FIND_EXAMPLES_TOOL_SPEC = {
    "name": "github_find_examples",
    "description": (
        "Discover working code examples, tutorials, scripts, and demos in GitHub repositories. "
        "⚠️ CRITICAL: ALWAYS use this BEFORE implementing ML tasks - find working reference code first. "
        "Your training data may be outdated; real repository examples show current best practices. "
        "**Use when:** (1) Starting any ML implementation (training, inference, evaluation), "
        "(2) User asks 'how to' questions about libraries, (3) Need reference implementations, "
        "(4) Exploring library capabilities, (5) Before writing training/processing scripts. "
        "**Pattern:** github_find_examples (discover) → github_read_file (study code) → implement with researched approach. "
        "Returns: List of example files (scripts/notebooks/tutorials) with paths and URLs, sorted by relevance. "
        "**Then:** Use github_read_file to read the actual implementation code. "
        "**Critical for reliability:** Real examples prevent outdated API usage and show proven patterns. "
        "## How it works\n\n"
        "1. Fetches all example files (examples/, scripts/, tutorials/, demos/, notebooks/, etc.) from repository\n"
        "2. If keyword provided, scores files against keyword using fuzzy matching\n"
        "3. Returns best matches sorted by relevance and pattern priority\n"
        "4. Provides copyable parameters for github_read_file tool\n\n"
        "## Examples\n\n"
        "<example>\n"
        "// ML Workflow Step: Find GRPO training examples before implementation\n"
        "// Task: Starting GRPO fine-tuning project, need reference implementation\n"
        "{\n"
        " keyword: 'grpo',\n"
        " repo: 'trl',\n"
        " org: 'huggingface'\n"
        "}\n"
        "// Returns: examples/scripts/grpo_agent.py, examples/scripts/grpo_vlm.py\n"
        "// Next step: github_read_file to study working implementation\n"
        "</example>\n\n"
        "<example>\n"
        "// ML Workflow Step: Discover all available training methods\n"
        "// Task: Exploring TRL training options before choosing approach\n"
        "{\n"
        " repo: 'trl',\n"
        " org: 'huggingface',\n"
        " max_results: 20\n"
        "}\n"
        "// Lists: SFT, DPO, GRPO, PPO, reward modeling examples\n"
        "// Helps user choose appropriate method\n"
        "</example>\n\n"
        "<example>\n"
        "// ML Workflow Step: Find LoRA fine-tuning examples\n"
        "// Task: Learning parameter-efficient fine-tuning patterns\n"
        "{\n"
        " keyword: 'lora',\n"
        " repo: 'peft',\n"
        " org: 'huggingface'\n"
        "}\n"
        "// Discovers LoRA configuration and training examples\n"
        "// Shows current PEFT API usage patterns\n"
        "</example>"
    ),
    "parameters": {
        "type": "object",
        "properties": {
            "keyword": {
                "type": "string",
                "description": "Keyword to fuzzy match against file paths (e.g., 'grpo', 'sft').",
            },
            "repo": {
                "type": "string",
                "description": "Repository name (e.g., 'trl', 'transformers'). Required.",
            },
            "org": {
                "type": "string",
                "description": "GitHub organization or username. Default: 'huggingface'.",
            },
            "max_results": {
                "type": "integer",
                "description": "Maximum number of results to return. Default: 50.",
            },
            "min_score": {
                "type": "integer",
                "description": "Minimum fuzzy match score (0-100). Default: 60.",
            },
        },
        "required": ["repo"],
    },
}
485
+
486
+
487
async def github_find_examples_handler(arguments: Dict[str, Any]) -> tuple[str, bool]:
    """Adapter between the agent tool router and find_examples().

    Returns ``(formatted_text, success)``; success is False when the result
    is flagged as an error. Any unexpected exception (including a missing
    'repo' argument) is reported as an error string rather than raised.
    """
    try:
        outcome = find_examples(
            keyword=arguments.get("keyword", ""),
            repo=arguments["repo"],
            org=arguments.get("org", "huggingface"),
            max_results=arguments.get("max_results", 50),
            min_score=arguments.get("min_score", 60),
        )
        return outcome["formatted"], not outcome.get("isError", False)
    except Exception as e:
        return f"Error finding examples: {str(e)}", False
agent/tools/github_list_repos.py ADDED
@@ -0,0 +1,287 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ GitHub List Repositories Tool - List and sort repositories for any user or organization
3
+
4
+ Efficiently discover repositories with flexible sorting options.
5
+ """
6
+
7
+ import os
8
+ from typing import Any, Dict, Literal, Optional
9
+
10
+ import requests
11
+
12
+ from agent.tools.types import ToolResult
13
+
14
+
15
+ def list_repos(
16
+ owner: str,
17
+ owner_type: Literal["user", "org"] = "org",
18
+ sort: Literal["stars", "forks", "updated", "created"] = "stars",
19
+ order: Literal["asc", "desc"] = "desc",
20
+ limit: Optional[int] = 30,
21
+ ) -> ToolResult:
22
+ """
23
+ List repositories for a user or organization using GitHub REST API.
24
+
25
+ Args:
26
+ owner: GitHub username or organization name
27
+ owner_type: Whether the owner is a "user" or "org" (default: "org")
28
+ sort: Sort field - "stars", "forks", "updated", or "created"
29
+ order: Sort order - "asc" or "desc" (default: "desc")
30
+ limit: Maximum number of repositories to return
31
+
32
+ Returns:
33
+ ToolResult with repository information
34
+ """
35
+ token = os.environ.get("GITHUB_TOKEN")
36
+ if not token:
37
+ return {
38
+ "formatted": "Error: GITHUB_TOKEN environment variable is required",
39
+ "totalResults": 0,
40
+ "resultsShared": 0,
41
+ "isError": True,
42
+ }
43
+
44
+ if owner_type == "org":
45
+ url = f"https://api.github.com/orgs/{owner}/repos"
46
+ else:
47
+ url = f"https://api.github.com/users/{owner}/repos"
48
+
49
+ headers = {
50
+ "Accept": "application/vnd.github+json",
51
+ "X-GitHub-Api-Version": "2022-11-28",
52
+ "Authorization": f"Bearer {token}",
53
+ }
54
+
55
+ all_repos = []
56
+ page = 1
57
+ per_page = 100 # Maximum allowed by GitHub
58
+
59
+ # Map our sort values to GitHub API sort values
60
+ # Note: GitHub list repos API doesn't support sorting by stars/forks
61
+ # We'll fetch all repos and sort in memory for those cases
62
+ api_sort_map = {
63
+ "created": "created",
64
+ "updated": "updated",
65
+ "stars": None, # Not supported by list API
66
+ "forks": None, # Not supported by list API
67
+ }
68
+
69
+ api_sort = api_sort_map.get(sort)
70
+ need_manual_sort = api_sort is None
71
+
72
+ try:
73
+ while True:
74
+ params = {
75
+ "page": page,
76
+ "per_page": per_page,
77
+ }
78
+
79
+ # Only add sort/direction if API supports it
80
+ if api_sort:
81
+ params["sort"] = api_sort
82
+ params["direction"] = order
83
+
84
+ response = requests.get(
85
+ url,
86
+ headers=headers,
87
+ params=params,
88
+ timeout=30,
89
+ )
90
+
91
+ if response.status_code == 403:
92
+ error_data = response.json()
93
+ return {
94
+ "formatted": f"GitHub API rate limit or permission error: {error_data.get('message', 'Unknown error')}",
95
+ "totalResults": 0,
96
+ "resultsShared": 0,
97
+ "isError": True,
98
+ }
99
+
100
+ if response.status_code != 200:
101
+ error_msg = f"GitHub API error (status {response.status_code})"
102
+ try:
103
+ error_data = response.json()
104
+ if "message" in error_data:
105
+ error_msg += f": {error_data['message']}"
106
+ except Exception:
107
+ pass
108
+ return {
109
+ "formatted": error_msg,
110
+ "totalResults": 0,
111
+ "resultsShared": 0,
112
+ "isError": True,
113
+ }
114
+
115
+ items = response.json()
116
+
117
+ if not items:
118
+ break
119
+
120
+ for item in items:
121
+ all_repos.append(
122
+ {
123
+ "name": item.get("name"),
124
+ "full_name": item.get("full_name"),
125
+ "description": item.get("description"),
126
+ "html_url": item.get("html_url"),
127
+ "language": item.get("language"),
128
+ "stars": item.get("stargazers_count", 0),
129
+ "forks": item.get("forks_count", 0),
130
+ "open_issues": item.get("open_issues_count", 0),
131
+ "topics": item.get("topics", []),
132
+ "updated_at": item.get("updated_at"),
133
+ "created_at": item.get("created_at"),
134
+ }
135
+ )
136
+
137
+ # Check if we got fewer results than requested (last page)
138
+ if len(items) < per_page:
139
+ break
140
+
141
+ # Stop if we have enough repos
142
+ if limit and len(all_repos) >= limit:
143
+ break
144
+
145
+ page += 1
146
+
147
+ except requests.exceptions.RequestException as e:
148
+ return {
149
+ "formatted": f"Failed to connect to GitHub API: {str(e)}",
150
+ "totalResults": 0,
151
+ "resultsShared": 0,
152
+ "isError": True,
153
+ }
154
+
155
+ # Manual sorting if needed (for stars/forks)
156
+ if need_manual_sort and all_repos:
157
+ reverse = order == "desc"
158
+ all_repos.sort(key=lambda x: x[sort], reverse=reverse)
159
+
160
+ # Apply limit after sorting
161
+ if limit:
162
+ all_repos = all_repos[:limit]
163
+
164
+ if not all_repos:
165
+ return {
166
+ "formatted": f"No repositories found for {owner_type} '{owner}'",
167
+ "totalResults": 0,
168
+ "resultsShared": 0,
169
+ }
170
+
171
+ # Format output
172
+ lines = [f"**Found {len(all_repos)} repositories for {owner}:**\n"]
173
+
174
+ for i, repo in enumerate(all_repos, 1):
175
+ lines.append(f"{i}. **{repo['full_name']}**")
176
+ lines.append(
177
+ f" ⭐ {repo['stars']:,} stars | 🍴 {repo['forks']:,} forks | Language: {repo['language'] or 'N/A'}"
178
+ )
179
+ if repo["description"]:
180
+ desc = (
181
+ repo["description"][:100] + "..."
182
+ if len(repo["description"]) > 100
183
+ else repo["description"]
184
+ )
185
+ lines.append(f" {desc}")
186
+ lines.append(f" URL: {repo['html_url']}")
187
+ if repo["topics"]:
188
+ lines.append(f" Topics: {', '.join(repo['topics'][:5])}")
189
+
190
+ # Copyable parameters for other tools
191
+ lines.append(f" Use in tools: {{'repo': '{repo['full_name']}'}}")
192
+ lines.append("")
193
+
194
+ return {
195
+ "formatted": "\n".join(lines),
196
+ "totalResults": len(all_repos),
197
+ "resultsShared": len(all_repos),
198
+ }
199
+
200
+
201
+ # Tool specification
202
+ GITHUB_LIST_REPOS_TOOL_SPEC = {
203
+ "name": "github_list_repos",
204
+ "description": (
205
+ "List and discover repositories for GitHub organizations or users with flexible sorting. "
206
+ "**Use when:** (1) Exploring what libraries exist for a task, (2) Finding the right library to use, "
207
+ "(3) Discovering popular or active projects, (4) Checking recently updated repos for latest features, "
208
+ "(5) Finding alternative libraries in an organization. "
209
+ "**Pattern:** github_list_repos (discover libraries) → github_find_examples (find usage examples) → implement. "
210
+ "Returns: Comprehensive repository information (stars, forks, language, topics, URLs), sorted by preference. "
211
+ "**Then:** Use github_find_examples on selected repo to discover example code. "
212
+ "Sorts by: stars (popularity), forks (community), updated (activity), created (age).\n\n"
213
+ "## When to use this tool\n\n"
214
+ "- When you need to find libraries to use in your implementation\n"
215
+ "- When exploring what repositories exist for a task or domain\n"
216
+ "- When debugging an error and looking up if others have similar issues in repos\n"
217
+ "- When finding the most popular or actively maintained projects for a user/org\n"
218
+ "## Examples\n\n"
219
+ "<example>\n"
220
+ "// ML Workflow Step: Discover HF libraries for RLHF/alignment\n"
221
+ "// Use case: Find the right library for training with human feedback\n"
222
+ "{\n"
223
+ " owner: 'huggingface',\n"
224
+ " owner_type: 'org',\n"
225
+ " sort: 'stars',\n"
226
+ " limit: 10\n"
227
+ "}\n"
228
+ "// Returns: transformers, trl, peft, accelerate, diffusers...\n"
229
+ "</example>\n\n"
230
+ "<example>\n"
231
+ "// ML Workflow Step: Check for recently updated HF repos\n"
232
+ "// Use case: Find actively maintained libraries with latest features\n"
233
+ "{\n"
234
+ " owner: 'huggingface',\n"
235
+ " owner_type: 'org',\n"
236
+ " sort: 'updated',\n"
237
+ " order: 'desc',\n"
238
+ " limit: 15\n"
239
+ "}\n"
240
+ "// Helps identify which repos have recent improvements/fixes\n"
241
+ "</example>"
242
+ ),
243
+ "parameters": {
244
+ "type": "object",
245
+ "properties": {
246
+ "owner": {
247
+ "type": "string",
248
+ "description": "GitHub username or organization name. Required.",
249
+ },
250
+ "owner_type": {
251
+ "type": "string",
252
+ "enum": ["user", "org"],
253
+ "description": "Whether the owner is a 'user' or 'org'. Default: 'org'.",
254
+ },
255
+ "sort": {
256
+ "type": "string",
257
+ "enum": ["stars", "forks", "updated", "created"],
258
+ "description": "Sort field. Options: 'stars', 'forks', 'updated', 'created'. Default: 'stars'.",
259
+ },
260
+ "order": {
261
+ "type": "string",
262
+ "enum": ["asc", "desc"],
263
+ "description": "Sort order. Options: 'asc', 'desc'. Default: 'desc'.",
264
+ },
265
+ "limit": {
266
+ "type": "integer",
267
+ "description": "Maximum number of repositories to return. No limit if not specified. Default: 30.",
268
+ },
269
+ },
270
+ "required": ["owner"],
271
+ },
272
+ }
273
+
274
+
275
+ async def github_list_repos_handler(arguments: Dict[str, Any]) -> tuple[str, bool]:
276
+ """Handler for agent tool router"""
277
+ try:
278
+ result = list_repos(
279
+ owner=arguments["owner"],
280
+ owner_type=arguments.get("owner_type", "org"),
281
+ sort=arguments.get("sort", "stars"),
282
+ order=arguments.get("order", "desc"),
283
+ limit=arguments.get("limit"),
284
+ )
285
+ return result["formatted"], not result.get("isError", False)
286
+ except Exception as e:
287
+ return f"Error listing repositories: {str(e)}", False
agent/tools/github_read_file.py ADDED
@@ -0,0 +1,348 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ GitHub Read File Tool - Read file contents from any GitHub repository with line range support
3
+
4
+ Fetch exact file contents with metadata, supporting line ranges for efficient reading.
5
+ """
6
+
7
+ import base64
8
+ import json
9
+ import os
10
+ from typing import Any, Dict, Optional
11
+
12
+ import nbformat
13
+ import requests
14
+ from nbconvert import MarkdownExporter
15
+ from nbconvert.preprocessors import ClearOutputPreprocessor, TagRemovePreprocessor
16
+
17
+ from agent.tools.types import ToolResult
18
+
19
+
20
+ def _convert_ipynb_to_markdown(content: str) -> str:
21
+ """
22
+ Convert Jupyter notebook JSON to LLM-friendly Markdown.
23
+
24
+ Args:
25
+ content: Raw notebook JSON string
26
+
27
+ Returns:
28
+ Converted Markdown string
29
+ """
30
+ try:
31
+ # Parse notebook JSON
32
+ nb_dict = json.loads(content)
33
+
34
+ # Normalize cell sources (can be string or list of strings)
35
+ if "cells" in nb_dict:
36
+ for cell in nb_dict["cells"]:
37
+ if "source" in cell and isinstance(cell["source"], list):
38
+ cell["source"] = "".join(cell["source"])
39
+
40
+ # Read notebook with explicit version
41
+ nb = nbformat.reads(json.dumps(nb_dict), as_version=4)
42
+
43
+ # Strip outputs for LLM readability (outputs can be noisy/large)
44
+ clear = ClearOutputPreprocessor()
45
+ nb, _ = clear.preprocess(nb, {})
46
+
47
+ # Optionally remove cells tagged with "hide" or similar
48
+ remove = TagRemovePreprocessor(
49
+ remove_cell_tags={"hide", "hidden", "remove"},
50
+ remove_input_tags=set(),
51
+ remove_all_outputs_tags=set(),
52
+ )
53
+ nb, _ = remove.preprocess(nb, {})
54
+
55
+ # Convert to markdown
56
+ exporter = MarkdownExporter()
57
+ markdown, _ = exporter.from_notebook_node(nb)
58
+
59
+ return markdown
60
+
61
+ except json.JSONDecodeError:
62
+ return content
63
+ except Exception:
64
+ return content
65
+
66
+
67
+ def read_file(
68
+ repo: str,
69
+ path: str,
70
+ ref: str = "HEAD",
71
+ line_start: Optional[int] = None,
72
+ line_end: Optional[int] = None,
73
+ ) -> ToolResult:
74
+ """
75
+ Read file contents from a GitHub repository with line range support.
76
+
77
+ Args:
78
+ repo: Repository in format "owner/repo" (e.g., "github/github-mcp-server")
79
+ path: Path to file in repository (e.g., "pkg/github/search.go")
80
+ ref: Git reference - branch name, tag, or commit SHA (default: "HEAD")
81
+ line_start: Starting line number (1-indexed, inclusive)
82
+ line_end: Ending line number (1-indexed, inclusive)
83
+
84
+ Returns:
85
+ ToolResult with file contents and metadata
86
+ """
87
+ token = os.environ.get("GITHUB_TOKEN")
88
+ if not token:
89
+ return {
90
+ "formatted": "Error: GITHUB_TOKEN environment variable is required",
91
+ "totalResults": 0,
92
+ "resultsShared": 0,
93
+ "isError": True,
94
+ }
95
+
96
+ # Parse repo
97
+ if "/" not in repo:
98
+ return {
99
+ "formatted": "Error: repo must be in format 'owner/repo'",
100
+ "totalResults": 0,
101
+ "resultsShared": 0,
102
+ "isError": True,
103
+ }
104
+
105
+ owner, repo_name = repo.split("/", 1)
106
+
107
+ headers = {
108
+ "Accept": "application/vnd.github+json",
109
+ "X-GitHub-Api-Version": "2022-11-28",
110
+ "Authorization": f"Bearer {token}",
111
+ }
112
+
113
+ # Fetch file contents
114
+ url = f"https://api.github.com/repos/{owner}/{repo_name}/contents/{path}"
115
+ params = {}
116
+ if ref and ref != "HEAD":
117
+ params["ref"] = ref
118
+
119
+ try:
120
+ response = requests.get(url, headers=headers, params=params, timeout=30)
121
+
122
+ if response.status_code == 404:
123
+ return {
124
+ "formatted": f"File not found: {path} in {repo} (ref: {ref})",
125
+ "totalResults": 0,
126
+ "resultsShared": 0,
127
+ "isError": True,
128
+ }
129
+
130
+ if response.status_code != 200:
131
+ error_msg = f"GitHub API error (status {response.status_code})"
132
+ try:
133
+ error_data = response.json()
134
+ if "message" in error_data:
135
+ error_msg += f": {error_data['message']}"
136
+ except Exception:
137
+ pass
138
+ return {
139
+ "formatted": error_msg,
140
+ "totalResults": 0,
141
+ "resultsShared": 0,
142
+ "isError": True,
143
+ }
144
+
145
+ data = response.json()
146
+
147
+ # Check if it's a file
148
+ if data.get("type") != "file":
149
+ return {
150
+ "formatted": f"Path {path} is not a file (type: {data.get('type')})",
151
+ "totalResults": 0,
152
+ "resultsShared": 0,
153
+ "isError": True,
154
+ }
155
+
156
+ # Decode content
157
+ content_b64 = data.get("content", "")
158
+ if content_b64:
159
+ content_b64 = content_b64.replace("\n", "").replace(" ", "")
160
+ content = base64.b64decode(content_b64).decode("utf-8", errors="replace")
161
+ else:
162
+ # For large files, fetch raw content
163
+ raw_headers = {
164
+ "Accept": "application/vnd.github.raw",
165
+ "X-GitHub-Api-Version": "2022-11-28",
166
+ "Authorization": f"Bearer {token}",
167
+ }
168
+ raw_response = requests.get(
169
+ url, headers=raw_headers, params=params, timeout=30
170
+ )
171
+ if raw_response.status_code != 200:
172
+ return {
173
+ "formatted": "Failed to fetch file content",
174
+ "totalResults": 0,
175
+ "resultsShared": 0,
176
+ "isError": True,
177
+ }
178
+ content = raw_response.text
179
+
180
+ if path.lower().endswith(".ipynb"):
181
+ content = _convert_ipynb_to_markdown(content)
182
+
183
+ # Process line ranges
184
+ lines = content.split("\n")
185
+ total_lines = len(lines)
186
+
187
+ truncated = False
188
+
189
+ if line_start is None and line_end is None:
190
+ # No range specified
191
+ if total_lines > 300:
192
+ line_start = 1
193
+ line_end = 300
194
+ truncated = True
195
+ else:
196
+ line_start = 1
197
+ line_end = total_lines
198
+ else:
199
+ # Range specified
200
+ if line_start is None:
201
+ line_start = 1
202
+ if line_end is None:
203
+ line_end = total_lines
204
+
205
+ # Validate range
206
+ line_start = max(1, line_start)
207
+ line_end = min(total_lines, line_end)
208
+ if line_start > line_end:
209
+ return {
210
+ "formatted": f"Invalid range: line_start ({line_start}) > line_end ({line_end})",
211
+ "totalResults": 0,
212
+ "resultsShared": 0,
213
+ "isError": True,
214
+ }
215
+
216
+ # Extract lines
217
+ selected_lines = lines[line_start - 1 : line_end]
218
+ selected_content = "\n".join(selected_lines)
219
+
220
+ # Format output
221
+ lines_output = [f"**Reading file from repo: {repo}, path: {path}**"]
222
+
223
+ if ref and ref != "HEAD":
224
+ lines_output.append(f"Ref: {ref}")
225
+
226
+ lines_output.append("\n**File content:")
227
+ lines_output.append("```")
228
+ lines_output.append(selected_content)
229
+ lines_output.append("```")
230
+ if truncated:
231
+ lines_output.append(
232
+ f"Currently showing lines {line_start}-{line_end} out of {total_lines} total lines. Use line_start and line_end to view more lines."
233
+ )
234
+ return {
235
+ "formatted": "\n".join(lines_output),
236
+ "totalResults": 1,
237
+ "resultsShared": 1,
238
+ }
239
+
240
+ except requests.exceptions.RequestException as e:
241
+ return {
242
+ "formatted": f"Failed to connect to GitHub API: {str(e)}",
243
+ "totalResults": 0,
244
+ "resultsShared": 0,
245
+ "isError": True,
246
+ }
247
+
248
+
249
+ # Tool specification
250
+ GITHUB_READ_FILE_TOOL_SPEC = {
251
+ "name": "github_read_file",
252
+ "description": (
253
+ "Read file contents from GitHub repositories with line range support (default 300 lines). "
254
+ "⚠️ CRITICAL: Use AFTER github_find_examples to study working implementation code. "
255
+ "**Use when:** (1) Found example file via github_find_examples and need full code, "
256
+ "(2) Need to read trainer class implementation, (3) Study configuration patterns, "
257
+ "(4) Read specific code sections with line ranges, (5) Review code from specific branches/commits. "
258
+ "**Pattern:** github_find_examples (discover files) → github_read_file (read code) → implement using researched patterns. "
259
+ "Returns: File contents with line numbers, formatted for LLM reading. Auto-converts Jupyter notebooks to markdown. "
260
+ "**Then:** Implement using patterns and APIs from the example code. "
261
+ "**Critical for reliability:** Reading working examples prevents API errors and shows current best practices. "
262
+ "Use line_start/line_end for large files (>300 lines) to read specific sections.\n\n"
263
+ "## When to use this tool\n\n"
264
+ "- When reading example code, trainer implementations, or configuration files\n"
265
+ "- After github_find_examples returns file paths you want to study\n"
266
+ "- When investigating specific code sections with line ranges\n"
267
+ "- When reading from specific branches, tags, or commits (use ref parameter)\n\n"
268
+ "## When NOT to use this tool\n\n"
269
+ "- When you don't know exact file path (use github_find_examples or github_search_code first)\n"
270
+ "- When searching for code patterns across repos (use github_search_code instead)\n\n"
271
+ "## Examples\n\n"
272
+ "<example>\n"
273
+ "// ML Workflow Step: Read GRPO trainer class after finding via github_find_examples\n"
274
+ "// Use case: Understand GRPOTrainer API, parameters, and methods\n"
275
+ "{\n"
276
+ " repo: 'huggingface/trl',\n"
277
+ " path: 'trl/trainer/grpo_trainer.py',\n"
278
+ " line_start: 1,\n"
279
+ " line_end: 200\n"
280
+ "}\n"
281
+ "// Read class definition and constructor to understand current API\n"
282
+ "// Shows: __init__ parameters, configuration, required arguments\n"
283
+ "</example>\n\n"
284
+ "<example>\n"
285
+ "// ML Workflow Step: Study complete training script from examples\n"
286
+ "// Use case: Learn end-to-end VLM fine-tuning workflow\n"
287
+ "{\n"
288
+ " repo: 'huggingface/trl',\n"
289
+ " path: 'examples/scripts/grpo_vlm.py'\n"
290
+ "}\n"
291
+ "// Returns first 300 lines - shows full training setup\n"
292
+ "// Use line_start/line_end if need to read more\n"
293
+ "</example>\n\n"
294
+ "<example>\n"
295
+ "// ML Workflow Step: Check TrainingArguments configuration patterns\n"
296
+ "// Use case: Learn how to structure training configs correctly\n"
297
+ "{\n"
298
+ " repo: 'huggingface/transformers',\n"
299
+ " path: 'examples/pytorch/language-modeling/run_clm.py',\n"
300
+ " line_start: 50,\n"
301
+ " line_end: 150\n"
302
+ "}\n"
303
+ "// Read argument parsing and config setup section\n"
304
+ "// Shows: current parameter names, default values, best practices\n"
305
+ "</example>"
306
+ ),
307
+ "parameters": {
308
+ "type": "object",
309
+ "properties": {
310
+ "repo": {
311
+ "type": "string",
312
+ "description": "Repository in format 'owner/repo' (e.g., 'github/github-mcp-server'). Required.",
313
+ },
314
+ "path": {
315
+ "type": "string",
316
+ "description": "Path to file in repository (e.g., 'src/index.js'). Required.",
317
+ },
318
+ "ref": {
319
+ "type": "string",
320
+ "description": "Git reference - branch name, tag, or commit SHA. Default: 'HEAD'.",
321
+ },
322
+ "line_start": {
323
+ "type": "integer",
324
+ "description": "Starting line number (1-indexed, inclusive). Optional.",
325
+ },
326
+ "line_end": {
327
+ "type": "integer",
328
+ "description": "Ending line number (1-indexed, inclusive). Optional.",
329
+ },
330
+ },
331
+ "required": ["repo", "path"],
332
+ },
333
+ }
334
+
335
+
336
+ async def github_read_file_handler(arguments: Dict[str, Any]) -> tuple[str, bool]:
337
+ """Handler for agent tool router"""
338
+ try:
339
+ result = read_file(
340
+ repo=arguments["repo"],
341
+ path=arguments["path"],
342
+ ref=arguments.get("ref", "HEAD"),
343
+ line_start=arguments.get("line_start"),
344
+ line_end=arguments.get("line_end"),
345
+ )
346
+ return result["formatted"], not result.get("isError", False)
347
+ except Exception as e:
348
+ return f"Error reading file: {str(e)}", False
agent/tools/hf_repo_files_tool.py ADDED
@@ -0,0 +1,322 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ HF Repo Files Tool - File operations on Hugging Face repositories
3
+
4
+ Operations: list, read, upload, delete
5
+ """
6
+
7
+ import asyncio
8
+ from typing import Any, Dict, Literal, Optional
9
+
10
+ from huggingface_hub import HfApi, hf_hub_download
11
+ from huggingface_hub.utils import EntryNotFoundError, RepositoryNotFoundError
12
+
13
+ from agent.tools.types import ToolResult
14
+
15
+ OperationType = Literal["list", "read", "upload", "delete"]
16
+
17
+
18
+ async def _async_call(func, *args, **kwargs):
19
+ """Wrap synchronous HfApi calls for async context."""
20
+ return await asyncio.to_thread(func, *args, **kwargs)
21
+
22
+
23
+ def _build_repo_url(repo_id: str, repo_type: str = "model") -> str:
24
+ """Build the Hub URL for a repository."""
25
+ if repo_type == "model":
26
+ return f"https://huggingface.co/{repo_id}"
27
+ return f"https://huggingface.co/{repo_type}s/{repo_id}"
28
+
29
+
30
+ def _format_size(size_bytes: int) -> str:
31
+ """Format file size in human-readable form."""
32
+ for unit in ["B", "KB", "MB", "GB", "TB"]:
33
+ if size_bytes < 1024:
34
+ return f"{size_bytes:.1f}{unit}"
35
+ size_bytes /= 1024
36
+ return f"{size_bytes:.1f}PB"
37
+
38
+
39
+ class HfRepoFilesTool:
40
+ """Tool for file operations on HF repos."""
41
+
42
+ def __init__(self, hf_token: Optional[str] = None):
43
+ self.api = HfApi(token=hf_token)
44
+
45
+ async def execute(self, args: Dict[str, Any]) -> ToolResult:
46
+ """Execute the specified operation."""
47
+ operation = args.get("operation")
48
+
49
+ if not operation:
50
+ return self._help()
51
+
52
+ try:
53
+ handlers = {
54
+ "list": self._list,
55
+ "read": self._read,
56
+ "upload": self._upload,
57
+ "delete": self._delete,
58
+ }
59
+
60
+ handler = handlers.get(operation)
61
+ if handler:
62
+ return await handler(args)
63
+ else:
64
+ return self._error(f"Unknown operation: {operation}. Valid: list, read, upload, delete")
65
+
66
+ except RepositoryNotFoundError:
67
+ return self._error(f"Repository not found: {args.get('repo_id')}")
68
+ except EntryNotFoundError:
69
+ return self._error(f"File not found: {args.get('path')}")
70
+ except Exception as e:
71
+ return self._error(f"Error: {str(e)}")
72
+
73
+ def _help(self) -> ToolResult:
74
+ """Show usage instructions."""
75
+ return {
76
+ "formatted": """**hf_repo_files** - File operations on HF repos
77
+
78
+ **Operations:**
79
+ - `list` - List files: `{"operation": "list", "repo_id": "gpt2"}`
80
+ - `read` - Read file: `{"operation": "read", "repo_id": "gpt2", "path": "config.json"}`
81
+ - `upload` - Upload: `{"operation": "upload", "repo_id": "my-model", "path": "README.md", "content": "..."}`
82
+ - `delete` - Delete: `{"operation": "delete", "repo_id": "my-model", "patterns": ["*.tmp"]}`
83
+
84
+ **Common params:** repo_id (required), repo_type (model/dataset/space), revision (default: main)""",
85
+ "totalResults": 1,
86
+ "resultsShared": 1,
87
+ }
88
+
89
+ async def _list(self, args: Dict[str, Any]) -> ToolResult:
90
+ """List files in a repository."""
91
+ repo_id = args.get("repo_id")
92
+ if not repo_id:
93
+ return self._error("repo_id is required")
94
+
95
+ repo_type = args.get("repo_type", "model")
96
+ revision = args.get("revision", "main")
97
+ path = args.get("path", "")
98
+
99
+ items = list(await _async_call(
100
+ self.api.list_repo_tree,
101
+ repo_id=repo_id,
102
+ repo_type=repo_type,
103
+ revision=revision,
104
+ path_in_repo=path,
105
+ recursive=True,
106
+ ))
107
+
108
+ if not items:
109
+ return {"formatted": f"No files in {repo_id}", "totalResults": 0, "resultsShared": 0}
110
+
111
+ lines = []
112
+ total_size = 0
113
+ for item in sorted(items, key=lambda x: x.path):
114
+ if hasattr(item, "size") and item.size:
115
+ total_size += item.size
116
+ lines.append(f"{item.path} ({_format_size(item.size)})")
117
+ else:
118
+ lines.append(f"{item.path}/")
119
+
120
+ url = _build_repo_url(repo_id, repo_type)
121
+ response = f"**{repo_id}** ({len(items)} files, {_format_size(total_size)})\n{url}/tree/{revision}\n\n" + "\n".join(lines)
122
+
123
+ return {"formatted": response, "totalResults": len(items), "resultsShared": len(items)}
124
+
125
+ async def _read(self, args: Dict[str, Any]) -> ToolResult:
126
+ """Read file content from a repository."""
127
+ repo_id = args.get("repo_id")
128
+ path = args.get("path")
129
+
130
+ if not repo_id:
131
+ return self._error("repo_id is required")
132
+ if not path:
133
+ return self._error("path is required")
134
+
135
+ repo_type = args.get("repo_type", "model")
136
+ revision = args.get("revision", "main")
137
+ max_chars = args.get("max_chars", 50000)
138
+
139
+ file_path = await _async_call(
140
+ hf_hub_download,
141
+ repo_id=repo_id,
142
+ filename=path,
143
+ repo_type=repo_type,
144
+ revision=revision,
145
+ token=self.api.token,
146
+ )
147
+
148
+ try:
149
+ with open(file_path, "r", encoding="utf-8") as f:
150
+ content = f.read()
151
+
152
+ truncated = len(content) > max_chars
153
+ if truncated:
154
+ content = content[:max_chars]
155
+
156
+ url = f"{_build_repo_url(repo_id, repo_type)}/blob/{revision}/{path}"
157
+ response = f"**{path}**{' (truncated)' if truncated else ''}\n{url}\n\n```\n{content}\n```"
158
+
159
+ return {"formatted": response, "totalResults": 1, "resultsShared": 1}
160
+
161
+ except UnicodeDecodeError:
162
+ import os
163
+ size = os.path.getsize(file_path)
164
+ return {"formatted": f"Binary file ({_format_size(size)})", "totalResults": 1, "resultsShared": 1}
165
+
166
+ async def _upload(self, args: Dict[str, Any]) -> ToolResult:
167
+ """Upload content to a repository."""
168
+ repo_id = args.get("repo_id")
169
+ path = args.get("path")
170
+ content = args.get("content")
171
+
172
+ if not repo_id:
173
+ return self._error("repo_id is required")
174
+ if not path:
175
+ return self._error("path is required")
176
+ if content is None:
177
+ return self._error("content is required")
178
+
179
+ repo_type = args.get("repo_type", "model")
180
+ revision = args.get("revision", "main")
181
+ create_pr = args.get("create_pr", False)
182
+ commit_message = args.get("commit_message", f"Upload {path}")
183
+
184
+ file_bytes = content.encode("utf-8") if isinstance(content, str) else content
185
+
186
+ result = await _async_call(
187
+ self.api.upload_file,
188
+ path_or_fileobj=file_bytes,
189
+ path_in_repo=path,
190
+ repo_id=repo_id,
191
+ repo_type=repo_type,
192
+ revision=revision,
193
+ commit_message=commit_message,
194
+ create_pr=create_pr,
195
+ )
196
+
197
+ url = _build_repo_url(repo_id, repo_type)
198
+ if create_pr and hasattr(result, "pr_url"):
199
+ response = f"**Uploaded as PR**\n{result.pr_url}"
200
+ else:
201
+ response = f"**Uploaded:** {path}\n{url}/blob/{revision}/{path}"
202
+
203
+ return {"formatted": response, "totalResults": 1, "resultsShared": 1}
204
+
205
+ async def _delete(self, args: Dict[str, Any]) -> ToolResult:
206
+ """Delete files from a repository."""
207
+ repo_id = args.get("repo_id")
208
+ patterns = args.get("patterns")
209
+
210
+ if not repo_id:
211
+ return self._error("repo_id is required")
212
+ if not patterns:
213
+ return self._error("patterns is required (list of paths/wildcards)")
214
+
215
+ if isinstance(patterns, str):
216
+ patterns = [patterns]
217
+
218
+ repo_type = args.get("repo_type", "model")
219
+ revision = args.get("revision", "main")
220
+ create_pr = args.get("create_pr", False)
221
+ commit_message = args.get("commit_message", f"Delete {', '.join(patterns)}")
222
+
223
+ await _async_call(
224
+ self.api.delete_files,
225
+ repo_id=repo_id,
226
+ delete_patterns=patterns,
227
+ repo_type=repo_type,
228
+ revision=revision,
229
+ commit_message=commit_message,
230
+ create_pr=create_pr,
231
+ )
232
+
233
+ response = f"**Deleted:** {', '.join(patterns)} from {repo_id}"
234
+ return {"formatted": response, "totalResults": 1, "resultsShared": 1}
235
+
236
+ def _error(self, message: str) -> ToolResult:
237
+ """Return an error result."""
238
+ return {"formatted": message, "totalResults": 0, "resultsShared": 0, "isError": True}
239
+
240
+
241
+ # Tool specification
242
+ HF_REPO_FILES_TOOL_SPEC = {
243
+ "name": "hf_repo_files",
244
+ "description": (
245
+ "Read and write files in HF repos (models/datasets/spaces).\n\n"
246
+ "## Operations\n"
247
+ "- **list**: List files with sizes and structure\n"
248
+ "- **read**: Read file content (text files only)\n"
249
+ "- **upload**: Upload content to repo (can create PR)\n"
250
+ "- **delete**: Delete files/folders (supports wildcards like *.tmp)\n\n"
251
+ "## Use when\n"
252
+ "- Need to see what files exist in a repo\n"
253
+ "- Want to read config.json, README.md, or other text files\n"
254
+ "- Uploading training scripts, configs, or results to a repo\n"
255
+ "- Cleaning up temporary files from a repo\n\n"
256
+ "## Examples\n"
257
+ '{"operation": "list", "repo_id": "meta-llama/Llama-2-7b"}\n'
258
+ '{"operation": "read", "repo_id": "gpt2", "path": "config.json"}\n'
259
+ '{"operation": "upload", "repo_id": "my-model", "path": "README.md", "content": "# My Model"}\n'
260
+ '{"operation": "upload", "repo_id": "org/model", "path": "fix.py", "content": "...", "create_pr": true}\n'
261
+ '{"operation": "delete", "repo_id": "my-model", "patterns": ["*.tmp", "logs/"]}\n\n'
262
+ "## Notes\n"
263
+ "- For binary files (safetensors, bin), use list to see them but can't read content\n"
264
+ "- upload/delete require approval (can overwrite/destroy data)\n"
265
+ "- Use create_pr=true to propose changes instead of direct commit\n"
266
+ ),
267
+ "parameters": {
268
+ "type": "object",
269
+ "properties": {
270
+ "operation": {
271
+ "type": "string",
272
+ "enum": ["list", "read", "upload", "delete"],
273
+ "description": "Operation: list, read, upload, delete",
274
+ },
275
+ "repo_id": {
276
+ "type": "string",
277
+ "description": "Repository ID (e.g., 'username/repo-name')",
278
+ },
279
+ "repo_type": {
280
+ "type": "string",
281
+ "enum": ["model", "dataset", "space"],
282
+ "description": "Repository type (default: model)",
283
+ },
284
+ "revision": {
285
+ "type": "string",
286
+ "description": "Branch/tag/commit (default: main)",
287
+ },
288
+ "path": {
289
+ "type": "string",
290
+ "description": "File path for read/upload",
291
+ },
292
+ "content": {
293
+ "type": "string",
294
+ "description": "File content for upload",
295
+ },
296
+ "patterns": {
297
+ "type": "array",
298
+ "items": {"type": "string"},
299
+ "description": "Patterns to delete (e.g., ['*.tmp', 'logs/'])",
300
+ },
301
+ "create_pr": {
302
+ "type": "boolean",
303
+ "description": "Create PR instead of direct commit",
304
+ },
305
+ "commit_message": {
306
+ "type": "string",
307
+ "description": "Custom commit message",
308
+ },
309
+ },
310
+ "required": ["operation"],
311
+ },
312
+ }
313
+
314
+
315
+ async def hf_repo_files_handler(arguments: Dict[str, Any]) -> tuple[str, bool]:
316
+ """Handler for agent tool router."""
317
+ try:
318
+ tool = HfRepoFilesTool()
319
+ result = await tool.execute(arguments)
320
+ return result["formatted"], not result.get("isError", False)
321
+ except Exception as e:
322
+ return f"Error: {str(e)}", False
agent/tools/hf_repo_git_tool.py ADDED
@@ -0,0 +1,663 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ HF Repo Git Tool - Git-like operations on Hugging Face repositories
3
+
4
+ Operations: branches, tags, PRs, repo management
5
+ """
6
+
7
+ import asyncio
8
+ from typing import Any, Dict, Literal, Optional
9
+
10
+ from huggingface_hub import HfApi
11
+ from huggingface_hub.utils import RepositoryNotFoundError
12
+
13
+ from agent.tools.types import ToolResult
14
+
15
# Closed set of operation names dispatched by HfRepoGitTool.execute;
# mirrors the "operation" enum in HF_REPO_GIT_TOOL_SPEC (kept in sync manually).
OperationType = Literal[
    "create_branch", "delete_branch",
    "create_tag", "delete_tag",
    "list_refs",
    "create_pr", "list_prs", "get_pr", "merge_pr", "close_pr", "comment_pr", "change_pr_status",
    "create_repo", "update_repo",
]
22
+
23
+
24
+ async def _async_call(func, *args, **kwargs):
25
+ """Wrap synchronous HfApi calls for async context."""
26
+ return await asyncio.to_thread(func, *args, **kwargs)
27
+
28
+
29
+ def _build_repo_url(repo_id: str, repo_type: str = "model") -> str:
30
+ """Build the Hub URL for a repository."""
31
+ if repo_type == "model":
32
+ return f"https://huggingface.co/{repo_id}"
33
+ return f"https://huggingface.co/{repo_type}s/{repo_id}"
34
+
35
+
36
class HfRepoGitTool:
    """Git-like operations (branches, tags, PRs, repo admin) on HF repos.

    Thin async wrapper around ``huggingface_hub.HfApi``: blocking API calls
    are offloaded to a worker thread via ``_async_call`` so the caller's
    event loop is never blocked.  Every entry point returns a ToolResult
    dict; failures are reported as error results rather than raised.
    """

    def __init__(self, hf_token: Optional[str] = None):
        # hf_token=None lets HfApi fall back to its own credential
        # resolution (HF_TOKEN env var / cached login).
        self.api = HfApi(token=hf_token)

    async def execute(self, args: Dict[str, Any]) -> ToolResult:
        """Dispatch ``args['operation']`` to its handler.

        No operation -> usage help.  Unknown operations, missing repos, and
        unexpected exceptions become error ToolResults (never raised).
        """
        operation = args.get("operation")

        if not operation:
            return self._help()

        try:
            handlers = {
                "create_branch": self._create_branch,
                "delete_branch": self._delete_branch,
                "create_tag": self._create_tag,
                "delete_tag": self._delete_tag,
                "list_refs": self._list_refs,
                "create_pr": self._create_pr,
                "list_prs": self._list_prs,
                "get_pr": self._get_pr,
                "merge_pr": self._merge_pr,
                "close_pr": self._close_pr,
                "comment_pr": self._comment_pr,
                "change_pr_status": self._change_pr_status,
                "create_repo": self._create_repo,
                "update_repo": self._update_repo,
            }

            handler = handlers.get(operation)
            if handler:
                return await handler(args)
            ops = ", ".join(handlers.keys())
            return self._error(f"Unknown operation: {operation}. Valid: {ops}")

        except RepositoryNotFoundError:
            return self._error(f"Repository not found: {args.get('repo_id')}")
        except Exception as e:
            return self._error(f"Error: {str(e)}")

    def _help(self) -> ToolResult:
        """Show usage instructions (returned when no operation is given)."""
        return {
            "formatted": """**hf_repo_git** - Git-like operations on HF repos

**Branch/Tag:**
- `create_branch`: `{"operation": "create_branch", "repo_id": "...", "branch": "dev"}`
- `delete_branch`: `{"operation": "delete_branch", "repo_id": "...", "branch": "dev"}`
- `create_tag`: `{"operation": "create_tag", "repo_id": "...", "tag": "v1.0"}`
- `delete_tag`: `{"operation": "delete_tag", "repo_id": "...", "tag": "v1.0"}`
- `list_refs`: `{"operation": "list_refs", "repo_id": "..."}`

**PRs:**
- `create_pr`: `{"operation": "create_pr", "repo_id": "...", "title": "..."}` (creates draft PR)
- `list_prs`: `{"operation": "list_prs", "repo_id": "..."}` (shows status: draft/open/merged/closed)
- `get_pr`: `{"operation": "get_pr", "repo_id": "...", "pr_num": 1}` (shows status)
- `change_pr_status`: `{"operation": "change_pr_status", "repo_id": "...", "pr_num": 1, "new_status": "open"}` (change draft to open)
- `merge_pr`: `{"operation": "merge_pr", "repo_id": "...", "pr_num": 1}`
- `close_pr`: `{"operation": "close_pr", "repo_id": "...", "pr_num": 1}`
- `comment_pr`: `{"operation": "comment_pr", "repo_id": "...", "pr_num": 1, "comment": "..."}`

**Repo:**
- `create_repo`: `{"operation": "create_repo", "repo_id": "my-model", "private": true}`
- `update_repo`: `{"operation": "update_repo", "repo_id": "...", "private": false}`""",
            "totalResults": 1,
            "resultsShared": 1,
        }

    def _require(self, args: Dict[str, Any], *names: str) -> Optional[ToolResult]:
        """Return an error result for the first missing/falsy required
        argument in *names*, or None when all are present."""
        for name in names:
            if not args.get(name):
                return self._error(f"{name} is required")
        return None

    # =========================================================================
    # BRANCH OPERATIONS
    # =========================================================================

    async def _create_branch(self, args: Dict[str, Any]) -> ToolResult:
        """Create a new branch from an existing revision (default: main)."""
        missing = self._require(args, "repo_id", "branch")
        if missing:
            return missing

        repo_id = args["repo_id"]
        branch = args["branch"]
        repo_type = args.get("repo_type", "model")
        from_rev = args.get("from_rev", "main")

        await _async_call(
            self.api.create_branch,
            repo_id=repo_id,
            branch=branch,
            revision=from_rev,
            repo_type=repo_type,
            exist_ok=args.get("exist_ok", False),
        )

        url = f"{_build_repo_url(repo_id, repo_type)}/tree/{branch}"
        return {"formatted": f"**Branch created:** {branch}\n{url}", "totalResults": 1, "resultsShared": 1}

    async def _delete_branch(self, args: Dict[str, Any]) -> ToolResult:
        """Delete a branch."""
        missing = self._require(args, "repo_id", "branch")
        if missing:
            return missing

        await _async_call(
            self.api.delete_branch,
            repo_id=args["repo_id"],
            branch=args["branch"],
            repo_type=args.get("repo_type", "model"),
        )

        return {"formatted": f"**Branch deleted:** {args['branch']}", "totalResults": 1, "resultsShared": 1}

    # =========================================================================
    # TAG OPERATIONS
    # =========================================================================

    async def _create_tag(self, args: Dict[str, Any]) -> ToolResult:
        """Create a tag pointing at a revision (default: main)."""
        missing = self._require(args, "repo_id", "tag")
        if missing:
            return missing

        repo_id = args["repo_id"]
        tag = args["tag"]
        repo_type = args.get("repo_type", "model")

        await _async_call(
            self.api.create_tag,
            repo_id=repo_id,
            tag=tag,
            revision=args.get("revision", "main"),
            tag_message=args.get("tag_message", ""),
            repo_type=repo_type,
            exist_ok=args.get("exist_ok", False),
        )

        url = f"{_build_repo_url(repo_id, repo_type)}/tree/{tag}"
        return {"formatted": f"**Tag created:** {tag}\n{url}", "totalResults": 1, "resultsShared": 1}

    async def _delete_tag(self, args: Dict[str, Any]) -> ToolResult:
        """Delete a tag."""
        missing = self._require(args, "repo_id", "tag")
        if missing:
            return missing

        await _async_call(
            self.api.delete_tag,
            repo_id=args["repo_id"],
            tag=args["tag"],
            repo_type=args.get("repo_type", "model"),
        )

        return {"formatted": f"**Tag deleted:** {args['tag']}", "totalResults": 1, "resultsShared": 1}

    # =========================================================================
    # LIST REFS
    # =========================================================================

    async def _list_refs(self, args: Dict[str, Any]) -> ToolResult:
        """List branches and tags of a repository."""
        missing = self._require(args, "repo_id")
        if missing:
            return missing

        repo_id = args["repo_id"]
        repo_type = args.get("repo_type", "model")

        refs = await _async_call(
            self.api.list_repo_refs,
            repo_id=repo_id,
            repo_type=repo_type,
        )

        branches = [b.name for b in refs.branches] if refs.branches else []
        # hasattr guard is defensive — presumably for hub versions whose refs
        # object lacks .tags; TODO confirm whether it is still needed.
        tags = [t.name for t in refs.tags] if hasattr(refs, 'tags') and refs.tags else []

        url = _build_repo_url(repo_id, repo_type)
        lines = [f"**{repo_id}**", url, ""]

        if branches:
            lines.append(f"**Branches ({len(branches)}):** " + ", ".join(branches))
        else:
            lines.append("**Branches:** none")

        if tags:
            lines.append(f"**Tags ({len(tags)}):** " + ", ".join(tags))
        else:
            lines.append("**Tags:** none")

        return {"formatted": "\n".join(lines), "totalResults": len(branches) + len(tags), "resultsShared": len(branches) + len(tags)}

    # =========================================================================
    # PR OPERATIONS
    # =========================================================================

    async def _create_pr(self, args: Dict[str, Any]) -> ToolResult:
        """Create a pull request (starts as an empty draft PR)."""
        missing = self._require(args, "repo_id", "title")
        if missing:
            return missing

        repo_id = args["repo_id"]
        title = args["title"]
        repo_type = args.get("repo_type", "model")

        result = await _async_call(
            self.api.create_pull_request,
            repo_id=repo_id,
            title=title,
            description=args.get("description", ""),
            repo_type=repo_type,
        )

        url = f"{_build_repo_url(repo_id, repo_type)}/discussions/{result.num}"
        return {
            "formatted": f"**Draft PR #{result.num} created:** {title}\n{url}\n\nAdd commits via upload with revision=\"refs/pr/{result.num}\"",
            "totalResults": 1,
            "resultsShared": 1,
        }

    async def _list_prs(self, args: Dict[str, Any]) -> ToolResult:
        """List PRs and discussions, optionally filtered by status."""
        missing = self._require(args, "repo_id")
        if missing:
            return missing

        repo_id = args["repo_id"]
        repo_type = args.get("repo_type", "model")
        status = args.get("status", "all")  # open, closed, all

        # BUGFIX: get_repo_discussions is a blocking paginated generator;
        # drain it in a worker thread like every other HfApi call here so the
        # event loop is not blocked while fetching pages.
        discussions = await _async_call(
            lambda: list(
                self.api.get_repo_discussions(
                    repo_id=repo_id,
                    repo_type=repo_type,
                    discussion_status=status if status != "all" else None,
                )
            )
        )

        if not discussions:
            return {"formatted": f"No discussions in {repo_id}", "totalResults": 0, "resultsShared": 0}

        url = _build_repo_url(repo_id, repo_type)
        lines = [f"**{repo_id}** - {len(discussions)} discussions", f"{url}/discussions", ""]

        # Anything that is not draft/open/merged is rendered as closed.
        status_labels = {"draft": "[DRAFT]", "open": "[OPEN]", "merged": "[MERGED]"}
        for d in discussions[:20]:
            status_label = status_labels.get(d.status, "[CLOSED]")
            type_label = "PR" if d.is_pull_request else "D"
            lines.append(f"{status_label} #{d.num} [{type_label}] {d.title}")

        return {"formatted": "\n".join(lines), "totalResults": len(discussions), "resultsShared": min(20, len(discussions))}

    async def _get_pr(self, args: Dict[str, Any]) -> ToolResult:
        """Get details of one PR/discussion."""
        missing = self._require(args, "repo_id", "pr_num")
        if missing:
            return missing

        repo_id = args["repo_id"]
        pr_num = args["pr_num"]
        repo_type = args.get("repo_type", "model")

        pr = await _async_call(
            self.api.get_discussion_details,
            repo_id=repo_id,
            discussion_num=int(pr_num),
            repo_type=repo_type,
        )

        url = f"{_build_repo_url(repo_id, repo_type)}/discussions/{pr_num}"
        status_map = {
            "draft": "Draft",
            "open": "Open",
            "merged": "Merged",
            "closed": "Closed"
        }
        status = status_map.get(pr.status, pr.status.capitalize())
        type_label = "Pull Request" if pr.is_pull_request else "Discussion"

        lines = [
            f"**{type_label} #{pr_num}:** {pr.title}",
            f"**Status:** {status}",
            f"**Author:** {pr.author}",
            url,
        ]

        # Draft and open PRs can still receive commits (merged/closed cannot).
        if pr.is_pull_request and pr.status in ("draft", "open"):
            lines.append(f"\nTo add commits: upload with revision=\"refs/pr/{pr_num}\"")

        return {"formatted": "\n".join(lines), "totalResults": 1, "resultsShared": 1}

    async def _merge_pr(self, args: Dict[str, Any]) -> ToolResult:
        """Merge a pull request."""
        missing = self._require(args, "repo_id", "pr_num")
        if missing:
            return missing

        repo_id = args["repo_id"]
        pr_num = args["pr_num"]
        repo_type = args.get("repo_type", "model")

        await _async_call(
            self.api.merge_pull_request,
            repo_id=repo_id,
            discussion_num=int(pr_num),
            comment=args.get("comment", ""),
            repo_type=repo_type,
        )

        url = f"{_build_repo_url(repo_id, repo_type)}/discussions/{pr_num}"
        return {"formatted": f"**PR #{pr_num} merged**\n{url}", "totalResults": 1, "resultsShared": 1}

    async def _close_pr(self, args: Dict[str, Any]) -> ToolResult:
        """Close a PR/discussion without merging."""
        missing = self._require(args, "repo_id", "pr_num")
        if missing:
            return missing

        pr_num = args["pr_num"]

        await _async_call(
            self.api.change_discussion_status,
            repo_id=args["repo_id"],
            discussion_num=int(pr_num),
            new_status="closed",
            comment=args.get("comment", ""),
            repo_type=args.get("repo_type", "model"),
        )

        return {"formatted": f"**Discussion #{pr_num} closed**", "totalResults": 1, "resultsShared": 1}

    async def _comment_pr(self, args: Dict[str, Any]) -> ToolResult:
        """Add a comment to a PR/discussion."""
        missing = self._require(args, "repo_id", "pr_num", "comment")
        if missing:
            return missing

        repo_id = args["repo_id"]
        pr_num = args["pr_num"]
        repo_type = args.get("repo_type", "model")

        await _async_call(
            self.api.comment_discussion,
            repo_id=repo_id,
            discussion_num=int(pr_num),
            comment=args["comment"],
            repo_type=repo_type,
        )

        url = f"{_build_repo_url(repo_id, repo_type)}/discussions/{pr_num}"
        return {"formatted": f"**Comment added to #{pr_num}**\n{url}", "totalResults": 1, "resultsShared": 1}

    async def _change_pr_status(self, args: Dict[str, Any]) -> ToolResult:
        """Change PR/discussion status (mainly to convert draft to open)."""
        missing = self._require(args, "repo_id", "pr_num")
        if missing:
            return missing

        new_status = args.get("new_status")
        if not new_status:
            return self._error("new_status is required (open or closed)")

        repo_id = args["repo_id"]
        pr_num = args["pr_num"]
        repo_type = args.get("repo_type", "model")

        await _async_call(
            self.api.change_discussion_status,
            repo_id=repo_id,
            discussion_num=int(pr_num),
            new_status=new_status,
            comment=args.get("comment", ""),
            repo_type=repo_type,
        )

        url = f"{_build_repo_url(repo_id, repo_type)}/discussions/{pr_num}"
        return {"formatted": f"**PR #{pr_num} status changed to {new_status}**\n{url}", "totalResults": 1, "resultsShared": 1}

    # =========================================================================
    # REPO MANAGEMENT
    # =========================================================================

    async def _create_repo(self, args: Dict[str, Any]) -> ToolResult:
        """Create a new model/dataset/space repository (private by default)."""
        missing = self._require(args, "repo_id")
        if missing:
            return missing

        repo_id = args["repo_id"]
        repo_type = args.get("repo_type", "model")
        private = args.get("private", True)
        space_sdk = args.get("space_sdk")

        if repo_type == "space" and not space_sdk:
            return self._error("space_sdk required for spaces (gradio/streamlit/docker/static)")

        kwargs = {
            "repo_id": repo_id,
            "repo_type": repo_type,
            "private": private,
            "exist_ok": args.get("exist_ok", False),
        }
        if space_sdk:
            kwargs["space_sdk"] = space_sdk

        result = await _async_call(self.api.create_repo, **kwargs)

        return {
            "formatted": f"**Repository created:** {repo_id}\n**Private:** {private}\n{result}",
            "totalResults": 1,
            "resultsShared": 1,
        }

    async def _update_repo(self, args: Dict[str, Any]) -> ToolResult:
        """Update repository settings (visibility and/or gated access)."""
        missing = self._require(args, "repo_id")
        if missing:
            return missing

        repo_id = args["repo_id"]
        repo_type = args.get("repo_type", "model")
        private = args.get("private")
        gated = args.get("gated")

        if private is None and gated is None:
            return self._error("Specify private (bool) or gated ('auto'/'manual'/false)")

        # NOTE(review): the tool spec's gated enum yields the string "false",
        # while HfApi.update_repo_settings documents bool False for ungated —
        # confirm the Hub accepts the string form.
        kwargs = {"repo_id": repo_id, "repo_type": repo_type}
        if private is not None:
            kwargs["private"] = private
        if gated is not None:
            kwargs["gated"] = gated

        await _async_call(self.api.update_repo_settings, **kwargs)

        changes = []
        if private is not None:
            changes.append(f"private={private}")
        if gated is not None:
            changes.append(f"gated={gated}")

        url = f"{_build_repo_url(repo_id, repo_type)}/settings"
        return {"formatted": f"**Settings updated:** {', '.join(changes)}\n{url}", "totalResults": 1, "resultsShared": 1}

    def _error(self, message: str) -> ToolResult:
        """Return an error result."""
        return {"formatted": message, "totalResults": 0, "resultsShared": 0, "isError": True}
529
+
530
+
531
# Tool specification
# JSON-schema-style spec consumed by the agent's tool router; the description
# doubles as LLM-facing usage documentation, so its wording is part of the
# tool's behavior and should not be edited casually.
HF_REPO_GIT_TOOL_SPEC = {
    "name": "hf_repo_git",
    "description": (
        "Git-like operations on HF repos: branches, tags, PRs, and repo management.\n\n"
        "## Operations\n"
        "**Branches:** create_branch, delete_branch, list_refs\n"
        "**Tags:** create_tag, delete_tag\n"
        "**PRs:** create_pr, list_prs, get_pr, merge_pr, close_pr, comment_pr, change_pr_status\n"
        "**Repo:** create_repo, update_repo\n\n"
        "## Use when\n"
        "- Creating feature branches for experiments\n"
        "- Tagging model versions (v1.0, v2.0)\n"
        "- Opening PRs to contribute to repos you don't own\n"
        "- Reviewing and merging PRs on your repos\n"
        "- Creating new model/dataset/space repos\n"
        "- Changing repo visibility (public/private) or gated access\n\n"
        "## Examples\n"
        '{"operation": "list_refs", "repo_id": "my-model"}\n'
        '{"operation": "create_branch", "repo_id": "my-model", "branch": "experiment-v2"}\n'
        '{"operation": "create_tag", "repo_id": "my-model", "tag": "v1.0", "revision": "main"}\n'
        '{"operation": "create_pr", "repo_id": "org/model", "title": "Fix tokenizer config"}\n'
        '{"operation": "change_pr_status", "repo_id": "my-model", "pr_num": 1, "new_status": "open"}\n'
        '{"operation": "merge_pr", "repo_id": "my-model", "pr_num": 3}\n'
        '{"operation": "create_repo", "repo_id": "my-new-model", "private": true}\n'
        '{"operation": "update_repo", "repo_id": "my-model", "gated": "auto"}\n\n'
        "## PR Workflow\n"
        "1. create_pr → creates draft PR (empty by default)\n"
        "2. Upload files with revision='refs/pr/N' to add commits\n"
        "3. change_pr_status with new_status='open' to publish (convert draft to open)\n"
        "4. merge_pr when ready\n\n"
        "## Notes\n"
        "- PR status: draft (default), open, merged, closed\n"
        "- delete_branch, delete_tag, merge_pr, create_repo, update_repo require approval\n"
        "- For spaces, create_repo needs space_sdk (gradio/streamlit/docker/static)\n"
        "- gated options: 'auto' (instant), 'manual' (review), false (open)\n"
    ),
    # Parameter schema; "operation" is the only required field — per-operation
    # requirements (repo_id, branch, pr_num, ...) are enforced at runtime by
    # the tool itself.
    "parameters": {
        "type": "object",
        "properties": {
            "operation": {
                "type": "string",
                # Must stay in sync with the OperationType Literal above.
                "enum": [
                    "create_branch", "delete_branch",
                    "create_tag", "delete_tag", "list_refs",
                    "create_pr", "list_prs", "get_pr", "merge_pr", "close_pr", "comment_pr", "change_pr_status",
                    "create_repo", "update_repo",
                ],
                "description": "Operation to execute",
            },
            "repo_id": {
                "type": "string",
                "description": "Repository ID (e.g., 'username/repo-name')",
            },
            "repo_type": {
                "type": "string",
                "enum": ["model", "dataset", "space"],
                "description": "Repository type (default: model)",
            },
            "branch": {
                "type": "string",
                "description": "Branch name (create_branch, delete_branch)",
            },
            "from_rev": {
                "type": "string",
                "description": "Create branch from this revision (default: main)",
            },
            "tag": {
                "type": "string",
                "description": "Tag name (create_tag, delete_tag)",
            },
            "revision": {
                "type": "string",
                "description": "Revision for tag (default: main)",
            },
            "tag_message": {
                "type": "string",
                "description": "Tag description",
            },
            "title": {
                "type": "string",
                "description": "PR title (create_pr)",
            },
            "description": {
                "type": "string",
                "description": "PR description (create_pr)",
            },
            "pr_num": {
                "type": "integer",
                "description": "PR/discussion number",
            },
            "comment": {
                "type": "string",
                "description": "Comment text",
            },
            "status": {
                "type": "string",
                "enum": ["open", "closed", "all"],
                "description": "Filter PRs by status (list_prs)",
            },
            "new_status": {
                "type": "string",
                "enum": ["open", "closed"],
                "description": "New status for PR/discussion (change_pr_status)",
            },
            "private": {
                "type": "boolean",
                "description": "Make repo private (create_repo, update_repo)",
            },
            "gated": {
                "type": "string",
                # NOTE(review): this enum produces the *string* "false", but
                # HfApi.update_repo_settings documents bool False for ungated —
                # confirm the string form is accepted by the Hub.
                "enum": ["auto", "manual", "false"],
                "description": "Gated access setting (update_repo)",
            },
            "space_sdk": {
                "type": "string",
                "enum": ["gradio", "streamlit", "docker", "static"],
                "description": "Space SDK (required for create_repo with space)",
            },
        },
        "required": ["operation"],
    },
}
654
+
655
+
656
async def hf_repo_git_handler(arguments: Dict[str, Any]) -> tuple[str, bool]:
    """Adapter for the agent tool router.

    Runs the hf_repo_git tool and returns a ``(formatted_text, success)``
    pair; any exception is folded into an error message so the router never
    sees a raise from this handler.
    """
    try:
        outcome = await HfRepoGitTool().execute(arguments)
        return outcome["formatted"], not outcome.get("isError", False)
    except Exception as e:
        return f"Error: {str(e)}", False
agent/tools/jobs_tool.py ADDED
@@ -0,0 +1,1042 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Hugging Face Jobs Tool - Using huggingface-hub library
3
+
4
+ Refactored to use official huggingface-hub library instead of custom HTTP client
5
+ """
6
+
7
+ import asyncio
8
+ import base64
9
+ import http.client
10
+ import os
11
+ import re
12
+ from typing import Any, Dict, Literal, Optional, Callable, Awaitable
13
+
14
+ import logging
15
+
16
+ import httpx
17
+ from huggingface_hub import HfApi
18
+ from huggingface_hub.utils import HfHubHTTPError
19
+
20
+ from agent.core.session import Event
21
+ from agent.tools.types import ToolResult
22
+
23
+ logger = logging.getLogger(__name__)
24
+ from agent.tools.utilities import (
25
+ format_job_details,
26
+ format_jobs_table,
27
+ format_scheduled_job_details,
28
+ format_scheduled_jobs_table,
29
+ )
30
+
31
# Hardware flavors
# Flavor identifiers accepted by HF Jobs, split by hardware class so callers
# can validate or display CPU vs GPU options separately.
CPU_FLAVORS = ["cpu-basic", "cpu-upgrade", "cpu-performance", "cpu-xl"]
GPU_FLAVORS = [
    "sprx8",
    "zero-a10g",
    "t4-small",
    "t4-medium",
    "l4x1",
    "l4x4",
    "l40sx1",
    "l40sx4",
    "l40sx8",
    "a10g-small",
    "a10g-large",
    "a10g-largex2",
    "a10g-largex4",
    "a100-large",
    "h100",
    "h100x8",
]

# Detailed specs for display (vCPU/RAM/GPU VRAM)
# Human-readable spec summaries — presumably surfaced in tool descriptions or
# help text; the consuming code is not visible in this excerpt.
CPU_FLAVORS_DESC = (
    "cpu-basic(2vCPU/16GB), cpu-upgrade(8vCPU/32GB), cpu-performance, cpu-xl"
)
GPU_FLAVORS_DESC = (
    "t4-small(4vCPU/15GB/GPU 16GB), t4-medium(8vCPU/30GB/GPU 16GB), "
    "l4x1(8vCPU/30GB/GPU 24GB), l4x4(48vCPU/186GB/GPU 96GB), "
    "l40sx1(8vCPU/62GB/GPU 48GB), l40sx4(48vCPU/382GB/GPU 192GB), l40sx8(192vCPU/1534GB/GPU 384GB), "
    "a10g-small(4vCPU/14GB/GPU 24GB), a10g-large(12vCPU/46GB/GPU 24GB), "
    "a10g-largex2(24vCPU/92GB/GPU 48GB), a10g-largex4(48vCPU/184GB/GPU 96GB), "
    "a100-large(12vCPU/142GB/GPU 80GB), h100(23vCPU/240GB/GPU 80GB), h100x8(184vCPU/1920GB/GPU 640GB), "
    "zero-a10g(dynamic alloc)"
)
# Specialized accelerator flavors (neither plain CPU nor standard GPU).
SPECIALIZED_FLAVORS = ["inf2x6"]
ALL_FLAVORS = CPU_FLAVORS + GPU_FLAVORS + SPECIALIZED_FLAVORS

# Operation names
# Closed set of operations for the jobs tool; "scheduled *" variants are
# space-separated compound names, not separate arguments.
OperationType = Literal[
    "run",
    "ps",
    "logs",
    "inspect",
    "cancel",
    "scheduled run",
    "scheduled ps",
    "scheduled inspect",
    "scheduled delete",
    "scheduled suspend",
    "scheduled resume",
]

# Constants
# Default container image for UV-based jobs (its use site is not visible in
# this excerpt — presumably the job-submission path).
UV_DEFAULT_IMAGE = "ghcr.io/astral-sh/uv:python3.12-bookworm"
85
+
86
+
87
+ def _filter_uv_install_output(logs: list[str]) -> list[str]:
88
+ """
89
+ Filter out UV package installation output from logs.
90
+
91
+ Replaces installation details with "[installs truncated]" and keeps
92
+ the "Installed X packages in Y ms/s" summary line.
93
+
94
+ Args:
95
+ logs: List of log lines
96
+
97
+ Returns:
98
+ Filtered list of log lines
99
+ """
100
+ if not logs:
101
+ return logs
102
+
103
+ # Regex pattern to match: "Installed X packages in Y ms" or "Installed X package in Y s"
104
+ install_pattern = re.compile(
105
+ r"^Installed\s+\d+\s+packages?\s+in\s+\d+(?:\.\d+)?\s*(?:ms|s)$"
106
+ )
107
+
108
+ # Find the index of the "Installed X packages" line
109
+ install_line_idx = None
110
+ for idx, line in enumerate(logs):
111
+ if install_pattern.match(line.strip()):
112
+ install_line_idx = idx
113
+ break
114
+
115
+ # If pattern found, replace installation details with truncation message
116
+ if install_line_idx is not None and install_line_idx > 0:
117
+ # Keep logs from the "Installed X packages" line onward
118
+ # Add truncation message before the "Installed" line
119
+ return ["[installs truncated]"] + logs[install_line_idx:]
120
+
121
+ # If pattern not found, return original logs
122
+ return logs
123
+
124
+
125
+ def _add_environment_variables(
126
+ params: Dict[str, Any] | None, user_token: str | None = None
127
+ ) -> Dict[str, Any]:
128
+ # Prefer the authenticated user's OAuth token, fall back to global env var
129
+ token = user_token or os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACE_HUB_TOKEN") or ""
130
+
131
+ # Start with user-provided env vars, then force-set token last
132
+ result = dict(params or {})
133
+
134
+ # If the caller passed HF_TOKEN="$HF_TOKEN", ignore it.
135
+ if result.get("HF_TOKEN", "").strip().startswith("$"):
136
+ result.pop("HF_TOKEN", None)
137
+
138
+ # Set both names to be safe (different libs check different vars)
139
+ if token:
140
+ result["HF_TOKEN"] = token
141
+ result["HUGGINGFACE_HUB_TOKEN"] = token
142
+
143
+ return result
144
+
145
+
146
+ def _build_uv_command(
147
+ script: str,
148
+ with_deps: list[str] | None = None,
149
+ python: str | None = None,
150
+ script_args: list[str] | None = None,
151
+ ) -> list[str]:
152
+ """Build UV run command"""
153
+ parts = ["uv", "run"]
154
+
155
+ if with_deps:
156
+ for dep in with_deps:
157
+ parts.extend(["--with", dep])
158
+
159
+ if python:
160
+ parts.extend(["-p", python])
161
+
162
+ parts.append(script)
163
+
164
+ if script_args:
165
+ parts.extend(script_args)
166
+
167
+ # add defaults
168
+ # parts.extend(["--push_to_hub"])
169
+ return parts
170
+
171
+
172
def _wrap_inline_script(
    script: str,
    with_deps: list[str] | None = None,
    python: str | None = None,
    script_args: list[str] | None = None,
) -> str:
    """Package an inline script as a shell pipeline feeding ``uv run -``.

    Base64-encoding the body avoids creating a temp file and sidesteps shell
    quoting issues; the pipeline decodes it and pipes it to uv on stdin.
    """
    payload = base64.b64encode(script.encode("utf-8")).decode("utf-8")
    # "-" tells uv to read the script from stdin.
    runner = " ".join(_build_uv_command("-", with_deps, python, script_args))
    return f'echo "{payload}" | base64 -d | {runner}'
+ return f'echo "{encoded}" | base64 -d | {uv_command_str}'
185
+
186
+
187
+ def _ensure_hf_transfer_dependency(deps: list[str] | None) -> list[str]:
188
+ """Ensure hf-transfer is included in the dependencies list"""
189
+
190
+ if isinstance(deps, list):
191
+ deps_copy = deps.copy() # Don't modify the original
192
+ if "hf-transfer" not in deps_copy:
193
+ deps_copy.append("hf-transfer")
194
+ return deps_copy
195
+
196
+ return ["hf-transfer"]
197
+
198
+
199
def _resolve_uv_command(
    script: str,
    with_deps: list[str] | None = None,
    python: str | None = None,
    script_args: list[str] | None = None,
) -> list[str]:
    """Build the job command for a script given as URL, inline code, or path.

    - http(s) URLs are handed to `uv run` directly.
    - Multi-line strings are treated as inline source and wrapped in a
      base64/stdin shell pipeline.
    - Anything else is assumed to be a file path.
    """
    is_url = script.startswith(("http://", "https://"))

    if not is_url and "\n" in script:
        # Inline source: run through /bin/sh so the base64 pipeline works
        wrapped = _wrap_inline_script(script, with_deps, python, script_args)
        return ["/bin/sh", "-lc", wrapped]

    return _build_uv_command(script, with_deps, python, script_args)
217
+
218
+
219
+ async def _async_call(func, *args, **kwargs):
220
+ """Wrap synchronous HfApi calls for async context"""
221
+ return await asyncio.to_thread(func, *args, **kwargs)
222
+
223
+
224
+ def _job_info_to_dict(job_info) -> Dict[str, Any]:
225
+ """Convert JobInfo object to dictionary for formatting functions"""
226
+ return {
227
+ "id": job_info.id,
228
+ "status": {"stage": job_info.status.stage, "message": job_info.status.message},
229
+ "command": job_info.command,
230
+ "createdAt": job_info.created_at.isoformat(),
231
+ "dockerImage": job_info.docker_image,
232
+ "spaceId": job_info.space_id,
233
+ "hardware_flavor": job_info.flavor,
234
+ "owner": {"name": job_info.owner.name},
235
+ }
236
+
237
+
238
+ def _scheduled_job_info_to_dict(scheduled_job_info) -> Dict[str, Any]:
239
+ """Convert ScheduledJobInfo object to dictionary for formatting functions"""
240
+ job_spec = scheduled_job_info.job_spec
241
+
242
+ # Extract last run and next run from status
243
+ last_run = None
244
+ next_run = None
245
+ if scheduled_job_info.status:
246
+ if scheduled_job_info.status.last_job:
247
+ last_run = scheduled_job_info.status.last_job.created_at
248
+ if last_run:
249
+ last_run = (
250
+ last_run.isoformat()
251
+ if hasattr(last_run, "isoformat")
252
+ else str(last_run)
253
+ )
254
+ if scheduled_job_info.status.next_job_run_at:
255
+ next_run = scheduled_job_info.status.next_job_run_at
256
+ next_run = (
257
+ next_run.isoformat()
258
+ if hasattr(next_run, "isoformat")
259
+ else str(next_run)
260
+ )
261
+
262
+ return {
263
+ "id": scheduled_job_info.id,
264
+ "schedule": scheduled_job_info.schedule,
265
+ "suspend": scheduled_job_info.suspend,
266
+ "lastRun": last_run,
267
+ "nextRun": next_run,
268
+ "jobSpec": {
269
+ "dockerImage": job_spec.docker_image,
270
+ "spaceId": job_spec.space_id,
271
+ "command": job_spec.command or [],
272
+ "hardware_flavor": job_spec.flavor or "cpu-basic",
273
+ },
274
+ }
275
+
276
+
277
class HfJobsTool:
    """Tool for managing Hugging Face compute jobs using huggingface-hub library"""

    def __init__(
        self,
        hf_token: Optional[str] = None,
        namespace: Optional[str] = None,
        log_callback: Optional[Callable[[str], Awaitable[None]]] = None,
    ):
        """Bind the tool to one HF account/namespace.

        Args:
            hf_token: Token passed to HfApi for all Hub calls; also injected
                into job secrets by the run/schedule operations.
            namespace: User/org namespace passed through to every jobs API
                call (None defers to the API's default).
            log_callback: Optional async callable invoked once per streamed
                log line while a job is being waited on.
        """
        self.hf_token = hf_token
        self.api = HfApi(token=hf_token)
        self.namespace = namespace
        self.log_callback = log_callback
290
+
291
+ async def execute(self, params: Dict[str, Any]) -> ToolResult:
292
+ """Execute the specified operation"""
293
+ operation = params.get("operation")
294
+
295
+ args = params
296
+
297
+ # If no operation provided, return error
298
+ if not operation:
299
+ return {
300
+ "formatted": "Error: 'operation' parameter is required. See tool description for available operations and usage examples.",
301
+ "totalResults": 0,
302
+ "resultsShared": 0,
303
+ "isError": True,
304
+ }
305
+
306
+ # Normalize operation name
307
+ operation = operation.lower()
308
+
309
+ try:
310
+ # Route to appropriate handler
311
+ if operation == "run":
312
+ return await self._run_job(args)
313
+ elif operation == "ps":
314
+ return await self._list_jobs(args)
315
+ elif operation == "logs":
316
+ return await self._get_logs(args)
317
+ elif operation == "inspect":
318
+ return await self._inspect_job(args)
319
+ elif operation == "cancel":
320
+ return await self._cancel_job(args)
321
+ elif operation == "scheduled run":
322
+ return await self._scheduled_run(args)
323
+ elif operation == "scheduled ps":
324
+ return await self._list_scheduled_jobs(args)
325
+ elif operation == "scheduled inspect":
326
+ return await self._inspect_scheduled_job(args)
327
+ elif operation == "scheduled delete":
328
+ return await self._delete_scheduled_job(args)
329
+ elif operation == "scheduled suspend":
330
+ return await self._suspend_scheduled_job(args)
331
+ elif operation == "scheduled resume":
332
+ return await self._resume_scheduled_job(args)
333
+ else:
334
+ return {
335
+ "formatted": f'Unknown operation: "{operation}"\n\n'
336
+ "Available operations:\n"
337
+ "- run, ps, logs, inspect, cancel\n"
338
+ "- scheduled run, scheduled ps, scheduled inspect, "
339
+ "scheduled delete, scheduled suspend, scheduled resume\n\n"
340
+ "Call this tool with no operation for full usage instructions.",
341
+ "totalResults": 0,
342
+ "resultsShared": 0,
343
+ "isError": True,
344
+ }
345
+
346
+ except HfHubHTTPError as e:
347
+ return {
348
+ "formatted": f"API Error: {str(e)}",
349
+ "totalResults": 0,
350
+ "resultsShared": 0,
351
+ "isError": True,
352
+ }
353
+ except Exception as e:
354
+ return {
355
+ "formatted": f"Error executing {operation}: {str(e)}",
356
+ "totalResults": 0,
357
+ "resultsShared": 0,
358
+ "isError": True,
359
+ }
360
+
361
    async def _wait_for_job_completion(
        self, job_id: str, namespace: Optional[str] = None
    ) -> tuple[str, list[str]]:
        """
        Stream job logs until completion, printing them in real-time.
        Implements retry logic to handle connection drops during long-running jobs.

        Each streamed line is forwarded to self.log_callback (if set) and
        collected; the job's final status is fetched once streaming ends.

        Returns:
            tuple: (final_status, all_logs)
        """
        all_logs: list[str] = []
        terminal_states = {"COMPLETED", "FAILED", "CANCELED", "ERROR"}
        max_retries = 100  # Allow many retries for 8h+ jobs
        retry_delay = 5  # Seconds between retries

        for _ in range(max_retries):
            try:
                # Use a queue to bridge sync generator to async consumer
                queue = asyncio.Queue()
                loop = asyncio.get_running_loop()

                def log_producer():
                    try:
                        # fetch_job_logs is a blocking sync generator
                        logs_gen = self.api.fetch_job_logs(job_id=job_id, namespace=namespace)
                        for line in logs_gen:
                            # Push line to queue thread-safely
                            loop.call_soon_threadsafe(queue.put_nowait, line)
                        # Signal EOF
                        loop.call_soon_threadsafe(queue.put_nowait, None)
                    except Exception as e:
                        # Signal error: the exception object itself is the sentinel
                        loop.call_soon_threadsafe(queue.put_nowait, e)

                # Start producer in a background thread so it doesn't block the event loop
                producer_future = loop.run_in_executor(None, log_producer)

                # Consume logs from the queue as they arrive
                while True:
                    item = await queue.get()

                    # EOF sentinel
                    if item is None:
                        break

                    # Error occurred in producer; re-raise so the retry logic
                    # below can decide whether to reconnect
                    if isinstance(item, Exception):
                        raise item

                    # Process log line
                    log_line = item
                    logger.debug(log_line)
                    if self.log_callback:
                        await self.log_callback(log_line)
                    all_logs.append(log_line)

                # If we get here, streaming completed normally (EOF received)
                # Wait for thread to cleanup (should be done)
                await producer_future
                break

            except (
                ConnectionError,
                TimeoutError,
                OSError,
                http.client.IncompleteRead,
                httpx.RemoteProtocolError,
                httpx.ReadError,
                HfHubHTTPError,
            ) as e:
                # Connection dropped - check if job is still running
                try:
                    job_info = await _async_call(
                        self.api.inspect_job, job_id=job_id, namespace=namespace
                    )
                    current_status = job_info.status.stage

                    if current_status in terminal_states:
                        # Job finished, no need to retry
                        logger.info(f"Job reached terminal state: {current_status}")
                        break

                    # Job still running, retry connection
                    logger.warning(
                        f"Connection interrupted ({str(e)[:50]}...), reconnecting in {retry_delay}s..."
                    )
                    await asyncio.sleep(retry_delay)
                    continue

                except (ConnectionError, TimeoutError, OSError):
                    # Can't even check job status, wait and retry.
                    # NOTE(review): an HfHubHTTPError from inspect_job here is
                    # NOT caught and would abort the wait — confirm intended.
                    logger.warning(f"Connection error, retrying in {retry_delay}s...")
                    await asyncio.sleep(retry_delay)
                    continue

        # Fetch final job status (one extra inspect even after a clean EOF)
        job_info = await _async_call(
            self.api.inspect_job, job_id=job_id, namespace=namespace
        )
        final_status = job_info.status.stage

        return final_status, all_logs
463
+
464
    async def _run_job(self, args: Dict[str, Any]) -> ToolResult:
        """Run a job using HfApi.run_job() - smart detection of Python vs Docker mode.

        Python mode ('script' key): the script is wrapped into a `uv run`
        command and executed on a UV image. Docker mode ('command' key): the
        given argv runs on the given (or default) image. Blocks until the job
        reaches a terminal state, streaming logs, then returns status + logs.

        Raises:
            Exception: wraps any validation or API failure with a
                "Failed to run job" prefix (caller converts it to an error result).
        """
        try:
            script = args.get("script")
            command = args.get("command")

            # Validate mutually exclusive parameters
            if script and command:
                raise ValueError(
                    "'script' and 'command' are mutually exclusive. Provide one or the other, not both."
                )

            if not script and not command:
                raise ValueError(
                    "Either 'script' (for Python) or 'command' (for Docker) must be provided."
                )

            # Python mode: script provided
            if script:
                # Get dependencies and ensure hf-transfer is included
                deps = _ensure_hf_transfer_dependency(args.get("dependencies"))

                # Resolve the command based on script type (URL, inline, or file)
                command = _resolve_uv_command(
                    script=script,
                    with_deps=deps,
                    python=args.get("python"),
                    script_args=args.get("script_args"),
                )

                # Use UV image unless overridden
                image = args.get("image", UV_DEFAULT_IMAGE)
                job_type = "Python"

            # Docker mode: command provided
            else:
                image = args.get("image", "python:3.12")
                job_type = "Docker"

            # Run the job; the user's HF token is force-injected into secrets
            job = await _async_call(
                self.api.run_job,
                image=image,
                command=command,
                env=args.get("env"),
                secrets=_add_environment_variables(args.get("secrets"), self.hf_token),
                flavor=args.get("hardware_flavor", "cpu-basic"),
                timeout=args.get("timeout", "30m"),
                namespace=self.namespace,
            )

            # Wait for completion and stream logs
            logger.info(f"{job_type} job started: {job.url}")
            logger.info("Streaming logs...")

            final_status, all_logs = await self._wait_for_job_completion(
                job_id=job.id,
                namespace=self.namespace,
            )

            # Filter out UV package installation output (keeps the agent context small)
            filtered_logs = _filter_uv_install_output(all_logs)

            # Format all logs for the agent
            log_text = "\n".join(filtered_logs) if filtered_logs else "(no logs)"

            response = f"""{job_type} job completed!

**Job ID:** {job.id}
**Final Status:** {final_status}
**View at:** {job.url}

**Logs:**
```
{log_text}
```"""
            return {"formatted": response, "totalResults": 1, "resultsShared": 1}

        except Exception as e:
            raise Exception(f"Failed to run job: {str(e)}")
544
+
545
+ async def _list_jobs(self, args: Dict[str, Any]) -> ToolResult:
546
+ """List jobs using HfApi.list_jobs()"""
547
+ jobs_list = await _async_call(self.api.list_jobs, namespace=self.namespace)
548
+
549
+ # Filter jobs
550
+ if not args.get("all", False):
551
+ jobs_list = [j for j in jobs_list if j.status.stage == "RUNNING"]
552
+
553
+ if args.get("status"):
554
+ status_filter = args["status"].upper()
555
+ jobs_list = [j for j in jobs_list if status_filter in j.status.stage]
556
+
557
+ # Convert JobInfo objects to dicts for formatting
558
+ jobs_dicts = [_job_info_to_dict(j) for j in jobs_list]
559
+
560
+ table = format_jobs_table(jobs_dicts)
561
+
562
+ if len(jobs_list) == 0:
563
+ if args.get("all", False):
564
+ return {
565
+ "formatted": "No jobs found.",
566
+ "totalResults": 0,
567
+ "resultsShared": 0,
568
+ }
569
+ return {
570
+ "formatted": 'No running jobs found. Use `{"operation": "ps", "all": true}` to show all jobs.',
571
+ "totalResults": 0,
572
+ "resultsShared": 0,
573
+ }
574
+
575
+ response = f"**Jobs ({len(jobs_list)} total):**\n\n{table}"
576
+ return {
577
+ "formatted": response,
578
+ "totalResults": len(jobs_list),
579
+ "resultsShared": len(jobs_list),
580
+ }
581
+
582
+ async def _get_logs(self, args: Dict[str, Any]) -> ToolResult:
583
+ """Fetch logs using HfApi.fetch_job_logs()"""
584
+ job_id = args.get("job_id")
585
+ if not job_id:
586
+ return {
587
+ "formatted": "job_id is required",
588
+ "isError": True,
589
+ "totalResults": 0,
590
+ "resultsShared": 0,
591
+ }
592
+
593
+ try:
594
+ # Fetch logs (returns generator, convert to list)
595
+ logs_gen = self.api.fetch_job_logs(job_id=job_id, namespace=self.namespace)
596
+ logs = await _async_call(list, logs_gen)
597
+
598
+ if not logs:
599
+ return {
600
+ "formatted": f"No logs available for job {job_id}",
601
+ "totalResults": 0,
602
+ "resultsShared": 0,
603
+ }
604
+
605
+ log_text = "\n".join(logs)
606
+ return {
607
+ "formatted": f"**Logs for {job_id}:**\n\n```\n{log_text}\n```",
608
+ "totalResults": 1,
609
+ "resultsShared": 1,
610
+ }
611
+
612
+ except Exception as e:
613
+ return {
614
+ "formatted": f"Failed to fetch logs: {str(e)}",
615
+ "isError": True,
616
+ "totalResults": 0,
617
+ "resultsShared": 0,
618
+ }
619
+
620
+ async def _inspect_job(self, args: Dict[str, Any]) -> ToolResult:
621
+ """Inspect job using HfApi.inspect_job()"""
622
+ job_id = args.get("job_id")
623
+ if not job_id:
624
+ return {
625
+ "formatted": "job_id is required",
626
+ "totalResults": 0,
627
+ "resultsShared": 0,
628
+ "isError": True,
629
+ }
630
+
631
+ job_ids = job_id if isinstance(job_id, list) else [job_id]
632
+
633
+ jobs = []
634
+ for jid in job_ids:
635
+ try:
636
+ job = await _async_call(
637
+ self.api.inspect_job,
638
+ job_id=jid,
639
+ namespace=self.namespace,
640
+ )
641
+ jobs.append(_job_info_to_dict(job))
642
+ except Exception as e:
643
+ raise Exception(f"Failed to inspect job {jid}: {str(e)}")
644
+
645
+ formatted_details = format_job_details(jobs)
646
+ response = f"**Job Details** ({len(jobs)} job{'s' if len(jobs) > 1 else ''}):\n\n{formatted_details}"
647
+
648
+ return {
649
+ "formatted": response,
650
+ "totalResults": len(jobs),
651
+ "resultsShared": len(jobs),
652
+ }
653
+
654
+ async def _cancel_job(self, args: Dict[str, Any]) -> ToolResult:
655
+ """Cancel job using HfApi.cancel_job()"""
656
+ job_id = args.get("job_id")
657
+ if not job_id:
658
+ return {
659
+ "formatted": "job_id is required",
660
+ "totalResults": 0,
661
+ "resultsShared": 0,
662
+ "isError": True,
663
+ }
664
+
665
+ await _async_call(
666
+ self.api.cancel_job,
667
+ job_id=job_id,
668
+ namespace=self.namespace,
669
+ )
670
+
671
+ response = f"""✓ Job {job_id} has been cancelled.
672
+
673
+ To verify, call this tool with `{{"operation": "inspect", "job_id": "{job_id}"}}`"""
674
+
675
+ return {"formatted": response, "totalResults": 1, "resultsShared": 1}
676
+
677
    async def _scheduled_run(self, args: Dict[str, Any]) -> ToolResult:
        """Create scheduled job using HfApi.create_scheduled_job() - smart detection of Python vs Docker mode.

        Mirrors _run_job's script/command resolution, but registers a cron-style
        schedule instead of executing immediately (so no logs are streamed).

        Raises:
            Exception: wraps any validation or API failure with a
                "Failed to create scheduled job" prefix.
        """
        try:
            script = args.get("script")
            command = args.get("command")
            schedule = args.get("schedule")

            if not schedule:
                raise ValueError("schedule is required for scheduled jobs")

            # Validate mutually exclusive parameters
            if script and command:
                raise ValueError(
                    "'script' and 'command' are mutually exclusive. Provide one or the other, not both."
                )

            if not script and not command:
                raise ValueError(
                    "Either 'script' (for Python) or 'command' (for Docker) must be provided."
                )

            # Python mode: script provided
            if script:
                # Get dependencies and ensure hf-transfer is included
                deps = _ensure_hf_transfer_dependency(args.get("dependencies"))

                # Resolve the command based on script type
                command = _resolve_uv_command(
                    script=script,
                    with_deps=deps,
                    python=args.get("python"),
                    script_args=args.get("script_args"),
                )

                # Use UV image unless overridden
                image = args.get("image", UV_DEFAULT_IMAGE)
                job_type = "Python"

            # Docker mode: command provided
            else:
                image = args.get("image", "python:3.12")
                job_type = "Docker"

            # Create scheduled job; the user's HF token is force-injected into secrets
            scheduled_job = await _async_call(
                self.api.create_scheduled_job,
                image=image,
                command=command,
                schedule=schedule,
                env=args.get("env"),
                secrets=_add_environment_variables(args.get("secrets"), self.hf_token),
                flavor=args.get("hardware_flavor", "cpu-basic"),
                timeout=args.get("timeout", "30m"),
                namespace=self.namespace,
            )

            scheduled_dict = _scheduled_job_info_to_dict(scheduled_job)

            response = f"""✓ Scheduled {job_type} job created successfully!

**Scheduled Job ID:** {scheduled_dict["id"]}
**Schedule:** {scheduled_dict["schedule"]}
**Suspended:** {"Yes" if scheduled_dict.get("suspend") else "No"}
**Next Run:** {scheduled_dict.get("nextRun", "N/A")}

To inspect, call this tool with `{{"operation": "scheduled inspect", "scheduled_job_id": "{scheduled_dict["id"]}"}}`
To list all, call this tool with `{{"operation": "scheduled ps"}}`"""

            return {"formatted": response, "totalResults": 1, "resultsShared": 1}

        except Exception as e:
            raise Exception(f"Failed to create scheduled job: {str(e)}")
749
+
750
+ async def _list_scheduled_jobs(self, args: Dict[str, Any]) -> ToolResult:
751
+ """List scheduled jobs using HfApi.list_scheduled_jobs()"""
752
+ scheduled_jobs_list = await _async_call(
753
+ self.api.list_scheduled_jobs,
754
+ namespace=self.namespace,
755
+ )
756
+
757
+ # Filter jobs - default: hide suspended jobs unless --all is specified
758
+ if not args.get("all", False):
759
+ scheduled_jobs_list = [j for j in scheduled_jobs_list if not j.suspend]
760
+
761
+ # Convert to dicts for formatting
762
+ scheduled_dicts = [_scheduled_job_info_to_dict(j) for j in scheduled_jobs_list]
763
+
764
+ table = format_scheduled_jobs_table(scheduled_dicts)
765
+
766
+ if len(scheduled_jobs_list) == 0:
767
+ if args.get("all", False):
768
+ return {
769
+ "formatted": "No scheduled jobs found.",
770
+ "totalResults": 0,
771
+ "resultsShared": 0,
772
+ }
773
+ return {
774
+ "formatted": 'No active scheduled jobs found. Use `{"operation": "scheduled ps", "all": true}` to show suspended jobs.',
775
+ "totalResults": 0,
776
+ "resultsShared": 0,
777
+ }
778
+
779
+ response = f"**Scheduled Jobs ({len(scheduled_jobs_list)} total):**\n\n{table}"
780
+ return {
781
+ "formatted": response,
782
+ "totalResults": len(scheduled_jobs_list),
783
+ "resultsShared": len(scheduled_jobs_list),
784
+ }
785
+
786
+ async def _inspect_scheduled_job(self, args: Dict[str, Any]) -> ToolResult:
787
+ """Inspect scheduled job using HfApi.inspect_scheduled_job()"""
788
+ scheduled_job_id = args.get("scheduled_job_id")
789
+ if not scheduled_job_id:
790
+ return {
791
+ "formatted": "scheduled_job_id is required",
792
+ "totalResults": 0,
793
+ "resultsShared": 0,
794
+ "isError": True,
795
+ }
796
+
797
+ scheduled_job = await _async_call(
798
+ self.api.inspect_scheduled_job,
799
+ scheduled_job_id=scheduled_job_id,
800
+ namespace=self.namespace,
801
+ )
802
+
803
+ scheduled_dict = _scheduled_job_info_to_dict(scheduled_job)
804
+ formatted_details = format_scheduled_job_details(scheduled_dict)
805
+
806
+ return {
807
+ "formatted": f"**Scheduled Job Details:**\n\n{formatted_details}",
808
+ "totalResults": 1,
809
+ "resultsShared": 1,
810
+ }
811
+
812
+ async def _delete_scheduled_job(self, args: Dict[str, Any]) -> ToolResult:
813
+ """Delete scheduled job using HfApi.delete_scheduled_job()"""
814
+ scheduled_job_id = args.get("scheduled_job_id")
815
+ if not scheduled_job_id:
816
+ return {
817
+ "formatted": "scheduled_job_id is required",
818
+ "totalResults": 0,
819
+ "resultsShared": 0,
820
+ "isError": True,
821
+ }
822
+
823
+ await _async_call(
824
+ self.api.delete_scheduled_job,
825
+ scheduled_job_id=scheduled_job_id,
826
+ namespace=self.namespace,
827
+ )
828
+
829
+ return {
830
+ "formatted": f"✓ Scheduled job {scheduled_job_id} has been deleted.",
831
+ "totalResults": 1,
832
+ "resultsShared": 1,
833
+ }
834
+
835
+ async def _suspend_scheduled_job(self, args: Dict[str, Any]) -> ToolResult:
836
+ """Suspend scheduled job using HfApi.suspend_scheduled_job()"""
837
+ scheduled_job_id = args.get("scheduled_job_id")
838
+ if not scheduled_job_id:
839
+ return {
840
+ "formatted": "scheduled_job_id is required",
841
+ "totalResults": 0,
842
+ "resultsShared": 0,
843
+ "isError": True,
844
+ }
845
+
846
+ await _async_call(
847
+ self.api.suspend_scheduled_job,
848
+ scheduled_job_id=scheduled_job_id,
849
+ namespace=self.namespace,
850
+ )
851
+
852
+ response = f"""✓ Scheduled job {scheduled_job_id} has been suspended.
853
+
854
+ To resume, call this tool with `{{"operation": "scheduled resume", "scheduled_job_id": "{scheduled_job_id}"}}`"""
855
+
856
+ return {"formatted": response, "totalResults": 1, "resultsShared": 1}
857
+
858
+ async def _resume_scheduled_job(self, args: Dict[str, Any]) -> ToolResult:
859
+ """Resume scheduled job using HfApi.resume_scheduled_job()"""
860
+ scheduled_job_id = args.get("scheduled_job_id")
861
+ if not scheduled_job_id:
862
+ return {
863
+ "formatted": "scheduled_job_id is required",
864
+ "totalResults": 0,
865
+ "resultsShared": 0,
866
+ "isError": True,
867
+ }
868
+
869
+ await _async_call(
870
+ self.api.resume_scheduled_job,
871
+ scheduled_job_id=scheduled_job_id,
872
+ namespace=self.namespace,
873
+ )
874
+
875
+ response = f"""✓ Scheduled job {scheduled_job_id} has been resumed.
876
+
877
+ To inspect, call this tool with `{{"operation": "scheduled inspect", "scheduled_job_id": "{scheduled_job_id}"}}`"""
878
+
879
+ return {"formatted": response, "totalResults": 1, "resultsShared": 1}
880
+
881
+
882
# Tool specification for agent registration.
# Shape follows the function-calling convention: name + natural-language
# description + JSON-schema "parameters". The description doubles as the
# LLM-facing usage guide, so it is intentionally long and prescriptive.
HF_JOBS_TOOL_SPEC = {
    "name": "hf_jobs",
    # Human-readable guide injected into the model's tool list
    "description": (
        "Execute Python scripts or Docker containers on HF cloud infrastructure (CPUs/GPUs) in one of two modes. "
        "\n\n"
        "**Two Modes (mutually exclusive):**\n"
        "1. Python mode: using 'script' arg (REQUIRED) + 'dependencies'\n"
        "2. Docker mode: using 'command' arg (REQUIRED) + 'image'\n\n"
        "🚨 **REQUIRED:** You MUST provide exactly ONE of: 'script' (Python code as string) OR 'command' (Docker command as array). "
        "They are mutually exclusive - provide one or the other, never both, never neither. "
        "Do NOT call with just {'operation': 'run'} - always include your code. Example: {'operation': 'run', 'script': 'import torch; print(torch.cuda.is_available())', 'dependencies': ['torch']} or {'operation': 'run', 'command': ['duckdb', '-c', 'select 1 + 2']', 'image': 'duckdb/duckdb'}\n\n"
        "⚠️ CRITICAL for reliability: (1) Jobs run ASYNC - provide monitoring URL immediately, don't poll; "
        "(2) Set timeout >30min (default too short - training needs 2-8h); "
        "(3) HF_TOKEN auto-loaded to secrets for Hub ops (push_to_hub, private repos); "
        "(4) Job storage EPHEMERAL - MUST push_to_hub() or ALL work is LOST. "
        "**Use when:** User wants cloud compute, training models, data processing, batch inference, GPU workloads, scheduled tasks. "
        "ALWAYS use this tool (✓), never bash 'hf jobs' commands (✗). Pass script content inline (✓), don't save to files unless requested (✗). "
        "\n\n"
        "**Operations:** run, ps, logs, inspect, cancel, scheduled run, scheduled ps, scheduled inspect, scheduled delete, scheduled suspend, scheduled resume. "
        "**Available Hardware (vCPU/RAM/GPU):**\n"
        # Flavor lists are interpolated from module-level constants so the
        # spec stays in sync with the supported hardware
        f"• CPU: {CPU_FLAVORS_DESC}\n"
        f"• GPU: {GPU_FLAVORS_DESC}\n"
        " ◦ Common: t4-small ($0.60/hr, demos/1-3B models), a10g-small ($1/hr), a10g-large ($2/hr, production 7-13B), a100-large ($4/hr, 30B+), h100 ($6/hr, 70B+)\n\n"
        "**After Submission Ground Rules:**\n"
        "✓ Return immediately with job ID and monitoring URL\n"
        "✓ Provide expected completion time and cost estimate\n"
        "✓ For training: Include Trackio dashboard URL\n"
        "✓ Note user can check status later\n"
        "✗ DON'T poll logs automatically\n"
        "✗ DON'T wait for completion\n"
        "✗ DON'T check status unless user asks\n\n"
        "**For Training Tasks:**\n"
        "• ALWAYS research TRL docs first: explore_hf_docs('trl') → fetch_hf_docs(<trainer_url>)\n"
        "• ALWAYS validate dataset format with hub_repo_details (SFT needs messages/text, DPO needs chosen/rejected)\n"
        "• ALWAYS include Trackio monitoring in script (explore_hf_docs('trackio'))\n"
        "• ALWAYS enable push_to_hub=True in training config\n"
        "• Set timeout 2-8h for training (NOT default 30m)\n"
        "• Confirm model/dataset choices with user before submitting\n\n"
        "**Examples:**\n\n"
        "**Training - Fine-tune LLM:**\n"
        "{'operation': 'run', 'script': '# Training script with TRL\\nfrom trl import SFTConfig, SFTTrainer\\nfrom transformers import AutoModelForCausalLM\\nmodel = AutoModelForCausalLM.from_pretrained(\"Qwen/Qwen3-4B\")\\n# ... researched implementation from docs ...\\ntrainer.train()\\ntrainer.push_to_hub(\"user-name/my-model\")', 'dependencies': ['transformers', 'trl', 'torch', 'datasets', 'trackio'], 'hardware_flavor': 'a10g-large', 'timeout': '4h'}\n\n"
        "**Data Processing:**\n"
        "{'operation': 'run', 'script': 'from datasets import load_dataset\\nds = load_dataset(\"data\")\\n# process...\\nds.push_to_hub(\"user/processed\")', 'dependencies': ['datasets', 'pandas'], 'hardware_flavor': 'cpu-upgrade', 'timeout': '2h'}\n\n"
        "**Scheduled Daily Job:**\n"
        "{'operation': 'scheduled run', 'schedule': '@daily', 'script': 'from datasets import Dataset\\nimport pandas as pd\\n# scrape/generate data\\ndf = pd.DataFrame(data)\\nds = Dataset.from_pandas(df)\\nds.push_to_hub(\"user-name/daily-dataset\")', 'dependencies': ['datasets', 'pandas'], 'hardware_flavor': 'cpu-basic'}\n\n"
        "**Docker Mode:**\n"
        "{'operation': 'run', 'image': 'pytorch/pytorch:2.0.0-cuda11.7-cudnn8-runtime', 'command': ['python', 'train.py', '--epochs', '10'], 'hardware_flavor': 'a100-large'}\n\n"
        "**Monitor Operations:**\n"
        "{'operation': 'ps'} - List all jobs\n"
        "{'operation': 'logs', 'job_id': 'xxx'} - Stream logs (only when user requests)\n"
        "{'operation': 'inspect', 'job_id': 'xxx'} - Get job details\n"
        "{'operation': 'cancel', 'job_id': 'xxx'} - Stop job\n\n"
        "⚠️ CRITICAL: Files created during execution are DELETED when job finishes. MUST push_to_hub() all outputs (models, datasets, artifacts) in script. For logs/scripts, use hf_private_repos after completion."
    ),
    # JSON-schema style declaration consumed by the agent's tool router
    "parameters": {
        "type": "object",
        "properties": {
            "operation": {
                "type": "string",
                "enum": [
                    "run",
                    "ps",
                    "logs",
                    "inspect",
                    "cancel",
                    "scheduled run",
                    "scheduled ps",
                    "scheduled inspect",
                    "scheduled delete",
                    "scheduled suspend",
                    "scheduled resume",
                ],
                "description": (
                    "Operation to execute. Valid values: [run, ps, logs, inspect, cancel, "
                    "scheduled run, scheduled ps, scheduled inspect, scheduled delete, "
                    "scheduled suspend, scheduled resume]"
                ),
            },
            # Python/UV specific parameters
            "script": {
                "type": "string",
                "description": "Python code to execute. Triggers Python mode (auto pip install). Use with 'run'/'scheduled run'. Mutually exclusive with 'command'.",
            },
            "dependencies": {
                "type": "array",
                "items": {"type": "string"},
                "description": "Pip packages to install. Example: ['trl', 'torch', 'datasets', 'transformers']. Only used with 'script'.",
            },
            # Docker specific parameters
            "image": {
                "type": "string",
                "description": "Docker image. Example: 'pytorch/pytorch:2.0.0-cuda11.7-cudnn8-runtime'. Use with 'run'/'scheduled run'. Optional (auto-selected if not provided).",
            },
            "command": {
                "type": "array",
                "items": {"type": "string"},
                "description": "Command to execute as list. Example: ['python', 'train.py', '--epochs', '10']. Triggers Docker mode. Use with 'run'/'scheduled run'. Mutually exclusive with 'script'.",
            },
            # Hardware and environment
            "hardware_flavor": {
                "type": "string",
                "description": f"Hardware type. Available CPU flavors: {CPU_FLAVORS}. Available GPU flavors: {GPU_FLAVORS}. Use with 'run'/'scheduled run'.",
            },
            "timeout": {
                "type": "string",
                "description": "Max runtime. Examples: '30m', '2h', '4h'. Default: '30m'. Important for long training jobs. Use with 'run'/'scheduled run'.",
            },
            "env": {
                "type": "object",
                "description": "Environment variables. Format: {'KEY': 'VALUE'}. HF_TOKEN is automatically included from your auth. Use with 'run'/'scheduled run'.",
            },
            # Job management parameters
            "job_id": {
                "type": "string",
                "description": "Job ID to operate on. Required for: 'logs', 'inspect', 'cancel'.",
            },
            # Scheduled job parameters
            "scheduled_job_id": {
                "type": "string",
                "description": "Scheduled job ID. Required for: 'scheduled inspect', 'scheduled delete', 'scheduled suspend', 'scheduled resume'.",
            },
            "schedule": {
                "type": "string",
                "description": "Schedule for recurring job. Presets: '@hourly', '@daily', '@weekly', '@monthly'. Cron: '0 9 * * 1' (Mon 9am). Required for: 'scheduled run'.",
            },
        },
        # Only 'operation' is schema-required; run/scheduled-run argument
        # validation (script XOR command, schedule, ids) happens at runtime
        "required": ["operation"],
    },
}
1012
+
1013
+
1014
async def hf_jobs_handler(
    arguments: Dict[str, Any], session: Any = None
) -> tuple[str, bool]:
    """Adapter between the agent's tool router and HfJobsTool.

    Resolves the auth token and namespace, runs the requested operation,
    and returns (formatted_output, success_flag); never raises.
    """
    try:
        # Forward streamed job log lines to the session as tool_log events
        log_callback = None
        if session:

            async def log_callback(log: str):
                await session.send_event(
                    Event(event_type="tool_log", data={"tool": "hf_jobs", "log": log})
                )

        # Prefer the authenticated user's OAuth token, fall back to global env
        hf_token = (
            (getattr(session, "hf_token", None) if session else None)
            or os.environ.get("HF_TOKEN")
            or os.environ.get("HUGGINGFACE_HUB_TOKEN")
        )

        # Namespace: explicit env override wins, else the token owner's name
        namespace = os.environ.get("HF_NAMESPACE")
        if not namespace and hf_token:
            namespace = HfApi(token=hf_token).whoami().get("name")

        tool = HfJobsTool(
            namespace=namespace,
            hf_token=hf_token,
            log_callback=log_callback,
        )
        result = await tool.execute(arguments)
        return result["formatted"], not result.get("isError", False)
    except Exception as e:
        return f"Error executing HF Jobs tool: {str(e)}", False
agent/tools/plan_tool.py ADDED
@@ -0,0 +1,138 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Any, Dict, List
2
+
3
+ from agent.core.session import Event
4
+ from agent.utils.terminal_display import format_plan_tool_output
5
+
6
+ from .types import ToolResult
7
+
8
+ # In-memory storage for the current plan (raw structure from agent)
9
+ _current_plan: List[Dict[str, str]] = []
10
+
11
+
12
class PlanTool:
    """Tool that records the agent's todo plan and reports it back."""

    def __init__(self, session: Any = None):
        # Optional session used to push plan updates to the client UI.
        self.session = session

    @staticmethod
    def _validation_error(todos: Any) -> str | None:
        """Return an error message for a malformed todo list, else None."""
        statuses = ["pending", "in_progress", "completed"]
        for entry in todos:
            if not isinstance(entry, dict):
                return "Error: Each todo must be an object. Re call the tool with correct format (mandatory)."
            for field in ("id", "content", "status"):
                if field not in entry:
                    return f"Error: Todo missing required field '{field}'. Re call the tool with correct format (mandatory)."
            if entry["status"] not in statuses:
                return (
                    f"Error: Invalid status '{entry['status']}'. "
                    f"Must be one of: {', '.join(statuses)}. Re call the tool with correct format (mandatory)."
                )
        return None

    async def execute(self, params: Dict[str, Any]) -> ToolResult:
        """Validate the plan, store it, notify the session, and format output.

        Args:
            params: Must contain "todos": a list of dicts, each with
                id, content, and status keys.

        Returns:
            ToolResult with the rendered plan, or an error description.
        """
        global _current_plan

        todos = params.get("todos", [])

        problem = self._validation_error(todos)
        if problem is not None:
            return {
                "formatted": problem,
                "isError": True,
            }

        # Each call replaces the whole plan (the tool contract is full-list).
        _current_plan = todos

        # Emit a plan update event so the client can render progress live.
        if self.session:
            await self.session.send_event(
                Event(
                    event_type="plan_update",
                    data={"plan": todos},
                )
            )

        return {
            "formatted": format_plan_tool_output(todos),
            "totalResults": len(todos),
            "isError": False,
        }
77
+
78
+
79
def get_current_plan() -> List[Dict[str, str]]:
    """Return the most recently stored plan (empty list when none was set)."""
    return _current_plan
82
+
83
+
84
# Tool specification
# JSON-schema style definition registered with the agent's tool router.
# The long description is deliberate: it is prompt guidance telling the
# model when and how to call the tool.
PLAN_TOOL_SPEC = {
    "name": "plan_tool",
    "description": (
        "Manage task planning and progress tracking with todo list (pending/in_progress/completed statuses). "
        "⚠️ CRITICAL: ALWAYS use for multi-step tasks (3+ steps) and MUST update frequently to show progress. "
        "**Use when:** (1) User provides multiple tasks, (2) Complex workflows (training, evaluation, data processing), "
        "(3) Tasks requiring multiple tool calls, (4) Need to communicate progress clearly to user, "
        "(5) Breaking down ambiguous requests into concrete steps. "
        "**Pattern:** Create plan at start → Mark in_progress when starting task → Mark completed immediately after finishing → User sees clear progress. "
        "Each call replaces entire plan (full list required). "
        "**Critical for reliability:** Exactly ONE task in_progress at a time (not zero, not multiple). "
        "Mark tasks completed IMMEDIATELY after finishing - don't batch completions. "
        "**For long-running tasks:** Update plan after each major step to keep user informed. "
        "**Only mark completed when:** Task fully accomplished, no errors, all requirements met. "
        "Keep tasks pending if blocked/errors occur - create new task to resolve blockers."
    ),
    "parameters": {
        "type": "object",
        "properties": {
            "todos": {
                "type": "array",
                "description": "List of todo items",
                "items": {
                    "type": "object",
                    "properties": {
                        "id": {
                            "type": "string",
                            "description": "Unique identifier for the todo",
                        },
                        "content": {
                            "type": "string",
                            "description": "Description of the todo task",
                        },
                        "status": {
                            "type": "string",
                            "enum": ["pending", "in_progress", "completed"],
                            "description": "Current status of the todo",
                        },
                    },
                    # All three keys are validated again at runtime by PlanTool.
                    "required": ["id", "content", "status"],
                },
            }
        },
        "required": ["todos"],
    },
}
131
+
132
+
133
async def plan_tool_handler(
    arguments: Dict[str, Any], session: Any = None
) -> tuple[str, bool]:
    """Adapter for the agent tool router: run PlanTool, return (text, ok)."""
    outcome = await PlanTool(session=session).execute(arguments)
    return outcome["formatted"], not outcome.get("isError", False)
agent/tools/private_hf_repo_tools.py ADDED
@@ -0,0 +1,650 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Private HF Repos Tool - Manage private Hugging Face repositories
3
+
4
+ PRIMARY USE: Store job outputs, training scripts, and logs from HF Jobs.
5
+ Since job results are ephemeral, this tool provides persistent storage in private repos.
6
+
7
+ SECONDARY USE: Read back stored files and list repo contents.
8
+ """
9
+
10
+ import asyncio
11
+ from typing import Any, Dict, Literal, Optional
12
+
13
+ from huggingface_hub import HfApi, hf_hub_download
14
+ from huggingface_hub.utils import HfHubHTTPError
15
+
16
+ from agent.tools.types import ToolResult
17
+
18
+ # Operation names
19
+ OperationType = Literal[
20
+ "upload_file", "create_repo", "check_repo", "list_files", "read_file"
21
+ ]
22
+
23
+
24
+ async def _async_call(func, *args, **kwargs):
25
+ """Wrap synchronous HfApi calls for async context."""
26
+ return await asyncio.to_thread(func, *args, **kwargs)
27
+
28
+
29
+ def _build_repo_url(repo_id: str, repo_type: str = "dataset") -> str:
30
+ """Build the Hub URL for a repository."""
31
+ type_path = "" if repo_type == "model" else f"{repo_type}s"
32
+ return f"https://huggingface.co/{type_path}/{repo_id}".replace("//", "/")
33
+
34
+
35
+ def _content_to_bytes(content: str | bytes) -> bytes:
36
+ """Convert string or bytes content to bytes."""
37
+ if isinstance(content, str):
38
+ return content.encode("utf-8")
39
+ return content
40
+
41
+
42
class PrivateHfRepoTool:
    """Tool for managing private Hugging Face repositories.

    All blocking ``HfApi`` calls are dispatched through ``_async_call``
    (an ``asyncio.to_thread`` wrapper) so the agent loop stays responsive.
    """

    def __init__(self, hf_token: Optional[str] = None):
        # Token-scoped client; with hf_token=None, HfApi falls back to
        # ambient credentials (env vars / cached login).
        self.api = HfApi(token=hf_token)

    async def execute(self, params: Dict[str, Any]) -> ToolResult:
        """Execute the specified upload operation.

        Args:
            params: ``{"operation": <name>, "args": {...}}``. Calling with
                no operation returns full usage instructions instead of an
                error.

        Returns:
            ToolResult describing the outcome (``isError=True`` on failure).
        """
        operation = params.get("operation")
        args = params.get("args", {})

        # If no operation provided, return usage instructions
        if not operation:
            return self._show_help()

        # Normalize operation name
        operation = operation.lower()

        # Check if help is requested
        if args.get("help"):
            return self._show_operation_help(operation)

        try:
            # Route to appropriate handler
            if operation == "upload_file":
                return await self._upload_file(args)
            elif operation == "create_repo":
                return await self._create_repo(args)
            elif operation == "check_repo":
                return await self._check_repo(args)
            elif operation == "list_files":
                return await self._list_files(args)
            elif operation == "read_file":
                return await self._read_file(args)
            else:
                return {
                    "formatted": f'Unknown operation: "{operation}"\n\n'
                    "Available operations: upload_file, create_repo, check_repo, list_files, read_file\n\n"
                    "Call this tool with no operation for full usage instructions.",
                    "totalResults": 0,
                    "resultsShared": 0,
                    "isError": True,
                }

        except HfHubHTTPError as e:
            # Hub-side HTTP failures (auth, 404, rate limiting, ...)
            return {
                "formatted": f"API Error: {str(e)}",
                "totalResults": 0,
                "resultsShared": 0,
                "isError": True,
            }
        except Exception as e:
            return {
                "formatted": f"Error executing {operation}: {str(e)}",
                "totalResults": 0,
                "resultsShared": 0,
                "isError": True,
            }

    def _show_help(self) -> ToolResult:
        """Show usage instructions when tool is called with no arguments."""
        usage_text = """# Private HF Repos Tool

**PRIMARY USE:** Store job outputs, scripts, and logs from HF Jobs to private repos.
Since job results are ephemeral, use this tool for persistent storage.

**SECONDARY USE:** Read back stored files and list repo contents.

## Available Commands

### Write Operations
- **upload_file** - Upload file content to a repository
- **create_repo** - Create a new private repository

### Read Operations
- **list_files** - List all files in a repository
- **read_file** - Read content of a specific file from a repository
- **check_repo** - Check if a repository exists

## Examples

### Upload a script to a dataset repo
Call this tool with:
```json
{
  "operation": "upload_file",
  "args": {
    "file_content": "import pandas as pd\\nprint('Hello from HF!')",
    "path_in_repo": "scripts/hello.py",
    "repo_id": "my-dataset",
    "repo_type": "dataset",
    "create_if_missing": true,
    "commit_message": "Add hello script"
  }
}
```

### Upload logs from a job
Call this tool with:
```json
{
  "operation": "upload_file",
  "args": {
    "file_content": "Job started...\\nJob completed successfully!",
    "path_in_repo": "jobs/job-abc123/logs.txt",
    "repo_id": "job-results",
    "create_if_missing": true
  }
}
```

### Create a repository
Call this tool with:
```json
{
  "operation": "create_repo",
  "args": {
    "repo_id": "my-results",
    "repo_type": "dataset"
  }
}
```

### Create a Space
Call this tool with:
```json
{
  "operation": "create_repo",
  "args": {
    "repo_id": "my-gradio-app",
    "repo_type": "space",
    "space_sdk": "gradio"
  }
}
```
Note: Repositories are always created as private. For spaces, `space_sdk` is required (gradio, streamlit, static, or docker).

### Check if a repository exists
Call this tool with:
```json
{
  "operation": "check_repo",
  "args": {
    "repo_id": "my-dataset",
    "repo_type": "dataset"
  }
}
```

### List files in a repository
Call this tool with:
```json
{
  "operation": "list_files",
  "args": {
    "repo_id": "job-results",
    "repo_type": "dataset"
  }
}
```

### Read a file from a repository
Call this tool with:
```json
{
  "operation": "read_file",
  "args": {
    "repo_id": "job-results",
    "path_in_repo": "jobs/job-abc123/script.py",
    "repo_type": "dataset"
  }
}
```

## Repository Types

- **dataset** (default) - For storing data, results, logs, scripts
- **model** - For ML models and related artifacts
- **space** - For Spaces and applications

## Tips

- **Content-based**: Pass file content directly as strings or bytes, not file paths
- **Repo ID format**: Use just the repo name (e.g., "my-dataset"). Username is automatically inferred from HF_TOKEN
- **Automatic repo creation**: Set `create_if_missing: true` to auto-create repos (requires user approval)
- **Organization**: Use path_in_repo to organize files (e.g., "jobs/job-123/script.py")
- **After jobs**: Upload job scripts and logs after compute jobs complete for reproducibility
"""
        return {"formatted": usage_text, "totalResults": 1, "resultsShared": 1}

    def _show_operation_help(self, operation: str) -> ToolResult:
        """Show help for a specific operation."""
        help_text = f"Help for operation: {operation}\n\nCall with appropriate arguments. Use the main help for examples."
        return {"formatted": help_text, "totalResults": 1, "resultsShared": 1}

    async def _upload_file(self, args: Dict[str, Any]) -> ToolResult:
        """Upload file content to a Hub repository."""
        # Validate required arguments.
        # NOTE: file_content is compared against None (not truthiness) so an
        # intentionally empty file ("" or b"") can still be uploaded.
        file_content = args.get("file_content")
        path_in_repo = args.get("path_in_repo")
        repo_id = args.get("repo_id")

        if file_content is None:
            return {
                "formatted": "file_content is required",
                "totalResults": 0,
                "resultsShared": 0,
                "isError": True,
            }

        if not path_in_repo:
            return {
                "formatted": "path_in_repo is required",
                "totalResults": 0,
                "resultsShared": 0,
                "isError": True,
            }

        if not repo_id:
            return {
                "formatted": "repo_id is required",
                "totalResults": 0,
                "resultsShared": 0,
                "isError": True,
            }

        repo_type = args.get("repo_type", "dataset")
        create_if_missing = args.get("create_if_missing", False)

        # Check if repo exists
        try:
            repo_exists = await _async_call(
                self.api.repo_exists, repo_id=repo_id, repo_type=repo_type
            )

            # Create repo if needed
            if not repo_exists and create_if_missing:
                create_args = {
                    "repo_id": repo_id,
                    "repo_type": repo_type,
                    "private": True,
                }
                # Pass through space_sdk if provided (required for spaces)
                if "space_sdk" in args:
                    create_args["space_sdk"] = args["space_sdk"]
                # BUGFIX: surface creation failures instead of ignoring the
                # result and attempting an upload that is doomed to fail with
                # a less useful error message.
                create_result = await self._create_repo(create_args)
                if create_result.get("isError"):
                    return create_result
            elif not repo_exists:
                return {
                    "formatted": f"Repository {repo_id} does not exist. Set create_if_missing: true to create it.",
                    "totalResults": 0,
                    "resultsShared": 0,
                    "isError": True,
                }

        except Exception as e:
            return {
                "formatted": f"Failed to check repository: {str(e)}",
                "totalResults": 0,
                "resultsShared": 0,
                "isError": True,
            }

        # Convert content to bytes
        file_bytes = _content_to_bytes(file_content)

        # Upload file
        try:
            await _async_call(
                self.api.upload_file,
                path_or_fileobj=file_bytes,
                path_in_repo=path_in_repo,
                repo_id=repo_id,
                repo_type=repo_type,
                commit_message=args.get("commit_message", f"Upload {path_in_repo}"),
            )

            repo_url = _build_repo_url(repo_id, repo_type)
            file_url = f"{repo_url}/blob/main/{path_in_repo}"

            response = f"""✓ File uploaded successfully!

**Repository:** {repo_id}
**File:** {path_in_repo}
**View at:** {file_url}
**Browse repo:** {repo_url}"""

            return {"formatted": response, "totalResults": 1, "resultsShared": 1}

        except Exception as e:
            return {
                "formatted": f"Failed to upload file: {str(e)}",
                "totalResults": 0,
                "resultsShared": 0,
                "isError": True,
            }

    async def _create_repo(self, args: Dict[str, Any]) -> ToolResult:
        """Create a new Hub repository (always private)."""
        repo_id = args.get("repo_id")

        if not repo_id:
            return {
                "formatted": "repo_id is required",
                "totalResults": 0,
                "resultsShared": 0,
                "isError": True,
            }

        repo_type = args.get("repo_type", "dataset")
        private = True  # Always create private repos
        space_sdk = args.get("space_sdk")  # Required if repo_type is "space"

        try:
            # Check if repo already exists
            repo_exists = await _async_call(
                self.api.repo_exists, repo_id=repo_id, repo_type=repo_type
            )

            if repo_exists:
                repo_url = _build_repo_url(repo_id, repo_type)
                return {
                    "formatted": f"Repository {repo_id} already exists.\n**View at:** {repo_url}",
                    "totalResults": 1,
                    "resultsShared": 1,
                }

            # Validate space_sdk for spaces
            if repo_type == "space" and not space_sdk:
                return {
                    "formatted": "space_sdk is required when creating a space. Valid values: gradio, streamlit, static, docker",
                    "totalResults": 0,
                    "resultsShared": 0,
                    "isError": True,
                }

            # Create repository
            create_kwargs = {
                "repo_id": repo_id,
                "repo_type": repo_type,
                "private": private,
                "exist_ok": True,
            }
            # Add space_sdk only for spaces
            if repo_type == "space" and space_sdk:
                create_kwargs["space_sdk"] = space_sdk

            repo_url = await _async_call(self.api.create_repo, **create_kwargs)

            response = f"""✓ Repository created successfully!

**Repository:** {repo_id}
**Type:** {repo_type}
**Private:** Yes
**View at:** {repo_url}"""

            return {"formatted": response, "totalResults": 1, "resultsShared": 1}

        except Exception as e:
            return {
                "formatted": f"Failed to create repository: {str(e)}",
                "totalResults": 0,
                "resultsShared": 0,
                "isError": True,
            }

    async def _check_repo(self, args: Dict[str, Any]) -> ToolResult:
        """Check if a Hub repository exists."""
        repo_id = args.get("repo_id")

        if not repo_id:
            return {
                "formatted": "repo_id is required",
                "totalResults": 0,
                "resultsShared": 0,
                "isError": True,
            }

        repo_type = args.get("repo_type", "dataset")

        try:
            repo_exists = await _async_call(
                self.api.repo_exists, repo_id=repo_id, repo_type=repo_type
            )

            if repo_exists:
                repo_url = _build_repo_url(repo_id, repo_type)
                response = f"""✓ Repository exists!

**Repository:** {repo_id}
**Type:** {repo_type}
**View at:** {repo_url}"""
            else:
                # Include a ready-to-use create_repo invocation in the reply.
                response = f"""Repository does not exist: {repo_id}

To create it, call this tool with:
```json
{{
  "operation": "create_repo",
  "args": {{
    "repo_id": "{repo_id}",
    "repo_type": "{repo_type}"
  }}
}}
```"""

            return {
                "formatted": response,
                "totalResults": 1 if repo_exists else 0,
                "resultsShared": 1 if repo_exists else 0,
            }

        except Exception as e:
            return {
                "formatted": f"Failed to check repository: {str(e)}",
                "totalResults": 0,
                "resultsShared": 0,
                "isError": True,
            }

    async def _list_files(self, args: Dict[str, Any]) -> ToolResult:
        """List all files in a Hub repository."""
        repo_id = args.get("repo_id")

        if not repo_id:
            return {
                "formatted": "repo_id is required",
                "totalResults": 0,
                "resultsShared": 0,
                "isError": True,
            }

        repo_type = args.get("repo_type", "dataset")

        try:
            # List all files in the repository
            files = await _async_call(
                self.api.list_repo_files, repo_id=repo_id, repo_type=repo_type
            )

            if not files:
                return {
                    "formatted": f"No files found in repository: {repo_id}",
                    "totalResults": 0,
                    "resultsShared": 0,
                }

            # Format file list
            file_list = "\n".join(f"- {f}" for f in sorted(files))
            repo_url = _build_repo_url(repo_id, repo_type)

            response = f"""✓ Files in repository: {repo_id}

**Total files:** {len(files)}
**Repository URL:** {repo_url}

**Files:**
{file_list}"""

            return {
                "formatted": response,
                "totalResults": len(files),
                "resultsShared": len(files),
            }

        except Exception as e:
            return {
                "formatted": f"Failed to list files: {str(e)}",
                "totalResults": 0,
                "resultsShared": 0,
                "isError": True,
            }

    async def _read_file(self, args: Dict[str, Any]) -> ToolResult:
        """Read content of a specific file from a Hub repository."""
        repo_id = args.get("repo_id")
        path_in_repo = args.get("path_in_repo")

        if not repo_id:
            return {
                "formatted": "repo_id is required",
                "totalResults": 0,
                "resultsShared": 0,
                "isError": True,
            }

        if not path_in_repo:
            return {
                "formatted": "path_in_repo is required",
                "totalResults": 0,
                "resultsShared": 0,
                "isError": True,
            }

        repo_type = args.get("repo_type", "dataset")

        try:
            # Download file to the local HF cache and read it from there.
            file_path = await _async_call(
                hf_hub_download,
                repo_id=repo_id,
                filename=path_in_repo,
                repo_type=repo_type,
                token=self.api.token,
            )

            # Read file content
            with open(file_path, "r", encoding="utf-8") as f:
                content = f.read()

            repo_url = _build_repo_url(repo_id, repo_type)
            file_url = f"{repo_url}/blob/main/{path_in_repo}"

            response = f"""✓ File read successfully!

**Repository:** {repo_id}
**File:** {path_in_repo}
**Size:** {len(content)} characters
**View at:** {file_url}

**Content:**
```
{content}
```"""

            return {"formatted": response, "totalResults": 1, "resultsShared": 1}

        except UnicodeDecodeError:
            # UnicodeDecodeError can only come from the read() above, so
            # file_path is guaranteed to be bound here. Report size instead.
            try:
                with open(file_path, "rb") as f:
                    binary_content = f.read()

                return {
                    "formatted": f"File is binary ({len(binary_content)} bytes). Cannot display as text.",
                    "totalResults": 1,
                    "resultsShared": 1,
                }
            except Exception as e:
                return {
                    "formatted": f"Failed to read binary file: {str(e)}",
                    "totalResults": 0,
                    "resultsShared": 0,
                    "isError": True,
                }
        except Exception as e:
            return {
                "formatted": f"Failed to read file: {str(e)}",
                "totalResults": 0,
                "resultsShared": 0,
                "isError": True,
            }
593
+
594
+
595
# Tool specification for agent registration
# JSON-schema style spec; the description text doubles as prompt guidance
# for the model on when and how to use the tool.
PRIVATE_HF_REPO_TOOL_SPEC = {
    "name": "hf_private_repos",
    "description": (
        "Manage private HF repositories - create, upload, read, list files in models/datasets/spaces. "
        "⚠️ PRIMARY USE: Store job outputs persistently (job storage is EPHEMERAL - everything deleted after completion). "
        "**Use when:** (1) Job completes and need to store logs/scripts/results, (2) Creating repos for training outputs, "
        "(3) Reading back stored files, (4) Managing Space files, (5) Organizing job artifacts by path. "
        "**Pattern:** hf_jobs (ephemeral) → hf_private_repos upload_file (persistent) → can read_file later. "
        "ALWAYS pass file_content as string/bytes (✓), never file paths (✗) - this is content-based, no filesystem access. "
        "**Operations:** create_repo (new private repo), upload_file (store content), read_file (retrieve content), list_files (browse), check_repo (verify exists). "
        "**Critical for reliability:** Jobs lose all files after completion - use this tool to preserve important outputs. "
        "Repositories created are ALWAYS private by default (good for sensitive training data/models). "
        "For Spaces: must provide space_sdk ('gradio', 'streamlit', 'static', 'docker') when creating. "
        "**Then:** After uploading, provide user with repository URL for viewing/sharing."
    ),
    "parameters": {
        "type": "object",
        "properties": {
            "operation": {
                "type": "string",
                "enum": [
                    "upload_file",
                    "create_repo",
                    "check_repo",
                    "list_files",
                    "read_file",
                ],
                "description": (
                    "Operation to execute. Valid values: [upload_file, create_repo, check_repo, list_files, read_file]"
                ),
            },
            "args": {
                "type": "object",
                "description": (
                    "Operation-specific arguments as a JSON object. "
                    "Write ops: file_content (string/bytes), path_in_repo (string), repo_id (string), "
                    "repo_type (dataset/model/space), create_if_missing (boolean), commit_message (string), "
                    "space_sdk (gradio/streamlit/static/docker - required when repo_type=space). "
                    "Read ops: repo_id (string), path_in_repo (for read_file), repo_type (optional)."
                ),
                # Free-form per-operation args; validated by the tool itself.
                "additionalProperties": True,
            },
        },
    },
}
641
+
642
+
643
async def private_hf_repo_handler(arguments: Dict[str, Any]) -> tuple[str, bool]:
    """Adapter for the agent tool router: run the tool, return (text, ok)."""
    try:
        outcome = await PrivateHfRepoTool().execute(arguments)
        return outcome["formatted"], not outcome.get("isError", False)
    except Exception as e:
        return f"Error executing Private HF Repo tool: {str(e)}", False
agent/tools/types.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Types for Hugging Face tools
3
+
4
+ Ported from: hf-mcp-server/packages/mcp/src/types/
5
+ """
6
+
7
+ from typing import TypedDict
8
+
9
+
10
class ToolResult(TypedDict, total=False):
    """Result returned by HF tool operations"""

    # Human-readable (often markdown) text presented to the agent/user.
    formatted: str
    # Total number of results the operation found.
    totalResults: int
    # How many of those results were included in `formatted`.
    resultsShared: int
    # True when the operation failed; `formatted` then holds the error text.
    isError: bool
agent/tools/utilities.py ADDED
@@ -0,0 +1,142 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Utility functions for Hugging Face tools
3
+
4
+ Ported from: hf-mcp-server/packages/mcp/src/jobs/formatters.ts
5
+ Includes GPU memory validation for job submissions
6
+ """
7
+
8
+ import json
9
+ from datetime import datetime
10
+ from typing import Any, Dict, List, Optional
11
+
12
+
13
def truncate(text: str, max_length: int) -> str:
    """Clip *text* to *max_length* characters, ending with '...' when clipped."""
    return text if len(text) <= max_length else text[: max_length - 3] + "..."
18
+
19
+
20
def format_date(date_str: Optional[str]) -> str:
    """Render an ISO-8601 timestamp as 'YYYY-MM-DD HH:MM:SS'.

    Returns "N/A" for missing/empty input; on a parse failure the raw
    input string is returned unchanged.
    """
    if not date_str:
        return "N/A"
    try:
        # 'Z' suffix is not accepted by fromisoformat on older Pythons.
        parsed = datetime.fromisoformat(date_str.replace("Z", "+00:00"))
        return parsed.strftime("%Y-%m-%d %H:%M:%S")
    except Exception:
        return date_str
29
+
30
+
31
def format_command(command: Optional[List[str]]) -> str:
    """Join a command argv into one display string; "N/A" when absent/empty."""
    return " ".join(command) if command else "N/A"
36
+
37
+
38
def get_image_or_space(job: Dict[str, Any]) -> str:
    """Return the job's Space ID or Docker image, preferring the Space."""
    for key in ("spaceId", "dockerImage"):
        value = job.get(key)
        if value:
            return value
    return "N/A"
45
+
46
+
47
def format_jobs_table(jobs: List[Dict[str, Any]]) -> str:
    """Render job records as a fixed-width markdown table."""
    if not jobs:
        return "No jobs found."

    order = ("id", "image", "command", "created", "status")

    # The ID column grows to fit the longest ID (never narrower than header).
    widths = {
        "id": max(max(len(j["id"]) for j in jobs), len("JOB ID")),
        "image": 20,
        "command": 30,
        "created": 19,
        "status": 12,
    }
    headers = {
        "id": "JOB ID",
        "image": "IMAGE/SPACE",
        "command": "COMMAND",
        "created": "CREATED",
        "status": "STATUS",
    }

    def _row(cells: Dict[str, str]) -> str:
        return "| " + " | ".join(cells[k].ljust(widths[k]) for k in order) + " |"

    table = [
        _row(headers),
        "|" + "|".join("-" * (widths[k] + 2) for k in order) + "|",
    ]
    for job in jobs:
        table.append(
            _row(
                {
                    "id": job["id"],
                    "image": truncate(get_image_or_space(job), widths["image"]),
                    "command": truncate(
                        format_command(job.get("command")), widths["command"]
                    ),
                    "created": truncate(
                        format_date(job.get("createdAt")), widths["created"]
                    ),
                    "status": truncate(job["status"]["stage"], widths["status"]),
                }
            )
        )
    return "\n".join(table)
83
+
84
+
85
def format_scheduled_jobs_table(jobs: List[Dict[str, Any]]) -> str:
    """Render scheduled-job records as a fixed-width markdown table."""
    if not jobs:
        return "No scheduled jobs found."

    order = ("id", "schedule", "image", "command", "lastRun", "nextRun", "suspend")

    # The ID column grows to fit the longest ID (never narrower than header).
    widths = {
        "id": max(max(len(j["id"]) for j in jobs), len("ID")),
        "schedule": 12,
        "image": 18,
        "command": 25,
        "lastRun": 19,
        "nextRun": 19,
        "suspend": 9,
    }
    headers = {
        "id": "ID",
        "schedule": "SCHEDULE",
        "image": "IMAGE/SPACE",
        "command": "COMMAND",
        "lastRun": "LAST RUN",
        "nextRun": "NEXT RUN",
        "suspend": "SUSPENDED",
    }

    def _row(cells: Dict[str, str]) -> str:
        return "| " + " | ".join(cells[k].ljust(widths[k]) for k in order) + " |"

    table = [
        _row(headers),
        "|" + "|".join("-" * (widths[k] + 2) for k in order) + "|",
    ]
    for job in jobs:
        spec = job["jobSpec"]
        table.append(
            _row(
                {
                    "id": job["id"],
                    "schedule": truncate(job["schedule"], widths["schedule"]),
                    "image": truncate(get_image_or_space(spec), widths["image"]),
                    "command": truncate(
                        format_command(spec.get("command")), widths["command"]
                    ),
                    "lastRun": truncate(
                        format_date(job.get("lastRun")), widths["lastRun"]
                    ),
                    "nextRun": truncate(
                        format_date(job.get("nextRun")), widths["nextRun"]
                    ),
                    "suspend": "Yes" if job.get("suspend") else "No",
                }
            )
        )
    return "\n".join(table)
127
+
128
+
129
def format_job_details(jobs: Any) -> str:
    """Pretty-print one job (or a list of jobs) as a fenced JSON block."""
    payload = jobs if isinstance(jobs, list) else [jobs]
    return "```json\n" + json.dumps(payload, indent=2) + "\n```"
135
+
136
+
137
def format_scheduled_job_details(jobs: Any) -> str:
    """Pretty-print one scheduled job (or a list) as a fenced JSON block."""
    payload = jobs if isinstance(jobs, list) else [jobs]
    return "```json\n" + json.dumps(payload, indent=2) + "\n```"
agent/utils/__init__.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ """
2
+ Utility functions and helpers
3
+ """
agent/utils/reliability_checks.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Reliability checks for job submissions and other operations"""
2
+
3
+ from agent.utils.terminal_display import Colors
4
+
5
+
6
+ def check_training_script_save_pattern(script: str) -> str | None:
7
+ """Check if a training script properly saves models."""
8
+ has_from_pretrained = "from_pretrained" in script
9
+ has_push_to_hub = "push_to_hub" in script
10
+
11
+ if has_from_pretrained and not has_push_to_hub:
12
+ return f"\n{Colors.RED}WARNING: We've detected that no model will be saved at the end of this training script. Please ensure this is what you want.{Colors.RESET}"
13
+ elif has_from_pretrained and has_push_to_hub:
14
+ return f"\n{Colors.GREEN}We've detected that a model will be pushed to hub at the end of this training.{Colors.RESET}"
15
+
16
+ return None
agent/utils/terminal_display.py ADDED
@@ -0,0 +1,155 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Terminal display utilities with colors and formatting
3
+ """
4
+
5
+
6
+ # ANSI color codes
7
class Colors:
    """ANSI escape sequences for terminal text color and styling."""

    # Bright foreground colors (ANSI 90-series codes)
    RED = "\033[91m"
    GREEN = "\033[92m"
    YELLOW = "\033[93m"
    BLUE = "\033[94m"
    MAGENTA = "\033[95m"
    CYAN = "\033[96m"
    # Text attributes
    BOLD = "\033[1m"
    UNDERLINE = "\033[4m"
    # Clears all colors and attributes back to the terminal default
    RESET = "\033[0m"
17
+
18
+
19
def truncate_to_lines(text: str, max_lines: int = 6) -> str:
    """Limit *text* to at most *max_lines* lines.

    When truncation happens, a cyan "... (N more lines)" marker is appended
    so the reader knows how much output was hidden.
    """
    lines = text.split("\n")
    hidden = len(lines) - max_lines
    if hidden <= 0:
        return text
    shown = "\n".join(lines[:max_lines])
    return f"{shown}\n{Colors.CYAN}... ({hidden} more lines){Colors.RESET}"
+ )
28
+
29
+
30
def format_header(text: str, emoji: str = "") -> str:
    """Render *text* as a bold header, optionally prefixed with an emoji."""
    label = text if not emoji else f"{emoji} {text}"
    return f"{Colors.BOLD}{label}{Colors.RESET}"
34
+
35
+
36
def format_plan_display() -> str:
    """Render the agent's current plan as a plain-text banner.

    Returns an empty string when no plan exists. Output is deliberately
    uncolored so the full plan stays readable in any terminal or log.
    """
    from agent.tools.plan_tool import get_current_plan

    plan = get_current_plan()
    if not plan:
        return ""

    out = ["\n" + "=" * 60, "CURRENT PLAN", "=" * 60 + "\n"]

    # Render each non-empty status group as its own section.
    sections = (
        ("Completed:", "[x]", [t for t in plan if t["status"] == "completed"]),
        ("In Progress:", "[~]", [t for t in plan if t["status"] == "in_progress"]),
        ("Pending:", "[ ]", [t for t in plan if t["status"] == "pending"]),
    )
    for title, marker, todos in sections:
        if not todos:
            continue
        out.append(title)
        out.extend(f"  {marker} {todo['id']}. {todo['content']}" for todo in todos)
        out.append("")

    done, doing, waiting = (len(group) for _, _, group in sections)
    out.append(
        f"Total: {len(plan)} todos ({done} completed, {doing} in progress, {waiting} pending)"
    )
    out.append("=" * 60 + "\n")

    return "\n".join(out)
77
+
78
+
79
def format_error(message: str) -> str:
    """Render *message* as a red ERROR line."""
    return Colors.RED + f"ERROR: {message}" + Colors.RESET
82
+
83
+
84
def format_success(message: str, emoji: str = "") -> str:
    """Render *message* in green, optionally prefixed with an emoji."""
    if emoji:
        return f"{Colors.GREEN}{emoji} {message}{Colors.RESET}"
    return f"{Colors.GREEN}{message}{Colors.RESET}"
88
+
89
+
90
def format_tool_call(tool_name: str, arguments: str) -> str:
    """Render the "calling tool X with arguments Y" line in yellow, name bolded."""
    bold_name = f"{Colors.BOLD}{tool_name}{Colors.RESET}{Colors.YELLOW}"
    return f"{Colors.YELLOW}Calling tool: {bold_name} with arguments: {arguments}{Colors.RESET}"
93
+
94
+
95
def format_tool_output(output: str, success: bool, truncate: bool = True) -> str:
    """Format tool output with a colored header and optional truncation.

    Args:
        output: Raw tool output text.
        success: Whether the tool call succeeded (yellow vs red header).
        truncate: When True, limit the displayed output to 6 lines.

    Returns:
        The colored header line followed by the (possibly truncated) output.
    """
    # Measure before truncating so the count reflects the full output.
    original_length = len(output)
    if truncate:
        output = truncate_to_lines(output, max_lines=6)

    # Fix: the two branches previously disagreed ("tkns" vs "tokens") and
    # both mislabelled a character count (len(output)) as tokens; use a
    # single accurate label for both paths.
    color = Colors.YELLOW if success else Colors.RED
    return f"{color}Tool output ({original_length} chars): {Colors.RESET}\n{output}"
109
+
110
+
111
def format_turn_complete() -> str:
    """Render the end-of-turn banner: bold green with a hugging-face emoji."""
    return Colors.GREEN + Colors.BOLD + "\U0001f917 Turn complete" + Colors.RESET + "\n"
114
+
115
+
116
def format_separator(char: str = "=", length: int = 60) -> str:
    """Return a horizontal rule made of *length* copies of *char*."""
    return length * char
119
+
120
+
121
def format_plan_tool_output(todos: list) -> str:
    """Render the plan tool's result as plain text (no ANSI colors, full visibility)."""
    if not todos:
        return "Plan is empty."

    out = ["Plan updated successfully", ""]

    # One section per non-empty status group, each with its own checkbox marker.
    sections = (
        ("Completed:", "[x]", [t for t in todos if t["status"] == "completed"]),
        ("In Progress:", "[~]", [t for t in todos if t["status"] == "in_progress"]),
        ("Pending:", "[ ]", [t for t in todos if t["status"] == "pending"]),
    )
    for title, marker, group in sections:
        if not group:
            continue
        out.append(title)
        out.extend(f"  {marker} {todo['id']}. {todo['content']}" for todo in group)
        out.append("")

    done, doing, waiting = (len(group) for _, _, group in sections)
    out.append(
        f"Total: {len(todos)} todos ({done} completed, {doing} in progress, {waiting} pending)"
    )
    return "\n".join(out)
configs/main_agent_config.json ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_name": "anthropic/claude-opus-4-5-20251101",
3
+ "save_sessions": true,
4
+ "session_dataset_repo": "akseljoonas/hf-agent-sessions",
5
+ "yolo_mode": false,
6
+ "confirm_cpu_jobs": false,
7
+ "auto_file_upload": true,
8
+ "mcpServers": {
9
+ "hf-mcp-server": {
10
+ "transport": "http",
11
+ "url": "https://huggingface.co/mcp?login",
12
+ "headers": {
13
+ "Authorization": "Bearer ${HF_TOKEN}"
14
+ }
15
+ }
16
+ }
17
+ }
dependencies.py ADDED
@@ -0,0 +1,144 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Authentication dependencies for FastAPI routes.
2
+
3
+ Provides auth validation for both REST and WebSocket endpoints.
4
+ - In dev mode (OAUTH_CLIENT_ID not set): auth is bypassed, returns a default "dev" user.
5
+ - In production: validates Bearer tokens or cookies against HF OAuth.
6
+ """
7
+
8
+ import logging
9
+ import os
10
+ import time
11
+ from typing import Any
12
+
13
+ import httpx
14
+ from fastapi import HTTPException, Request, WebSocket, status
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+ OPENID_PROVIDER_URL = os.environ.get("OPENID_PROVIDER_URL", "https://huggingface.co")
19
+ AUTH_ENABLED = bool(os.environ.get("OAUTH_CLIENT_ID", ""))
20
+
21
+ # Simple in-memory token cache: token -> (user_info, expiry_time)
22
+ _token_cache: dict[str, tuple[dict[str, Any], float]] = {}
23
+ TOKEN_CACHE_TTL = 300 # 5 minutes
24
+
25
+ DEV_USER: dict[str, Any] = {
26
+ "user_id": "dev",
27
+ "username": "dev",
28
+ "authenticated": True,
29
+ }
30
+
31
+
32
async def _validate_token(token: str) -> dict[str, Any] | None:
    """Validate a token against the HF OAuth userinfo endpoint.

    Results are cached for TOKEN_CACHE_TTL seconds to avoid excessive API
    calls. Returns the userinfo dict on success, None on any failure.
    """
    now = time.time()

    # Fix: previously only the queried token was ever evicted, so entries
    # for tokens that are never queried again accumulated forever. Prune
    # every expired entry up front (mirrors _cleanup_expired_states in
    # routes/auth.py) so the cache cannot grow without bound.
    for stale in [t for t, (_, expiry) in _token_cache.items() if now >= expiry]:
        del _token_cache[stale]

    # Cache hit (anything left in the cache is unexpired after the prune).
    cached = _token_cache.get(token)
    if cached is not None:
        return cached[0]

    # Validate against HF
    async with httpx.AsyncClient(timeout=10.0) as client:
        try:
            response = await client.get(
                f"{OPENID_PROVIDER_URL}/oauth/userinfo",
                headers={"Authorization": f"Bearer {token}"},
            )
            if response.status_code != 200:
                logger.debug("Token validation failed: status %d", response.status_code)
                return None
            user_info = response.json()
            _token_cache[token] = (user_info, now + TOKEN_CACHE_TTL)
            return user_info
        except httpx.HTTPError as e:
            logger.warning("Token validation error: %s", e)
            return None
62
+
63
+
64
+ def _user_from_info(user_info: dict[str, Any]) -> dict[str, Any]:
65
+ """Build a normalized user dict from HF userinfo response."""
66
+ return {
67
+ "user_id": user_info.get("sub", user_info.get("preferred_username", "unknown")),
68
+ "username": user_info.get("preferred_username", "unknown"),
69
+ "name": user_info.get("name"),
70
+ "picture": user_info.get("picture"),
71
+ "authenticated": True,
72
+ }
73
+
74
+
75
async def _extract_user_from_token(token: str) -> dict[str, Any] | None:
    """Validate a token and return a normalized user dict, or None on failure."""
    info = await _validate_token(token)
    return _user_from_info(info) if info else None
81
+
82
+
83
async def get_current_user(request: Request) -> dict[str, Any]:
    """FastAPI dependency: resolve the authenticated user for a request.

    Token sources, tried in priority order:
      1. ``Authorization: Bearer <token>`` header
      2. ``hf_access_token`` cookie

    In dev mode (AUTH_ENABLED=False) a default dev user is returned.
    Raises HTTP 401 when no candidate token validates.
    """
    if not AUTH_ENABLED:
        return DEV_USER

    # Gather candidate tokens in priority order, then try each in turn.
    candidates: list[str] = []
    auth_header = request.headers.get("Authorization", "")
    if auth_header.startswith("Bearer "):
        candidates.append(auth_header[7:])
    cookie_token = request.cookies.get("hf_access_token")
    if cookie_token:
        candidates.append(cookie_token)

    for token in candidates:
        user = await _extract_user_from_token(token)
        if user:
            return user

    raise HTTPException(
        status_code=status.HTTP_401_UNAUTHORIZED,
        detail="Not authenticated. Please log in via /auth/login.",
        headers={"WWW-Authenticate": "Bearer"},
    )
115
+
116
+
117
async def get_ws_user(websocket: WebSocket) -> dict[str, Any] | None:
    """Extract and validate the user for a WebSocket connection.

    Browsers cannot attach custom headers to WebSocket upgrades, so the
    token is looked for in (priority order):
      1. ``?token=`` query parameter
      2. ``hf_access_token`` cookie (sent automatically for same-origin)

    Returns the user dict, or None when not authenticated.
    In dev mode the default dev user is returned.
    """
    if not AUTH_ENABLED:
        return DEV_USER

    candidates: list[str] = []
    query_token = websocket.query_params.get("token")
    if query_token:
        candidates.append(query_token)
    cookie_token = websocket.cookies.get("hf_access_token")
    if cookie_token:
        candidates.append(cookie_token)

    for token in candidates:
        user = await _extract_user_from_token(token)
        if user:
            return user

    return None
main.py ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """FastAPI application for HF Agent web interface - API ONLY MODE.
2
+
3
+ This backend runs in API-only mode without serving static files.
4
+ The frontend is hosted separately and communicates via HTTP/WebSocket.
5
+ """
6
+
7
+ import logging
8
+ import os
9
+ from contextlib import asynccontextmanager
10
+
11
+ from dotenv import load_dotenv
12
+
13
+ load_dotenv()
14
+
15
+ # Ensure HF_TOKEN is set — fall back to HF_ADMIN_TOKEN if available (HF Spaces)
16
+ if not os.environ.get("HF_TOKEN") and os.environ.get("HF_ADMIN_TOKEN"):
17
+ os.environ["HF_TOKEN"] = os.environ.get("HF_ADMIN_TOKEN")
18
+
19
+ from fastapi import FastAPI
20
+ from fastapi.middleware.cors import CORSMiddleware
21
+
22
+ from routes.agent import router as agent_router
23
+ from routes.auth import router as auth_router
24
+
25
+ # Configure logging
26
+ logging.basicConfig(
27
+ level=logging.INFO,
28
+ format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
29
+ )
30
+ logger = logging.getLogger(__name__)
31
+
32
+
33
+ @asynccontextmanager
34
+ async def lifespan(app: FastAPI):
35
+ """Application lifespan handler."""
36
+ logger.info("Starting HF Agent backend (API-only mode)...")
37
+ logger.info(f"CORS allowed origins: {os.environ.get('CORS_ORIGINS', '*')}")
38
+ yield
39
+ logger.info("Shutting down HF Agent backend...")
40
+
41
+
42
+ app = FastAPI(
43
+ title="HF Agent API",
44
+ description="ML Engineering Assistant API - Separate Frontend/Backend Mode",
45
+ version="1.0.0",
46
+ lifespan=lifespan,
47
+ )
48
+
49
+ # CORS middleware - allow all origins for separate hosting
50
+ # In production, set CORS_ORIGINS env var to your frontend URL(s)
51
+ cors_origins = os.environ.get("CORS_ORIGINS", "*")
52
+ if cors_origins != "*":
53
+ cors_origins = [origin.strip() for origin in cors_origins.split(",")]
54
+
55
+ app.add_middleware(
56
+ CORSMiddleware,
57
+ allow_origins=cors_origins if isinstance(cors_origins, list) else ["*"],
58
+ allow_credentials=True,
59
+ allow_methods=["*"],
60
+ allow_headers=["*"],
61
+ )
62
+
63
+ # Include routers
64
+ app.include_router(agent_router)
65
+ app.include_router(auth_router)
66
+
67
+
68
+ @app.get("/api")
69
+ async def api_root():
70
+ """API root endpoint."""
71
+ return {
72
+ "name": "HF Agent API",
73
+ "version": "1.0.0",
74
+ "mode": "api-only",
75
+ "docs": "/docs",
76
+ }
77
+
78
+
79
+ @app.get("/")
80
+ async def root():
81
+ """Root endpoint - indicates API-only mode."""
82
+ return {
83
+ "status": "ok",
84
+ "mode": "api-only",
85
+ "message": "Backend is running in API-only mode. Frontend is hosted separately.",
86
+ "api_docs": "/docs",
87
+ "api_endpoints": "/api",
88
+ }
89
+
90
+
91
+ if __name__ == "__main__":
92
+ import uvicorn
93
+
94
+ port = int(os.environ.get("PORT", 7860))
95
+ host = os.environ.get("HOST", "0.0.0.0")
96
+ uvicorn.run(app, host=host, port=port)
models.py ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Pydantic models for API requests and responses."""
2
+
3
+ from enum import Enum
4
+ from typing import Any
5
+
6
+ from pydantic import BaseModel
7
+
8
+
9
class OpType(str, Enum):
    """Operation types matching agent/core/agent_loop.py."""

    USER_INPUT = "user_input"        # new message from the user
    EXEC_APPROVAL = "exec_approval"  # approve/reject pending tool calls
    INTERRUPT = "interrupt"          # cancel the in-flight operation
    UNDO = "undo"                    # undo the last turn
    COMPACT = "compact"              # compact the conversation context
    SHUTDOWN = "shutdown"            # shut the session down
18
+
19
+
20
+ class Operation(BaseModel):
21
+ """Operation to be submitted to the agent."""
22
+
23
+ op_type: OpType
24
+ data: dict[str, Any] | None = None
25
+
26
+
27
+ class Submission(BaseModel):
28
+ """Submission wrapper with ID and operation."""
29
+
30
+ id: str
31
+ operation: Operation
32
+
33
+
34
+ class ToolApproval(BaseModel):
35
+ """Approval decision for a single tool call."""
36
+
37
+ tool_call_id: str
38
+ approved: bool
39
+ feedback: str | None = None
40
+
41
+
42
+ class ApprovalRequest(BaseModel):
43
+ """Request to approve/reject tool calls."""
44
+
45
+ session_id: str
46
+ approvals: list[ToolApproval]
47
+
48
+
49
+ class SubmitRequest(BaseModel):
50
+ """Request to submit user input."""
51
+
52
+ session_id: str
53
+ text: str
54
+
55
+
56
+ class SessionResponse(BaseModel):
57
+ """Response when creating a new session."""
58
+
59
+ session_id: str
60
+ ready: bool = True
61
+
62
+
63
+ class SessionInfo(BaseModel):
64
+ """Session metadata."""
65
+
66
+ session_id: str
67
+ created_at: str
68
+ is_active: bool
69
+ message_count: int
70
+ user_id: str = "dev"
71
+
72
+
73
+ class HealthResponse(BaseModel):
74
+ """Health check response."""
75
+
76
+ status: str = "ok"
77
+ active_sessions: int = 0
78
+ max_sessions: int = 0
79
+
80
+
81
+ class LLMHealthResponse(BaseModel):
82
+ """LLM provider health check response."""
83
+
84
+ status: str # "ok" | "error"
85
+ model: str
86
+ error: str | None = None
87
+ error_type: str | None = None # "auth" | "credits" | "rate_limit" | "network" | "unknown"
pyproject.toml ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [project]
2
+ name = "hf-agent"
3
+ version = "0.1.0"
4
+ description = "Add your description here"
5
+ readme = "README.md"
6
+ requires-python = ">=3.12"
7
+ dependencies = [
8
+ "datasets>=4.4.1",
9
+ # Core dependencies (always required)
10
+ "pydantic>=2.12.3",
11
+ "python-dotenv>=1.2.1",
12
+ ]
13
+
14
+ [project.optional-dependencies]
15
+ # Agent runtime dependencies
16
+ agent = [
17
+ "requests>=2.32.5",
18
+ "litellm>=1.0.0",
19
+ "huggingface-hub>=1.0.1",
20
+ "fastmcp>=2.4.0",
21
+ "lmnr>=0.7.23", # Note: Using base package to avoid torch/transformers from [all] extra
22
+ "prompt-toolkit>=3.0.0",
23
+ "thefuzz>=0.22.1",
24
+ "nbconvert>=7.16.6",
25
+ "nbformat>=5.10.4",
26
+ "datasets>=4.3.0", # For session logging to HF datasets
27
+ "whoosh>=2.7.4",
28
+ # Web backend dependencies
29
+ "fastapi>=0.115.0",
30
+ "uvicorn[standard]>=0.32.0",
31
+ "httpx>=0.27.0",
32
+ "websockets>=13.0",
33
+ ]
34
+
35
+ # Evaluation/benchmarking dependencies
36
+ eval = [
37
+ "inspect-ai>=0.3.149",
38
+ "pandas>=2.3.3",
39
+ "datasets>=4.3.0",
40
+ "tenacity>=8.0.0",
41
+ ]
42
+
43
+ # Development and testing dependencies
44
+ dev = [
45
+ "pytest>=9.0.2",
46
+ ]
47
+
48
+ # All dependencies (agent + eval + dev)
49
+ all = [
50
+ "hf-agent[agent,eval,dev]",
51
+ ]
requirements.txt ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # HF Agent Backend - Requirements
2
+ # Python 3.12+
3
+
4
+ # Core dependencies
5
+ pydantic>=2.12.3
6
+ python-dotenv>=1.2.1
7
+
8
+ # Agent runtime dependencies
9
+ requests>=2.32.5
10
+ litellm>=1.0.0
11
+ huggingface-hub>=1.0.1
12
+ fastmcp>=2.4.0
13
+ lmnr>=0.7.23
14
+ prompt-toolkit>=3.0.0
15
+ thefuzz>=0.22.1
16
+ nbconvert>=7.16.6
17
+ nbformat>=5.10.4
18
+ datasets>=4.3.0
19
+ whoosh>=2.7.4
20
+
21
+ # Web backend dependencies
22
+ fastapi>=0.115.0
23
+ uvicorn[standard]>=0.32.0
24
+ httpx>=0.27.0
25
+ websockets>=13.0
routes/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # Routes package
routes/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (186 Bytes). View file
 
routes/__pycache__/agent.cpython-313.pyc ADDED
Binary file (18.7 kB). View file
 
routes/agent.py ADDED
@@ -0,0 +1,404 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Agent API routes - WebSocket and REST endpoints.
2
+
3
+ All routes (except /health) require authentication via the get_current_user
4
+ dependency. In dev mode (no OAUTH_CLIENT_ID), auth is bypassed automatically.
5
+ """
6
+
7
+ import logging
8
+ import os
9
+ from typing import Any
10
+
11
+ from dependencies import get_current_user, get_ws_user
12
+ from fastapi import (
13
+ APIRouter,
14
+ Depends,
15
+ HTTPException,
16
+ Request,
17
+ WebSocket,
18
+ WebSocketDisconnect,
19
+ )
20
+ from litellm import acompletion
21
+ from models import (
22
+ ApprovalRequest,
23
+ HealthResponse,
24
+ LLMHealthResponse,
25
+ SessionInfo,
26
+ SessionResponse,
27
+ SubmitRequest,
28
+ )
29
+ from session_manager import MAX_SESSIONS, SessionCapacityError, session_manager
30
+ from websocket import manager as ws_manager
31
+
32
+ logger = logging.getLogger(__name__)
33
+
34
+ router = APIRouter(prefix="/api", tags=["agent"])
35
+
36
+ AVAILABLE_MODELS = [
37
+ {
38
+ "id": "huggingface/novita/MiniMaxAI/MiniMax-M2.1",
39
+ "label": "MiniMax M2.1",
40
+ "provider": "huggingface",
41
+ "recommended": True,
42
+ },
43
+ {
44
+ "id": "anthropic/claude-opus-4-5-20251101",
45
+ "label": "Claude Opus 4.5",
46
+ "provider": "anthropic",
47
+ "recommended": True,
48
+ },
49
+ {
50
+ "id": "huggingface/novita/moonshotai/Kimi-K2.5",
51
+ "label": "Kimi K2.5",
52
+ "provider": "huggingface",
53
+ },
54
+ {
55
+ "id": "huggingface/novita/zai-org/GLM-5",
56
+ "label": "GLM 5",
57
+ "provider": "huggingface",
58
+ },
59
+ ]
60
+
61
+
62
def _check_session_access(session_id: str, user: dict[str, Any]) -> None:
    """Ensure *user* may access *session_id*; raise 404 if missing, 403 if not owner."""
    if not session_manager.get_session_info(session_id):
        raise HTTPException(status_code=404, detail="Session not found")
    if not session_manager.verify_session_access(session_id, user["user_id"]):
        raise HTTPException(status_code=403, detail="Access denied to this session")
69
+
70
+
71
+ @router.get("/health", response_model=HealthResponse)
72
+ async def health_check() -> HealthResponse:
73
+ """Health check endpoint."""
74
+ return HealthResponse(
75
+ status="ok",
76
+ active_sessions=session_manager.active_session_count,
77
+ max_sessions=MAX_SESSIONS,
78
+ )
79
+
80
+
81
def _classify_llm_error(message: str) -> str:
    """Map a lowercased LLM provider error message to a coarse category.

    Categories: "auth", "credits", "rate_limit", "network", "unknown".
    Matching is substring-based and order-sensitive: auth markers are
    checked before billing, billing before rate limiting, etc.
    """
    if (
        "401" in message
        or "auth" in message
        or "invalid" in message
        or "api key" in message
    ):
        return "auth"
    if (
        "402" in message
        or "credit" in message
        or "quota" in message
        or "insufficient" in message
        or "billing" in message
    ):
        return "credits"
    if "429" in message or "rate" in message:
        return "rate_limit"
    if "timeout" in message or "connect" in message or "network" in message:
        return "network"
    return "unknown"


@router.get("/health/llm", response_model=LLMHealthResponse)
async def llm_health_check() -> LLMHealthResponse:
    """Check if the LLM provider is reachable and the API key is valid.

    Makes a minimal 1-token completion call. Catches common errors:
    - 401 → invalid API key
    - 402/insufficient_quota → out of credits
    - 429 → rate limited
    - timeout / network → provider unreachable
    """
    model = session_manager.config.model_name
    hf_key = os.environ.get("INFERENCE_TOKEN")
    try:
        await acompletion(
            model=model,
            messages=[{"role": "user", "content": "hi"}],
            max_tokens=1,
            timeout=10,
            # The INFERENCE_TOKEN key only applies to HF-routed models.
            api_key=hf_key if hf_key and model.startswith("huggingface/") else None,
        )
        return LLMHealthResponse(status="ok", model=model)
    except Exception as e:
        error_type = _classify_llm_error(str(e).lower())
        logger.warning(f"LLM health check failed ({error_type}): {e}")
        return LLMHealthResponse(
            status="error",
            model=model,
            error=str(e)[:500],  # cap detail length to keep the payload small
            error_type=error_type,
        )
133
+
134
+
135
+ @router.get("/config/model")
136
+ async def get_model() -> dict:
137
+ """Get current model and available models. No auth required."""
138
+ return {
139
+ "current": session_manager.config.model_name,
140
+ "available": AVAILABLE_MODELS,
141
+ }
142
+
143
+
144
+ @router.post("/config/model")
145
+ async def set_model(body: dict, user: dict = Depends(get_current_user)) -> dict:
146
+ """Set the LLM model. Applies to new conversations."""
147
+ model_id = body.get("model")
148
+ if not model_id:
149
+ raise HTTPException(status_code=400, detail="Missing 'model' field")
150
+ valid_ids = {m["id"] for m in AVAILABLE_MODELS}
151
+ if model_id not in valid_ids:
152
+ raise HTTPException(status_code=400, detail=f"Unknown model: {model_id}")
153
+ session_manager.config.model_name = model_id
154
+ logger.info(f"Model changed to {model_id} by {user.get('username', 'unknown')}")
155
+ return {"model": model_id}
156
+
157
+
158
+ @router.post("/title")
159
+ async def generate_title(
160
+ request: SubmitRequest, user: dict = Depends(get_current_user)
161
+ ) -> dict:
162
+ """Generate a short title for a chat session based on the first user message."""
163
+ model = session_manager.config.model_name
164
+ hf_key = os.environ.get("INFERENCE_TOKEN")
165
+ try:
166
+ response = await acompletion(
167
+ model=model,
168
+ messages=[
169
+ {
170
+ "role": "system",
171
+ "content": (
172
+ "Generate a very short title (max 6 words) for a chat conversation "
173
+ "that starts with the following user message. "
174
+ "Reply with ONLY the title, no quotes, no punctuation at the end."
175
+ ),
176
+ },
177
+ {"role": "user", "content": request.text[:500]},
178
+ ],
179
+ max_tokens=20,
180
+ temperature=0.3,
181
+ timeout=8,
182
+ api_key=hf_key if hf_key and model.startswith("huggingface/") else None,
183
+ )
184
+ title = response.choices[0].message.content.strip().strip('"').strip("'")
185
+ # Safety: cap at 50 chars
186
+ if len(title) > 50:
187
+ title = title[:50].rstrip() + "…"
188
+ return {"title": title}
189
+ except Exception as e:
190
+ logger.warning(f"Title generation failed: {e}")
191
+ # Fallback: truncate the message
192
+ fallback = request.text.strip()
193
+ title = fallback[:40].rstrip() + "…" if len(fallback) > 40 else fallback
194
+ return {"title": title}
195
+
196
+
197
+ @router.post("/session", response_model=SessionResponse)
198
+ async def create_session(
199
+ request: Request, user: dict = Depends(get_current_user)
200
+ ) -> SessionResponse:
201
+ """Create a new agent session bound to the authenticated user.
202
+
203
+ The user's HF access token is extracted from the Authorization header
204
+ and stored in the session so that tools (e.g. hf_jobs) can act on
205
+ behalf of the user.
206
+
207
+ Returns 503 if the server or user has reached the session limit.
208
+ """
209
+ # Extract the user's HF token (Bearer header or HttpOnly cookie)
210
+ hf_token = None
211
+ auth_header = request.headers.get("Authorization", "")
212
+ if auth_header.startswith("Bearer "):
213
+ hf_token = auth_header[7:]
214
+ if not hf_token:
215
+ hf_token = request.cookies.get("hf_access_token")
216
+
217
+ try:
218
+ session_id = await session_manager.create_session(
219
+ user_id=user["user_id"], hf_token=hf_token
220
+ )
221
+ except SessionCapacityError as e:
222
+ raise HTTPException(status_code=503, detail=str(e))
223
+ return SessionResponse(session_id=session_id, ready=True)
224
+
225
+
226
+ @router.get("/session/{session_id}", response_model=SessionInfo)
227
+ async def get_session(
228
+ session_id: str, user: dict = Depends(get_current_user)
229
+ ) -> SessionInfo:
230
+ """Get session information. Only accessible by the session owner."""
231
+ _check_session_access(session_id, user)
232
+ info = session_manager.get_session_info(session_id)
233
+ return SessionInfo(**info)
234
+
235
+
236
+ @router.get("/sessions", response_model=list[SessionInfo])
237
+ async def list_sessions(user: dict = Depends(get_current_user)) -> list[SessionInfo]:
238
+ """List sessions belonging to the authenticated user."""
239
+ sessions = session_manager.list_sessions(user_id=user["user_id"])
240
+ return [SessionInfo(**s) for s in sessions]
241
+
242
+
243
+ @router.delete("/session/{session_id}")
244
+ async def delete_session(
245
+ session_id: str, user: dict = Depends(get_current_user)
246
+ ) -> dict:
247
+ """Delete a session. Only accessible by the session owner."""
248
+ _check_session_access(session_id, user)
249
+ success = await session_manager.delete_session(session_id)
250
+ if not success:
251
+ raise HTTPException(status_code=404, detail="Session not found")
252
+ return {"status": "deleted", "session_id": session_id}
253
+
254
+
255
+ @router.post("/submit")
256
+ async def submit_input(
257
+ request: SubmitRequest, user: dict = Depends(get_current_user)
258
+ ) -> dict:
259
+ """Submit user input to a session. Only accessible by the session owner."""
260
+ _check_session_access(request.session_id, user)
261
+ success = await session_manager.submit_user_input(request.session_id, request.text)
262
+ if not success:
263
+ raise HTTPException(status_code=404, detail="Session not found or inactive")
264
+ return {"status": "submitted", "session_id": request.session_id}
265
+
266
+
267
+ @router.post("/approve")
268
+ async def submit_approval(
269
+ request: ApprovalRequest, user: dict = Depends(get_current_user)
270
+ ) -> dict:
271
+ """Submit tool approvals to a session. Only accessible by the session owner."""
272
+ _check_session_access(request.session_id, user)
273
+ approvals = [
274
+ {
275
+ "tool_call_id": a.tool_call_id,
276
+ "approved": a.approved,
277
+ "feedback": a.feedback,
278
+ }
279
+ for a in request.approvals
280
+ ]
281
+ success = await session_manager.submit_approval(request.session_id, approvals)
282
+ if not success:
283
+ raise HTTPException(status_code=404, detail="Session not found or inactive")
284
+ return {"status": "submitted", "session_id": request.session_id}
285
+
286
+
287
+ @router.post("/interrupt/{session_id}")
288
+ async def interrupt_session(
289
+ session_id: str, user: dict = Depends(get_current_user)
290
+ ) -> dict:
291
+ """Interrupt the current operation in a session."""
292
+ _check_session_access(session_id, user)
293
+ success = await session_manager.interrupt(session_id)
294
+ if not success:
295
+ raise HTTPException(status_code=404, detail="Session not found or inactive")
296
+ return {"status": "interrupted", "session_id": session_id}
297
+
298
+
299
+ @router.post("/undo/{session_id}")
300
+ async def undo_session(session_id: str, user: dict = Depends(get_current_user)) -> dict:
301
+ """Undo the last turn in a session."""
302
+ _check_session_access(session_id, user)
303
+ success = await session_manager.undo(session_id)
304
+ if not success:
305
+ raise HTTPException(status_code=404, detail="Session not found or inactive")
306
+ return {"status": "undo_requested", "session_id": session_id}
307
+
308
+
309
+ @router.post("/compact/{session_id}")
310
+ async def compact_session(
311
+ session_id: str, user: dict = Depends(get_current_user)
312
+ ) -> dict:
313
+ """Compact the context in a session."""
314
+ _check_session_access(session_id, user)
315
+ success = await session_manager.compact(session_id)
316
+ if not success:
317
+ raise HTTPException(status_code=404, detail="Session not found or inactive")
318
+ return {"status": "compact_requested", "session_id": session_id}
319
+
320
+
321
+ @router.post("/shutdown/{session_id}")
322
+ async def shutdown_session(
323
+ session_id: str, user: dict = Depends(get_current_user)
324
+ ) -> dict:
325
+ """Shutdown a session."""
326
+ _check_session_access(session_id, user)
327
+ success = await session_manager.shutdown_session(session_id)
328
+ if not success:
329
+ raise HTTPException(status_code=404, detail="Session not found or inactive")
330
+ return {"status": "shutdown_requested", "session_id": session_id}
331
+
332
+
333
+ @router.websocket("/ws/{session_id}")
334
+ async def websocket_endpoint(websocket: WebSocket, session_id: str) -> None:
335
+ """WebSocket endpoint for real-time events.
336
+
337
+ Authentication is done via:
338
+ - ?token= query parameter (for browsers that can't send WS headers)
339
+ - Cookie (automatic for same-origin connections)
340
+ - Dev mode bypass (when OAUTH_CLIENT_ID is not set)
341
+
342
+ NOTE: We must accept() before close() so the browser receives our custom
343
+ close codes (4001, 4003, 4004). If we close() before accept(), Starlette
344
+ sends HTTP 403 and the browser only sees code 1006 (abnormal closure).
345
+ """
346
+ logger.info(f"WebSocket connection request for session {session_id}")
347
+
348
+ # Authenticate the WebSocket connection
349
+ user = await get_ws_user(websocket)
350
+ if not user:
351
+ logger.warning(
352
+ f"WebSocket rejected: authentication failed for session {session_id}"
353
+ )
354
+ await websocket.accept()
355
+ await websocket.close(code=4001, reason="Authentication required")
356
+ return
357
+
358
+ # Verify session exists
359
+ info = session_manager.get_session_info(session_id)
360
+ if not info:
361
+ logger.warning(f"WebSocket rejected: session {session_id} not found")
362
+ await websocket.accept()
363
+ await websocket.close(code=4004, reason="Session not found")
364
+ return
365
+
366
+ # Verify user owns the session
367
+ if not session_manager.verify_session_access(session_id, user["user_id"]):
368
+ logger.warning(
369
+ f"WebSocket rejected: user {user['user_id']} denied access to session {session_id}"
370
+ )
371
+ await websocket.accept()
372
+ await websocket.close(code=4003, reason="Access denied")
373
+ return
374
+
375
+ await ws_manager.connect(websocket, session_id)
376
+
377
+ # Send "ready" immediately on WebSocket connection so the frontend
378
+ # knows the session is alive. The original ready event from _run_session
379
+ # fires before the WS is connected and is always lost.
380
+ try:
381
+ await websocket.send_json(
382
+ {
383
+ "event_type": "ready",
384
+ "data": {"message": "Agent initialized"},
385
+ }
386
+ )
387
+ except Exception as e:
388
+ logger.error(f"Failed to send ready event for session {session_id}: {e}")
389
+
390
+ try:
391
+ while True:
392
+ # Keep connection alive, handle ping/pong
393
+ data = await websocket.receive_json()
394
+
395
+ # Handle client messages (e.g., ping)
396
+ if data.get("type") == "ping":
397
+ await websocket.send_json({"type": "pong"})
398
+
399
+ except WebSocketDisconnect:
400
+ logger.info(f"WebSocket disconnected for session {session_id}")
401
+ except Exception as e:
402
+ logger.error(f"WebSocket error for session {session_id}: {e}")
403
+ finally:
404
+ ws_manager.disconnect(session_id)
routes/auth.py ADDED
@@ -0,0 +1,171 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Authentication routes for HF OAuth.
2
+
3
+ Handles the OAuth 2.0 authorization code flow with HF as provider.
4
+ After successful auth, sets an HttpOnly cookie with the access token.
5
+ """
6
+
7
+ import os
8
+ import secrets
9
+ import time
10
+ from urllib.parse import urlencode
11
+
12
+ import httpx
13
+ from dependencies import AUTH_ENABLED, get_current_user
14
+ from fastapi import APIRouter, Depends, HTTPException, Request
15
+ from fastapi.responses import RedirectResponse
16
+
17
+ router = APIRouter(prefix="/auth", tags=["auth"])
18
+
19
+ # OAuth configuration from environment
20
+ OAUTH_CLIENT_ID = os.environ.get("OAUTH_CLIENT_ID", "")
21
+ OAUTH_CLIENT_SECRET = os.environ.get("OAUTH_CLIENT_SECRET", "")
22
+ OPENID_PROVIDER_URL = os.environ.get("OPENID_PROVIDER_URL", "https://huggingface.co")
23
+
24
+ # In-memory OAuth state store with expiry (5 min TTL)
25
+ _OAUTH_STATE_TTL = 300
26
+ oauth_states: dict[str, dict] = {}
27
+
28
+
29
def _cleanup_expired_states() -> None:
    """Drop OAuth states whose TTL has elapsed so the store cannot grow forever."""
    cutoff = time.time()
    stale = [key for key, entry in oauth_states.items() if cutoff > entry.get("expires_at", 0)]
    for key in stale:
        oauth_states.pop(key, None)
35
+
36
+
37
def get_redirect_uri(request: Request) -> str:
    """Return the absolute OAuth callback URI for this deployment.

    On HF Spaces the public hostname comes from the SPACE_HOST env var;
    otherwise the URI is derived from the incoming request's route table.
    """
    space_host = os.environ.get("SPACE_HOST")
    if space_host:
        return f"https://{space_host}/auth/callback"
    return str(request.url_for("oauth_callback"))
45
+
46
+
47
+ @router.get("/login")
48
+ async def oauth_login(request: Request) -> RedirectResponse:
49
+ """Initiate OAuth login flow."""
50
+ if not OAUTH_CLIENT_ID:
51
+ raise HTTPException(
52
+ status_code=500,
53
+ detail="OAuth not configured. Set OAUTH_CLIENT_ID environment variable.",
54
+ )
55
+
56
+ # Clean up expired states to prevent memory growth
57
+ _cleanup_expired_states()
58
+
59
+ # Generate state for CSRF protection
60
+ state = secrets.token_urlsafe(32)
61
+ oauth_states[state] = {
62
+ "redirect_uri": get_redirect_uri(request),
63
+ "expires_at": time.time() + _OAUTH_STATE_TTL,
64
+ }
65
+
66
+ # Build authorization URL
67
+ params = {
68
+ "client_id": OAUTH_CLIENT_ID,
69
+ "redirect_uri": get_redirect_uri(request),
70
+ "scope": "openid profile read-repos write-repos contribute-repos manage-repos inference-api jobs write-discussions",
71
+ "response_type": "code",
72
+ "state": state,
73
+ "orgIds": os.environ.get(
74
+ "HF_OAUTH_ORG_ID", "698dbf55845d85df163175f1"
75
+ ), # ml-agent-explorers
76
+ }
77
+ auth_url = f"{OPENID_PROVIDER_URL}/oauth/authorize?{urlencode(params)}"
78
+
79
+ return RedirectResponse(url=auth_url)
80
+
81
+
82
+ @router.get("/callback")
83
+ async def oauth_callback(
84
+ request: Request, code: str = "", state: str = ""
85
+ ) -> RedirectResponse:
86
+ """Handle OAuth callback."""
87
+ # Verify state
88
+ if state not in oauth_states:
89
+ raise HTTPException(status_code=400, detail="Invalid state parameter")
90
+
91
+ stored_state = oauth_states.pop(state)
92
+ redirect_uri = stored_state["redirect_uri"]
93
+
94
+ if not code:
95
+ raise HTTPException(status_code=400, detail="No authorization code provided")
96
+
97
+ # Exchange code for token
98
+ token_url = f"{OPENID_PROVIDER_URL}/oauth/token"
99
+ async with httpx.AsyncClient() as client:
100
+ try:
101
+ response = await client.post(
102
+ token_url,
103
+ data={
104
+ "grant_type": "authorization_code",
105
+ "code": code,
106
+ "redirect_uri": redirect_uri,
107
+ "client_id": OAUTH_CLIENT_ID,
108
+ "client_secret": OAUTH_CLIENT_SECRET,
109
+ },
110
+ )
111
+ response.raise_for_status()
112
+ token_data = response.json()
113
+ except httpx.HTTPError as e:
114
+ raise HTTPException(status_code=500, detail=f"Token exchange failed: {e}")
115
+
116
+ # Get user info
117
+ access_token = token_data.get("access_token")
118
+ if not access_token:
119
+ raise HTTPException(
120
+ status_code=500,
121
+ detail="Token exchange succeeded but no access_token was returned.",
122
+ )
123
+
124
+ # Fetch user info (optional — failure is not fatal)
125
+ async with httpx.AsyncClient() as client:
126
+ try:
127
+ userinfo_response = await client.get(
128
+ f"{OPENID_PROVIDER_URL}/oauth/userinfo",
129
+ headers={"Authorization": f"Bearer {access_token}"},
130
+ )
131
+ userinfo_response.raise_for_status()
132
+ except httpx.HTTPError:
133
+ pass # user_info not required for auth flow
134
+
135
+ # Set access token as HttpOnly cookie (not in URL — avoids leaks via
136
+ # Referrer headers, browser history, and server logs)
137
+ is_production = bool(os.environ.get("SPACE_HOST"))
138
+ response = RedirectResponse(url="/", status_code=302)
139
+ response.set_cookie(
140
+ key="hf_access_token",
141
+ value=access_token,
142
+ httponly=True,
143
+ secure=is_production, # Secure flag only in production (HTTPS)
144
+ samesite="lax",
145
+ max_age=3600 * 24, # 24 hours
146
+ path="/",
147
+ )
148
+ return response
149
+
150
+
151
+ @router.get("/logout")
152
+ async def logout() -> RedirectResponse:
153
+ """Log out the user by clearing the auth cookie."""
154
+ response = RedirectResponse(url="/")
155
+ response.delete_cookie(key="hf_access_token", path="/")
156
+ return response
157
+
158
+
159
+ @router.get("/status")
160
+ async def auth_status() -> dict:
161
+ """Check if OAuth is enabled on this instance."""
162
+ return {"auth_enabled": AUTH_ENABLED}
163
+
164
+
165
+ @router.get("/me")
166
+ async def get_me(user: dict = Depends(get_current_user)) -> dict:
167
+ """Get current user info. Returns the authenticated user or dev user.
168
+
169
+ Uses the shared auth dependency which handles cookie + Bearer token.
170
+ """
171
+ return user
session_manager.py ADDED
@@ -0,0 +1,376 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Session manager for handling multiple concurrent agent sessions."""
2
+
3
+ import asyncio
4
+ import logging
5
+ import uuid
6
+ from dataclasses import dataclass, field
7
+ from datetime import datetime
8
+ from pathlib import Path
9
+ from typing import Any, Optional
10
+
11
+ from websocket import manager as ws_manager
12
+
13
+ from agent.config import load_config
14
+ from agent.core.agent_loop import process_submission
15
+ from agent.core.session import Event, OpType, Session
16
+ from agent.core.tools import ToolRouter
17
+
18
+ # Get project root (parent of backend directory)
19
+ PROJECT_ROOT = Path(__file__).parent.parent
20
+ DEFAULT_CONFIG_PATH = str(PROJECT_ROOT / "configs" / "main_agent_config.json")
21
+
22
+
23
+ # These dataclasses match agent/main.py structure
24
@dataclass
class Operation:
    """A single operation for the agent loop to execute."""

    # Kind of operation (user input, approval, interrupt, ...).
    op_type: OpType
    # Optional payload whose shape depends on the operation type.
    data: Optional[dict[str, Any]] = None
30
+
31
+
32
@dataclass
class Submission:
    """An identified request handed to the agent loop."""

    # Unique submission id (e.g. "sub_1a2b3c4d").
    id: str
    # The operation to perform.
    operation: Operation
38
+
39
+
40
+ logger = logging.getLogger(__name__)
41
+
42
+
43
@dataclass
class AgentSession:
    """Bundle of everything owned by one live agent session."""

    session_id: str
    session: Session
    tool_router: ToolRouter
    submission_queue: asyncio.Queue
    # Owner of this session ("dev" when auth is disabled).
    user_id: str = "dev"
    # User's HF OAuth token, forwarded to tools that need it.
    hf_token: str | None = None
    # Background task running the agent loop for this session.
    task: asyncio.Task | None = None
    created_at: datetime = field(default_factory=datetime.utcnow)
    is_active: bool = True
56
+
57
+
58
class SessionCapacityError(Exception):
    """Raised when a new session cannot be created because a limit was hit.

    ``error_type`` distinguishes the server-wide cap ("global") from the
    per-user cap ("per_user").
    """

    def __init__(self, message: str, error_type: str = "global") -> None:
        super().__init__(message)
        self.error_type = error_type
64
+
65
+
66
+ # ── Capacity limits ─────────────────────────────────────────────────
67
+ # Estimated for HF Spaces cpu-basic (2 vCPU, 16 GB RAM).
68
+ # Each session uses ~10-20 MB (context, tools, queues, task).
69
+ MAX_SESSIONS: int = 50
70
+ MAX_SESSIONS_PER_USER: int = 10
71
+
72
+
73
class SessionManager:
    """Manages multiple concurrent agent sessions.

    Each session bundles a Session/ToolRouter pair, a submission queue, and a
    background asyncio task running the agent loop. ``self.sessions`` is only
    mutated while holding ``self._lock``.
    """

    def __init__(self, config_path: str | None = None) -> None:
        # Shared agent configuration; falls back to the repo default config.
        self.config = load_config(config_path or DEFAULT_CONFIG_PATH)
        self.sessions: dict[str, AgentSession] = {}
        self._lock = asyncio.Lock()

    def _count_user_sessions(self, user_id: str) -> int:
        """Count active sessions owned by a specific user."""
        return sum(
            1
            for s in self.sessions.values()
            if s.user_id == user_id and s.is_active
        )

    def _check_capacity(self, user_id: str) -> None:
        """Raise SessionCapacityError if global or per-user limits are reached.

        Caller must hold ``self._lock`` so the counts cannot change underneath.
        """
        active_count = self.active_session_count
        if active_count >= MAX_SESSIONS:
            raise SessionCapacityError(
                f"Server is at capacity ({active_count}/{MAX_SESSIONS} sessions). "
                "Please try again later.",
                error_type="global",
            )
        if user_id != "dev":
            user_count = self._count_user_sessions(user_id)
            if user_count >= MAX_SESSIONS_PER_USER:
                raise SessionCapacityError(
                    f"You have reached the maximum of {MAX_SESSIONS_PER_USER} "
                    "concurrent sessions. Please close an existing session first.",
                    error_type="per_user",
                )

    async def create_session(
        self, user_id: str = "dev", hf_token: str | None = None
    ) -> str:
        """Create a new agent session and return its ID.

        Session() and ToolRouter() constructors contain blocking I/O
        (e.g. HfApi().whoami(), litellm.get_max_tokens()) so they are
        executed in a thread pool to avoid freezing the async event loop.

        Args:
            user_id: The ID of the user who owns this session.
            hf_token: The user's HF OAuth token, forwarded to tools.

        Raises:
            SessionCapacityError: If the server or user has reached the
                maximum number of concurrent sessions.
        """
        # Fast-fail capacity check before paying for the slow construction.
        async with self._lock:
            self._check_capacity(user_id)

        session_id = str(uuid.uuid4())

        # Per-session queues: submissions in, events out.
        submission_queue: asyncio.Queue = asyncio.Queue()
        event_queue: asyncio.Queue = asyncio.Queue()

        # Run blocking constructors in a thread to keep the event loop responsive.
        # Without this, Session.__init__ → ContextManager → litellm.get_max_tokens()
        # blocks all HTTP/WebSocket handling.
        import time as _time

        def _create_session_sync():
            t0 = _time.monotonic()
            tool_router = ToolRouter(self.config.mcpServers)
            session = Session(event_queue, config=self.config, tool_router=tool_router)
            logger.info(f"Session initialized in {_time.monotonic() - t0:.2f}s")
            return tool_router, session

        tool_router, session = await asyncio.to_thread(_create_session_sync)

        # Store user's HF token on the session so tools can use it.
        session.hf_token = hf_token

        agent_session = AgentSession(
            session_id=session_id,
            session=session,
            tool_router=tool_router,
            submission_queue=submission_queue,
            user_id=user_id,
            hf_token=hf_token,
        )

        async with self._lock:
            # Re-validate under the lock before inserting: other coroutines may
            # have created sessions while this one was being built, so the
            # earlier check alone would let the limits be exceeded (TOCTOU).
            self._check_capacity(user_id)
            self.sessions[session_id] = agent_session

        # Start the agent loop task.
        task = asyncio.create_task(
            self._run_session(session_id, submission_queue, event_queue, tool_router)
        )
        agent_session.task = task

        logger.info(f"Created session {session_id} for user {user_id}")
        return session_id

    async def _run_session(
        self,
        session_id: str,
        submission_queue: asyncio.Queue,
        event_queue: asyncio.Queue,
        tool_router: ToolRouter,
    ) -> None:
        """Run the agent loop for a session and forward events to WebSocket."""
        agent_session = self.sessions.get(session_id)
        if not agent_session:
            logger.error(f"Session {session_id} not found")
            return

        session = agent_session.session

        # Start event forwarder task.
        event_forwarder = asyncio.create_task(
            self._forward_events(session_id, event_queue)
        )

        try:
            async with tool_router:
                # Signal readiness to any listener already attached.
                await session.send_event(
                    Event(event_type="ready", data={"message": "Agent initialized"})
                )

                while session.is_running:
                    try:
                        # Wait with a timeout so is_running is re-checked
                        # periodically even when no submissions arrive.
                        submission = await asyncio.wait_for(
                            submission_queue.get(), timeout=1.0
                        )
                        should_continue = await process_submission(session, submission)
                        if not should_continue:
                            break
                    except asyncio.TimeoutError:
                        continue
                    except asyncio.CancelledError:
                        logger.info(f"Session {session_id} cancelled")
                        break
                    except Exception as e:
                        logger.error(f"Error in session {session_id}: {e}")
                        await session.send_event(
                            Event(event_type="error", data={"error": str(e)})
                        )

        finally:
            event_forwarder.cancel()
            try:
                await event_forwarder
            except asyncio.CancelledError:
                pass

            # NOTE: this cleanup acquires self._lock — callers must never
            # await this task while holding the lock (see shutdown_session).
            async with self._lock:
                if session_id in self.sessions:
                    self.sessions[session_id].is_active = False

            logger.info(f"Session {session_id} ended")

    async def _forward_events(
        self, session_id: str, event_queue: asyncio.Queue
    ) -> None:
        """Forward events from the agent to the WebSocket."""
        while True:
            try:
                event: Event = await event_queue.get()
                await ws_manager.send_event(session_id, event.event_type, event.data)
            except asyncio.CancelledError:
                break
            except Exception as e:
                logger.error(f"Error forwarding event for {session_id}: {e}")

    async def submit(self, session_id: str, operation: Operation) -> bool:
        """Submit an operation to a session. Returns False if it is gone/inactive."""
        async with self._lock:
            agent_session = self.sessions.get(session_id)

        if not agent_session or not agent_session.is_active:
            logger.warning(f"Session {session_id} not found or inactive")
            return False

        submission = Submission(id=f"sub_{uuid.uuid4().hex[:8]}", operation=operation)
        await agent_session.submission_queue.put(submission)
        return True

    async def submit_user_input(self, session_id: str, text: str) -> bool:
        """Submit user input to a session."""
        operation = Operation(op_type=OpType.USER_INPUT, data={"text": text})
        return await self.submit(session_id, operation)

    async def submit_approval(
        self, session_id: str, approvals: list[dict[str, Any]]
    ) -> bool:
        """Submit tool approvals to a session."""
        operation = Operation(
            op_type=OpType.EXEC_APPROVAL, data={"approvals": approvals}
        )
        return await self.submit(session_id, operation)

    async def interrupt(self, session_id: str) -> bool:
        """Interrupt a session."""
        return await self.submit(session_id, Operation(op_type=OpType.INTERRUPT))

    async def undo(self, session_id: str) -> bool:
        """Undo last turn in a session."""
        return await self.submit(session_id, Operation(op_type=OpType.UNDO))

    async def compact(self, session_id: str) -> bool:
        """Compact context in a session."""
        return await self.submit(session_id, Operation(op_type=OpType.COMPACT))

    async def shutdown_session(self, session_id: str) -> bool:
        """Shutdown a specific session gracefully, cancelling it on timeout."""
        success = await self.submit(session_id, Operation(op_type=OpType.SHUTDOWN))
        if not success:
            return False

        # Grab the task reference under the lock, but await it OUTSIDE the
        # lock: the session task's cleanup path acquires self._lock itself,
        # so awaiting it while holding the lock deadlocks until the timeout
        # and the task is cancelled instead of finishing cleanly.
        async with self._lock:
            agent_session = self.sessions.get(session_id)
            task = agent_session.task if agent_session else None

        if task:
            try:
                await asyncio.wait_for(task, timeout=5.0)
            except asyncio.TimeoutError:
                task.cancel()

        return success

    async def delete_session(self, session_id: str) -> bool:
        """Delete a session entirely."""
        async with self._lock:
            agent_session = self.sessions.pop(session_id, None)

        if not agent_session:
            return False

        # Cancel the task if still running (awaited outside the lock).
        if agent_session.task and not agent_session.task.done():
            agent_session.task.cancel()
            try:
                await agent_session.task
            except asyncio.CancelledError:
                pass

        return True

    def get_session_owner(self, session_id: str) -> str | None:
        """Get the user_id that owns a session, or None if session doesn't exist."""
        agent_session = self.sessions.get(session_id)
        if not agent_session:
            return None
        return agent_session.user_id

    def verify_session_access(self, session_id: str, user_id: str) -> bool:
        """Check if a user has access to a session.

        Returns True if:
        - The session exists AND the user owns it
        - The user_id is "dev" (dev mode bypass)
        """
        owner = self.get_session_owner(session_id)
        if owner is None:
            return False
        if user_id == "dev" or owner == "dev":
            return True
        return owner == user_id

    def get_session_info(self, session_id: str) -> dict[str, Any] | None:
        """Get information about a session."""
        agent_session = self.sessions.get(session_id)
        if not agent_session:
            return None

        return {
            "session_id": session_id,
            "created_at": agent_session.created_at.isoformat(),
            "is_active": agent_session.is_active,
            "message_count": len(agent_session.session.context_manager.items),
            "user_id": agent_session.user_id,
        }

    def list_sessions(self, user_id: str | None = None) -> list[dict[str, Any]]:
        """List sessions, optionally filtered by user.

        Args:
            user_id: If provided, only return sessions owned by this user.
                If "dev", return all sessions (dev mode).
        """
        results = []
        for sid in self.sessions:
            info = self.get_session_info(sid)
            if not info:
                continue
            if user_id and user_id != "dev" and info.get("user_id") != user_id:
                continue
            results.append(info)
        return results

    @property
    def active_session_count(self) -> int:
        """Get count of active sessions."""
        return sum(1 for s in self.sessions.values() if s.is_active)
373
+
374
+
375
+ # Global session manager instance
376
+ session_manager = SessionManager()
start.sh ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/bin/bash
# HF Agent Backend startup script (API-only mode).

# Pull in local overrides from .env when present; `set -a` exports every
# variable the sourced file assigns.
if [ -f .env ]; then
    echo "Loading environment from .env file..."
    set -a
    source .env
    set +a
fi

# Defaults, overridable via the environment.
export PORT="${PORT:-7860}"
export HOST="${HOST:-0.0.0.0}"
export CORS_ORIGINS="${CORS_ORIGINS:-*}"

echo "=========================================="
echo "HF Agent Backend (API-only mode)"
echo "=========================================="
echo "Host: $HOST"
echo "Port: $PORT"
echo "CORS Origins: $CORS_ORIGINS"
echo "=========================================="

# Replace the shell with uvicorn so signals reach the server directly.
exec uvicorn main:app --host "$HOST" --port "$PORT" --reload
uv.lock ADDED
The diff for this file is too large to render. See raw diff
 
websocket.py ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """WebSocket connection manager for real-time communication."""
2
+
3
+ import logging
4
+ from typing import Any
5
+
6
+ from fastapi import WebSocket
7
+
8
+ logger = logging.getLogger(__name__)
9
+
10
+
11
class ConnectionManager:
    """Tracks at most one live WebSocket per session and routes events to it."""

    def __init__(self) -> None:
        # session_id -> the WebSocket currently registered for that session.
        self.active_connections: dict[str, WebSocket] = {}

    async def connect(self, websocket: WebSocket, session_id: str) -> None:
        """Accept a WebSocket connection and register it."""
        logger.info(f"Attempting to accept WebSocket for session {session_id}")
        await websocket.accept()
        self.active_connections[session_id] = websocket
        logger.info(f"WebSocket connected and registered for session {session_id}")

    def disconnect(self, session_id: str) -> None:
        """Drop a session's WebSocket registration, if one exists."""
        if self.active_connections.pop(session_id, None) is not None:
            logger.info(f"WebSocket disconnected for session {session_id}")

    async def send_event(
        self, session_id: str, event_type: str, data: dict[str, Any] | None = None
    ) -> None:
        """Send an event to a specific session's WebSocket."""
        connection = self.active_connections.get(session_id)
        if connection is None:
            logger.warning(f"No active connection for session {session_id}")
            return

        payload: dict[str, Any] = {"event_type": event_type}
        if data is not None:
            payload["data"] = data

        try:
            await connection.send_json(payload)
        except Exception as e:
            # A failed send means the socket is dead; forget it.
            logger.error(f"Error sending to session {session_id}: {e}")
            self.disconnect(session_id)

    async def broadcast(
        self, event_type: str, data: dict[str, Any] | None = None
    ) -> None:
        """Broadcast an event to all connected sessions."""
        # Snapshot the ids: send_event may disconnect (mutate) mid-iteration.
        for session_id in list(self.active_connections):
            await self.send_event(session_id, event_type, data)

    def is_connected(self, session_id: str) -> bool:
        """Check if a session has an active WebSocket connection."""
        return session_id in self.active_connections
59
+
60
+
61
+ # Global connection manager instance
62
+ manager = ConnectionManager()