Spaces:

smolagents
/

ml-agent

Running

App Files Community

Aksel Joonas Reedi committed on Jan 7

Commit

6a518b4

2 Parent(s): 7fa8e8d bc8323d

comprehensive session logging

Browse files

Add session trajectory logging with race-condition-free uploads

Files changed (9) hide show

.gitignore +2 -1
agent/config.py +3 -0
agent/core/agent_loop.py +52 -18
agent/core/session.py +171 -1
agent/core/session_uploader.py +194 -0
agent/main.py +1 -2
configs/main_agent_config.json +3 -1
pyproject.toml +2 -0
uv.lock +4 -0

.gitignore CHANGED Viewed

@@ -15,4 +15,5 @@ wheels/
 *.csv
 /logs
 hf-agent-leaderboard/
-.cursor/

 *.csv
 /logs
 hf-agent-leaderboard/
+.cursor/
+session_logs/

agent/config.py CHANGED Viewed

@@ -19,6 +19,9 @@ class Config(BaseModel):
     model_name: str
     mcpServers: dict[str, MCPServerConfig] = {}
 def substitute_env_vars(obj: Any) -> Any:

     model_name: str
     mcpServers: dict[str, MCPServerConfig] = {}
+    save_sessions: bool = True
+    session_dataset_repo: str = "smolagents/hf-agent-sessions"
+    auto_save_interval: int = 3  # Save every N user turns (0 = disabled)
 def substitute_env_vars(obj: Any) -> Any:

agent/core/agent_loop.py CHANGED Viewed

@@ -255,6 +255,11 @@ class Handlers:
                 data={"history_size": len(session.context_manager.items)},
             )
         )
         return final_response
     @staticmethod
@@ -414,6 +419,14 @@ class Handlers:
     @staticmethod
     async def shutdown(session: Session) -> bool:
         """Handle shutdown (like shutdown in codex.rs:1329)"""
         session.is_running = False
         await session.send_event(Event(event_type="shutdown"))
         return True
@@ -474,26 +487,47 @@ async def submission_loop(
     session = Session(event_queue, config=config, tool_router=tool_router)
     print("Agent loop started")
-    # Main processing loop
-    async with tool_router:
-        # Emit ready event after initialization
-        await session.send_event(
-            Event(event_type="ready", data={"message": "Agent initialized"})
         )
-        while session.is_running:
-            submission = await submission_queue.get()
-            try:
-                should_continue = await process_submission(session, submission)
-                if not should_continue:
                     break
-            except asyncio.CancelledError:
-                break
-            except Exception as e:
-                print(f"❌ Error in agent loop: {e}")
-                await session.send_event(
-                    Event(event_type="error", data={"error": str(e)})
-                )
-    print("🛑 Agent loop exited")

                 data={"history_size": len(session.context_manager.items)},
             )
         )
+        # Increment turn counter and check for auto-save
+        session.increment_turn()
+        await session.auto_save_if_needed()
         return final_response
     @staticmethod
     @staticmethod
     async def shutdown(session: Session) -> bool:
         """Handle shutdown (like shutdown in codex.rs:1329)"""
+        # Save session trajectory if enabled (fire-and-forget, returns immediately)
+        if session.config.save_sessions:
+            print("💾 Saving session...")
+            repo_id = session.config.session_dataset_repo
+            local_path = session.save_and_upload_detached(repo_id)
+            if local_path:
+                print("✅ Session saved locally, upload in progress")
         session.is_running = False
         await session.send_event(Event(event_type="shutdown"))
         return True
     session = Session(event_queue, config=config, tool_router=tool_router)
     print("Agent loop started")
+    # Retry any failed uploads from previous sessions (fire-and-forget)
+    if config and config.save_sessions:
+        Session.retry_failed_uploads_detached(
+            directory="session_logs", repo_id=config.session_dataset_repo
         )
+    try:
+        # Main processing loop
+        async with tool_router:
+            # Emit ready event after initialization
+            await session.send_event(
+                Event(event_type="ready", data={"message": "Agent initialized"})
+            )
+            while session.is_running:
+                submission = await submission_queue.get()
+                try:
+                    should_continue = await process_submission(session, submission)
+                    if not should_continue:
+                        break
+                except asyncio.CancelledError:
+                    print("\n⚠️  Agent loop cancelled")
                     break
+                except Exception as e:
+                    print(f"❌ Error in agent loop: {e}")
+                    await session.send_event(
+                        Event(event_type="error", data={"error": str(e)})
+                    )
+        print("🛑 Agent loop exited")
+    finally:
+        # Emergency save if session saving is enabled and shutdown wasn't called properly
+        if session.config.save_sessions and session.is_running:
+            print("\n💾 Emergency save: preserving session before exit...")
+            try:
+                local_path = session.save_and_upload_detached(
+                    session.config.session_dataset_repo
+                )
+                if local_path:
+                    print("✅ Emergency save successful, upload in progress")
+            except Exception as e:
+                print(f"❌ Emergency save failed: {e}")

agent/core/session.py CHANGED Viewed

@@ -1,7 +1,12 @@
 import asyncio
 import uuid
 from dataclasses import dataclass
 from enum import Enum
 from typing import Any, Optional
 from litellm import get_max_tokens
@@ -55,11 +60,176 @@ class Session:
         self.current_task: asyncio.Task | None = None
         self.pending_approval: Optional[dict[str, Any]] = None
     async def send_event(self, event: Event) -> None:
-        """Send event back to client"""
         await self.event_queue.put(event)
     def interrupt(self) -> None:
         """Interrupt current running task"""
         if self.current_task and not self.current_task.done():
             self.current_task.cancel()

 import asyncio
+import json
+import subprocess
+import sys
 import uuid
 from dataclasses import dataclass
+from datetime import datetime
 from enum import Enum
+from pathlib import Path
 from typing import Any, Optional
 from litellm import get_max_tokens
         self.current_task: asyncio.Task | None = None
         self.pending_approval: Optional[dict[str, Any]] = None
+        # Session trajectory logging
+        self.logged_events: list[dict] = []
+        self.session_start_time = datetime.now().isoformat()
+        self.turn_count: int = 0
+        self.last_auto_save_turn: int = 0
     async def send_event(self, event: Event) -> None:
+        """Send event back to client and log to trajectory"""
         await self.event_queue.put(event)
+        # Log event to trajectory
+        self.logged_events.append(
+            {
+                "timestamp": datetime.now().isoformat(),
+                "event_type": event.event_type,
+                "data": event.data,
+            }
+        )
     def interrupt(self) -> None:
         """Interrupt current running task"""
         if self.current_task and not self.current_task.done():
             self.current_task.cancel()
+    def increment_turn(self) -> None:
+        """Increment turn counter (called after each user interaction)"""
+        self.turn_count += 1
+    async def auto_save_if_needed(self) -> None:
+        """Check if auto-save should trigger and save if so (completely non-blocking)"""
+        if not self.config.save_sessions:
+            return
+        interval = self.config.auto_save_interval
+        if interval <= 0:
+            return
+        turns_since_last_save = self.turn_count - self.last_auto_save_turn
+        if turns_since_last_save >= interval:
+            print(f"\n💾 Auto-saving session (turn {self.turn_count})...")
+            # Fire-and-forget save - returns immediately
+            self.save_and_upload_detached(self.config.session_dataset_repo)
+            self.last_auto_save_turn = self.turn_count
+    def get_trajectory(self) -> dict:
+        """Serialize complete session trajectory for logging"""
+        return {
+            "session_id": self.session_id,
+            "session_start_time": self.session_start_time,
+            "session_end_time": datetime.now().isoformat(),
+            "model_name": self.config.model_name,
+            "messages": [msg.model_dump() for msg in self.context_manager.items],
+            "events": self.logged_events,
+        }
+    def save_trajectory_local(
+        self,
+        directory: str = "session_logs",
+        upload_status: str = "pending",
+        dataset_url: Optional[str] = None,
+    ) -> Optional[str]:
+        """
+        Save trajectory to local JSON file as backup with upload status
+        Args:
+            directory: Directory to save logs (default: "session_logs")
+            upload_status: Status of upload attempt ("pending", "success", "failed")
+            dataset_url: URL of dataset if upload succeeded
+        Returns:
+            Path to saved file if successful, None otherwise
+        """
+        try:
+            log_dir = Path(directory)
+            log_dir.mkdir(parents=True, exist_ok=True)
+            trajectory = self.get_trajectory()
+            # Add upload metadata
+            trajectory["upload_status"] = upload_status
+            trajectory["upload_url"] = dataset_url
+            trajectory["last_save_time"] = datetime.now().isoformat()
+            filename = f"session_{self.session_id}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
+            filepath = log_dir / filename
+            with open(filepath, "w") as f:
+                json.dump(trajectory, f, indent=2)
+            return str(filepath)
+        except Exception as e:
+            print(f"Failed to save session locally: {e}")
+            return None
+    def update_local_save_status(
+        self, filepath: str, upload_status: str, dataset_url: Optional[str] = None
+    ) -> bool:
+        """Update the upload status of an existing local save file"""
+        try:
+            with open(filepath, "r") as f:
+                data = json.load(f)
+            data["upload_status"] = upload_status
+            data["upload_url"] = dataset_url
+            data["last_save_time"] = datetime.now().isoformat()
+            with open(filepath, "w") as f:
+                json.dump(data, f, indent=2)
+            return True
+        except Exception as e:
+            print(f"Failed to update local save status: {e}")
+            return False
+    def save_and_upload_detached(self, repo_id: str) -> Optional[str]:
+        """
+        Save session locally and spawn detached subprocess for upload (fire-and-forget)
+        Args:
+            repo_id: HuggingFace dataset repo ID
+        Returns:
+            Path to local save file
+        """
+        # Save locally first (fast, synchronous)
+        local_path = self.save_trajectory_local(upload_status="pending")
+        if not local_path:
+            return None
+        # Spawn detached subprocess for upload (fire-and-forget)
+        try:
+            uploader_script = Path(__file__).parent / "session_uploader.py"
+            # Use Popen with detached process
+            subprocess.Popen(
+                [sys.executable, str(uploader_script), "upload", local_path, repo_id],
+                stdin=subprocess.DEVNULL,
+                stdout=subprocess.DEVNULL,
+                stderr=subprocess.DEVNULL,
+                start_new_session=True,  # Detach from parent
+            )
+        except Exception as e:
+            print(f"⚠️  Failed to spawn upload subprocess: {e}")
+        return local_path
+    @staticmethod
+    def retry_failed_uploads_detached(
+        directory: str = "session_logs", repo_id: Optional[str] = None
+    ) -> None:
+        """
+        Spawn detached subprocess to retry failed/pending uploads (fire-and-forget)
+        Args:
+            directory: Directory containing session logs
+            repo_id: Target dataset repo ID
+        """
+        if not repo_id:
+            return
+        try:
+            uploader_script = Path(__file__).parent / "session_uploader.py"
+            # Spawn detached subprocess for retry
+            subprocess.Popen(
+                [sys.executable, str(uploader_script), "retry", directory, repo_id],
+                stdin=subprocess.DEVNULL,
+                stdout=subprocess.DEVNULL,
+                stderr=subprocess.DEVNULL,
+                start_new_session=True,  # Detach from parent
+            )
+        except Exception as e:
+            print(f"⚠️  Failed to spawn retry subprocess: {e}")

agent/core/session_uploader.py ADDED Viewed

	@@ -0,0 +1,194 @@

+#!/usr/bin/env python3
+"""
+Standalone script for uploading session trajectories to HuggingFace.
+This runs as a separate process to avoid blocking the main agent.
+Uses individual file uploads to avoid race conditions.
+"""
+import json
+import os
+import sys
+from datetime import datetime
+from pathlib import Path
+def upload_session_as_file(
+    session_file: str, repo_id: str, max_retries: int = 3
+) -> bool:
+    """
+    Upload a single session as an individual JSONL file (no race conditions)
+    Args:
+        session_file: Path to local session JSON file
+        repo_id: HuggingFace dataset repo ID
+        max_retries: Number of retry attempts
+    Returns:
+        True if successful, False otherwise
+    """
+    try:
+        from huggingface_hub import HfApi
+    except ImportError:
+        print("Error: huggingface_hub library not available", file=sys.stderr)
+        return False
+    try:
+        # Load session data
+        with open(session_file, "r") as f:
+            data = json.load(f)
+        # Check if already uploaded
+        upload_status = data.get("upload_status")
+        if upload_status == "success":
+            return True
+        hf_token = os.getenv("HF_TOKEN")
+        if not hf_token:
+            # Update status to failed
+            data["upload_status"] = "failed"
+            with open(session_file, "w") as f:
+                json.dump(data, f, indent=2)
+            return False
+        # Prepare JSONL content (single line)
+        # Store messages and events as JSON strings to avoid schema conflicts
+        session_row = {
+            "session_id": data["session_id"],
+            "session_start_time": data["session_start_time"],
+            "session_end_time": data["session_end_time"],
+            "model_name": data["model_name"],
+            "messages": json.dumps(data["messages"]),
+            "events": json.dumps(data["events"]),
+        }
+        # Create temporary JSONL file
+        import tempfile
+        with tempfile.NamedTemporaryFile(
+            mode="w", suffix=".jsonl", delete=False
+        ) as tmp:
+            json.dump(session_row, tmp)  # Single line JSON
+            tmp_path = tmp.name
+        try:
+            # Generate unique path in repo: sessions/YYYY-MM-DD/session_id.jsonl
+            session_id = data["session_id"]
+            date_str = datetime.fromisoformat(data["session_start_time"]).strftime(
+                "%Y-%m-%d"
+            )
+            repo_path = f"sessions/{date_str}/{session_id}.jsonl"
+            # Upload with retries
+            api = HfApi()
+            for attempt in range(max_retries):
+                try:
+                    # Try to create repo if it doesn't exist (idempotent)
+                    try:
+                        api.create_repo(
+                            repo_id=repo_id,
+                            repo_type="dataset",
+                            private=True,
+                            token=hf_token,
+                            exist_ok=True,  # Don't fail if already exists
+                        )
+                    except Exception:
+                        # Repo might already exist, continue
+                        pass
+                    # Upload the session file
+                    api.upload_file(
+                        path_or_fileobj=tmp_path,
+                        path_in_repo=repo_path,
+                        repo_id=repo_id,
+                        repo_type="dataset",
+                        token=hf_token,
+                        commit_message=f"Add session {session_id}",
+                    )
+                    # Update local status to success
+                    data["upload_status"] = "success"
+                    data["upload_url"] = f"https://huggingface.co/datasets/{repo_id}"
+                    with open(session_file, "w") as f:
+                        json.dump(data, f, indent=2)
+                    return True
+                except Exception:
+                    if attempt < max_retries - 1:
+                        import time
+                        wait_time = 2**attempt
+                        time.sleep(wait_time)
+                    else:
+                        # Final attempt failed
+                        data["upload_status"] = "failed"
+                        with open(session_file, "w") as f:
+                            json.dump(data, f, indent=2)
+                        return False
+        finally:
+            # Clean up temp file
+            try:
+                os.unlink(tmp_path)
+            except Exception:
+                pass
+    except Exception as e:
+        print(f"Error uploading session: {e}", file=sys.stderr)
+        return False
+def retry_failed_uploads(directory: str, repo_id: str):
+    """Retry all failed/pending uploads in a directory"""
+    log_dir = Path(directory)
+    if not log_dir.exists():
+        return
+    session_files = list(log_dir.glob("session_*.json"))
+    for filepath in session_files:
+        try:
+            with open(filepath, "r") as f:
+                data = json.load(f)
+            upload_status = data.get("upload_status", "unknown")
+            # Only retry pending or failed uploads
+            if upload_status in ["pending", "failed"]:
+                upload_session_as_file(str(filepath), repo_id)
+        except Exception:
+            pass
+if __name__ == "__main__":
+    if len(sys.argv) < 3:
+        print("Usage: session_uploader.py <command> <args...>")
+        sys.exit(1)
+    command = sys.argv[1]
+    if command == "upload":
+        # python session_uploader.py upload <session_file> <repo_id>
+        if len(sys.argv) < 4:
+            print("Usage: session_uploader.py upload <session_file> <repo_id>")
+            sys.exit(1)
+        session_file = sys.argv[2]
+        repo_id = sys.argv[3]
+        success = upload_session_as_file(session_file, repo_id)
+        sys.exit(0 if success else 1)
+    elif command == "retry":
+        # python session_uploader.py retry <directory> <repo_id>
+        if len(sys.argv) < 4:
+            print("Usage: session_uploader.py retry <directory> <repo_id>")
+            sys.exit(1)
+        directory = sys.argv[2]
+        repo_id = sys.argv[3]
+        retry_failed_uploads(directory, repo_id)
+        sys.exit(0)
+    else:
+        print(f"Unknown command: {command}")
+        sys.exit(1)

agent/main.py CHANGED Viewed

@@ -415,8 +415,7 @@ async def main():
     )
     await submission_queue.put(shutdown_submission)
-    # Wait for tasks to complete
-    await asyncio.wait_for(agent_task, timeout=2.0)
     listener_task.cancel()
     print("✨ Goodbye!\n")

     )
     await submission_queue.put(shutdown_submission)
+    await asyncio.wait_for(agent_task, timeout=5.0)
     listener_task.cancel()
     print("✨ Goodbye!\n")

configs/main_agent_config.json CHANGED Viewed

@@ -1,5 +1,7 @@
 {
-  "model_name": "anthropic/claude-sonnet-4-5-20250929",
   "mcpServers": {
     "hf-mcp-server": {
       "transport": "http",

 {
+  "model_name": "anthropic/claude-opus-4-5-20251101",
+  "save_sessions": true,
+  "session_dataset_repo": "smolagents/hf-agent-sessions",
   "mcpServers": {
     "hf-mcp-server": {
       "transport": "http",

pyproject.toml CHANGED Viewed

@@ -5,6 +5,7 @@ description = "Add your description here"
 readme = "README.md"
 requires-python = ">=3.12"
 dependencies = [
     # Core dependencies (always required)
     "pydantic>=2.12.3",
     "python-dotenv>=1.2.1",
@@ -22,6 +23,7 @@ agent = [
     "thefuzz>=0.22.1",
     "nbconvert>=7.16.6",
     "nbformat>=5.10.4",
 ]
 # Evaluation/benchmarking dependencies

 readme = "README.md"
 requires-python = ">=3.12"
 dependencies = [
+    "datasets>=4.4.1",
     # Core dependencies (always required)
     "pydantic>=2.12.3",
     "python-dotenv>=1.2.1",
     "thefuzz>=0.22.1",
     "nbconvert>=7.16.6",
     "nbformat>=5.10.4",
+    "datasets>=4.3.0",  # For session logging to HF datasets
 ]
 # Evaluation/benchmarking dependencies

uv.lock CHANGED Viewed

@@ -902,12 +902,14 @@ name = "hf-agent"
 version = "0.1.0"
 source = { virtual = "." }
 dependencies = [
     { name = "pydantic" },
     { name = "python-dotenv" },
 ]
 [package.optional-dependencies]
 agent = [
     { name = "fastmcp" },
     { name = "huggingface-hub" },
     { name = "litellm" },
@@ -946,6 +948,8 @@ eval = [
 [package.metadata]
 requires-dist = [
     { name = "datasets", marker = "extra == 'eval'", specifier = ">=4.3.0" },
     { name = "fastmcp", marker = "extra == 'agent'", specifier = ">=2.4.0" },
     { name = "hf-agent", extras = ["agent", "eval", "dev"], marker = "extra == 'all'" },

 version = "0.1.0"
 source = { virtual = "." }
 dependencies = [
+    { name = "datasets" },
     { name = "pydantic" },
     { name = "python-dotenv" },
 ]
 [package.optional-dependencies]
 agent = [
+    { name = "datasets" },
     { name = "fastmcp" },
     { name = "huggingface-hub" },
     { name = "litellm" },
 [package.metadata]
 requires-dist = [
+    { name = "datasets", specifier = ">=4.4.1" },
+    { name = "datasets", marker = "extra == 'agent'", specifier = ">=4.3.0" },
     { name = "datasets", marker = "extra == 'eval'", specifier = ">=4.3.0" },
     { name = "fastmcp", marker = "extra == 'agent'", specifier = ">=2.4.0" },
     { name = "hf-agent", extras = ["agent", "eval", "dev"], marker = "extra == 'all'" },