rishiad committed on
Commit
3b2789b
·
unverified ·
1 Parent(s): a532d53

feat: setup first submission for kinitro

Browse files
Files changed (7) hide show
  1. agent.capnp +13 -0
  2. agent.py +165 -0
  3. agent_interface.py +53 -0
  4. agent_server.py +70 -0
  5. main.py +66 -0
  6. pyproject.toml +15 -0
  7. uv.lock +0 -0
agent.capnp ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Unique Cap'n Proto file ID. NOTE: the trailing semicolon is required by the
# capnp schema language; it was missing in the original.
@0x893bac407c81b48c;

# RPC contract between the evaluator host and the miner's agent container.
interface Agent {

  # A serialized tensor: raw bytes plus the metadata needed to rebuild it.
  struct Tensor {
    data @0 :Data;           # tensor bytes tensor.numpy().tobytes()
    shape @1 :List(UInt64);  # tensor shape list(tensor.shape())
    dtype @2 :Text;          # data type name tensor.dtype()
  }

  # Given a pickled observation blob, return the agent's action.
  act @0 (obs :Data) -> (action :Tensor);
  # Reset the agent's per-episode state.
  reset @1 () -> ();
}
agent.py ADDED
@@ -0,0 +1,165 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Implementation of the AgentInterface for MetaWorld tasks.
3
+
4
+ This agent uses the SawyerPickPlaceV2Policy from MetaWorld as an expert policy.
5
+ """
6
+
7
+ import logging
8
+ from typing import Any, Dict
9
+
10
+ import gymnasium as gym
11
+ import metaworld
12
+ import numpy as np
13
+ import torch
14
+ from agent_interface import AgentInterface
15
+ from metaworld.policies import SawyerPickPlaceV2Policy
16
+
17
+
18
class RLAgent(AgentInterface):
    """
    MetaWorld agent implementation using the SawyerPickPlaceV2Policy expert policy.

    This agent uses the expert policy from MetaWorld for pick and place tasks.
    """

    def __init__(
        self,
        observation_space: gym.Space | None = None,
        action_space: gym.Space | None = None,
        seed: int | None = None,
        **kwargs,
    ):
        """
        Initialize the agent and its expert policy.

        Args:
            observation_space: Environment observation space (defaulted by the base class).
            action_space: Environment action space (defaulted by the base class).
            seed: RNG seed; the base class draws a random one when None.
            kwargs: Extra options; ``max_episode_steps`` (default 200) is read here.
        """
        super().__init__(observation_space, action_space, seed, **kwargs)

        self.logger = logging.getLogger(__name__)
        self.logger.info(f"Initializing MetaWorld agent with seed {self.seed}")

        # Scripted expert policy shipped with MetaWorld for pick-and-place.
        self.policy = SawyerPickPlaceV2Policy()
        self.logger.info("Successfully initialized SawyerPickPlaceV2Policy")

        # Track episode state.
        self.episode_step = 0
        self.max_episode_steps = kwargs.get("max_episode_steps", 200)

        self.logger.info("MetaWorld agent initialized successfully")

    def act(self, obs: Dict[str, Any], **kwargs) -> torch.Tensor:
        """
        Process the observation and return an action using the MetaWorld expert policy.

        Args:
            obs: Observation from the environment
            kwargs: Additional arguments

        Returns:
            action: Action tensor to take in the environment
        """
        try:
            # Process observation to extract the format needed by the expert policy.
            processed_obs = self._process_observation(obs)

            # Use the expert policy; MetaWorld policies expect numpy arrays.
            action_numpy = self.policy.get_action(processed_obs)
            action_tensor = torch.from_numpy(np.array(action_numpy)).float()

            self.episode_step += 1

            # Log occasionally to avoid spam. (Fix: the original gated two
            # separate debug logs on `episode_step % 50`, once before and once
            # after the increment, so they fired on different step values.)
            if self.episode_step % 50 == 0:
                self.logger.debug(
                    f"Step {self.episode_step}: Action shape {action_tensor.shape}, "
                    f"expert action {action_numpy}"
                )

            return action_tensor

        except Exception as e:
            self.logger.error(f"Error in act method: {e}", exc_info=True)
            # Return zeros as a fallback so the evaluator always gets an action.
            if isinstance(self.action_space, gym.spaces.Box):
                return torch.zeros(self.action_space.shape[0], dtype=torch.float32)
            else:
                # 4 matches the default Box action space from AgentInterface.
                return torch.zeros(4, dtype=torch.float32)

    def _process_observation(self, obs):
        """
        Helper method to process observations for the MetaWorld expert policy.

        Accepts either a dict (various MetaWorld wrapper formats) or an
        array-like, and always returns a float32 numpy array.
        """
        if isinstance(obs, dict):
            # MetaWorld environments can return observations in different formats.
            if "observation" in obs:
                # Standard format for goal-observable environments.
                processed_obs = obs["observation"]
            elif "obs" in obs:
                processed_obs = obs["obs"]
            elif "state_observation" in obs:
                # Some MetaWorld environments use this key.
                processed_obs = obs["state_observation"]
            elif "goal_achieved" in obs:
                # If we have information about goal achievement; falls back to
                # the first value in the dict as the base observation.
                achievement = obs.get("goal_achieved", False)
                base_obs = next(iter(obs.values()))
                self.logger.debug(f"Goal achieved: {achievement}")
                processed_obs = base_obs
            else:
                # Unknown structure: use the first value and log which key it was.
                processed_obs = next(iter(obs.values()))
                self.logger.debug(f"Using observation key: {next(iter(obs.keys()))}")
        else:
            # Already an array-like; use it directly.
            processed_obs = obs

        # Ensure we return a numpy array as expected by MetaWorld policies.
        if not isinstance(processed_obs, np.ndarray):
            try:
                processed_obs = np.array(processed_obs, dtype=np.float32)
            except Exception as e:
                self.logger.error(f"Failed to convert observation to numpy array: {e}")
                # Return a dummy observation if conversion fails.
                if (
                    self.observation_space
                    and hasattr(self.observation_space, "shape")
                    and self.observation_space.shape is not None
                ):
                    processed_obs = np.zeros(
                        self.observation_space.shape, dtype=np.float32
                    )
                else:
                    # Typical MetaWorld observation dimension if all else fails.
                    processed_obs = np.zeros(39, dtype=np.float32)

        return processed_obs

    def reset(self) -> None:
        """
        Reset agent state between episodes.
        """
        self.logger.debug("Resetting agent")
        self.episode_step = 0
        # Any other stateful components would be reset here.

    def _build_model(self):
        """
        Build a neural network model for the agent.

        Placeholder: the expert policy needs no learned model. Kept as a hook
        for where a PyTorch network would be defined by a learning agent.
        """
        # Example of where you might build a simple PyTorch model:
        # model = torch.nn.Sequential(
        #     torch.nn.Linear(self.observation_space.shape[0], 128),
        #     torch.nn.ReLU(),
        #     torch.nn.Linear(128, 64),
        #     torch.nn.ReLU(),
        #     torch.nn.Linear(64, self.action_space.shape[0]),
        # )
        # return model
        pass
agent_interface.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Abstract base class defining the standard interface for all agents.
3
+
4
+ All miner-submitted agents must implement this interface to be evaluated.
5
+ """
6
+
7
+ from abc import ABC, abstractmethod
8
+
9
+ import gymnasium as gym
10
+ import numpy as np
11
+ import torch
12
+
13
+
14
class AgentInterface(ABC):
    """
    Standard interface that all miner implementations must follow.

    This ensures a consistent contract between the evaluator and any submitted agent,
    regardless of the underlying model architecture or implementation details.
    """

    def __init__(
        self,
        observation_space: gym.Space | None = None,
        action_space: gym.Space | None = None,
        seed: int | None = None,
        **kwargs,
    ):
        """
        Store (or default) the spaces and seed the agent's RNG.

        Args:
            observation_space: Defaults to a 100-dim Box in [-1, 1].
            action_space: Defaults to a 4-dim Box in [-1, 1].
            seed: RNG seed; a random one is drawn when None.
            kwargs: Ignored here; available to subclasses.
        """
        self.observation_space = observation_space or gym.spaces.Box(
            low=-1, high=1, shape=(100,), dtype=np.float32
        )
        self.action_space = action_space or gym.spaces.Box(
            low=-1, high=1, shape=(4,), dtype=np.float32
        )
        # Fix: `seed or randint(...)` silently discarded a legitimate seed of 0.
        self.seed = seed if seed is not None else int(np.random.randint(0, 1000000))
        # Fix: seed the generator from self.seed (the original passed the raw
        # `seed` argument, so an unset seed left the RNG unseeded and
        # inconsistent with the stored self.seed).
        self.rng = np.random.default_rng(self.seed)

    @abstractmethod
    def act(self, obs: dict, **kwargs) -> torch.Tensor:
        """
        Take action given current observation and any additional arguments.
        """
        pass

    def reset(self) -> None:
        """
        Reset agent state for new episode.

        This is called at the beginning of each episode. Stateless agents
        can implement this as a no-op. Agents with internal memory/history
        should reset their state here.
        """
        pass
agent_server.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# The agent server runs on the miner container. The host calls these functions

import asyncio
import logging
import pickle

import agent_capnp
import capnp
import numpy as np
import torch

# Fix: use an absolute import. These modules live as flat top-level files
# (agent.py and main.py import them absolutely); the original relative import
# (`from .agent_interface import ...`) raises "attempted relative import with
# no known parent package" when the server is started via main.py.
from agent_interface import AgentInterface
13
+
14
+
15
class AgentServer(agent_capnp.Agent.Server):
    """Cap'n Proto RPC server wrapping an AgentInterface implementation.

    Runs inside the miner container; the evaluator host invokes `act` and
    `reset` remotely per the schema in agent.capnp.
    """

    def __init__(self, agent: AgentInterface):
        # The concrete agent that services all RPC calls.
        self.agent = agent
        self.logger = logging.getLogger(__name__)
        self.logger.info("AgentServer initialized with agent: %s", type(agent).__name__)

    async def act(self, obs, **kwargs):
        """Handle an `act` RPC: unpickle the observation, run the agent,
        and return the action as an agent.capnp Tensor message.

        Errors are logged and re-raised so they propagate to the RPC caller.
        """
        try:
            # Deserialize observation from bytes.
            # SECURITY NOTE(review): pickle.loads on bytes received over RPC can
            # execute arbitrary code if the peer is untrusted. This assumes the
            # only caller is the trusted evaluator host — confirm that boundary.
            observation = pickle.loads(obs)

            # Call the agent's act method
            action_tensor = self.agent.act(observation)

            # Convert to numpy if it's a torch tensor
            if isinstance(action_tensor, torch.Tensor):
                action_numpy = action_tensor.detach().cpu().numpy()
            else:
                action_numpy = np.array(action_tensor)

            # Prepare tensor response; fields mirror the Tensor struct in agent.capnp.
            response = agent_capnp.Agent.Tensor.new_message()
            response.data = action_numpy.tobytes()
            response.shape = list(action_numpy.shape)
            response.dtype = str(action_numpy.dtype)

            return response
        except Exception as e:
            self.logger.error(f"Error in act: {e}", exc_info=True)
            raise

    async def reset(self, **kwargs):
        """Handle a `reset` RPC by delegating to the wrapped agent."""
        try:
            self.agent.reset()
        except Exception as e:
            self.logger.error(f"Error in reset: {e}", exc_info=True)
            raise
52
+
53
+
54
async def serve(agent: AgentInterface, address="*", port=8000):
    """Expose *agent* over a Cap'n Proto two-party RPC server.

    Runs until the server loop terminates; the socket is always closed on exit.
    """
    rpc_server = capnp.TwoPartyServer(address, port, bootstrap=AgentServer(agent))
    logging.info(f"Agent RPC server listening on {address}:{port}")

    try:
        # Block here for the lifetime of the server.
        await rpc_server.run_forever()
    finally:
        # Guarantee the listening socket is released even on cancellation/error.
        rpc_server.close()
63
+
64
+
65
def start_server(agent: AgentInterface, address="*", port=8000):
    """Synchronous entry point: run the async RPC server until interrupted.

    Args:
        agent: The agent implementation to serve.
        address: Bind address ("*" for all interfaces).
        port: TCP port to listen on.
    """
    try:
        # Fix: asyncio.get_event_loop() with no running loop is deprecated
        # (Python 3.10+, and this project requires >= 3.13). asyncio.run()
        # creates and tears down the event loop for us.
        # NOTE(review): if pycapnp requires its kj event-loop context here,
        # wrap serve() accordingly — confirm against the pycapnp version used.
        asyncio.run(serve(agent, address, port))
    except KeyboardInterrupt:
        logging.info("Server stopped by user")
main.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Main entry point for the agent server.
4
+
5
+ This script creates an agent implementation and starts the RPC server
6
+ to handle requests from the evaluator.
7
+ """
8
+
9
+ import argparse
10
+ import logging
11
+ import sys
12
+
13
+ from agent import RLAgent
14
+ from agent_server import start_server
15
+
16
+
17
def setup_logging(level=logging.INFO):
    """Configure root logging: timestamped format, emitted to stdout."""
    stdout_handler = logging.StreamHandler(sys.stdout)
    logging.basicConfig(
        format="%(asctime)s | %(levelname)s | %(name)s | %(message)s",
        level=level,
        handlers=[stdout_handler],
    )
24
+
25
+
26
def main():
    """Main entry point."""
    # Command-line interface: host/port to bind, plus log verbosity.
    parser = argparse.ArgumentParser(description="Start the agent server")
    parser.add_argument("--host", type=str, default="*", help="Host to bind the server to")
    parser.add_argument("--port", type=int, default=8000, help="Port to bind the server to")
    parser.add_argument(
        "--log-level",
        type=str,
        default="INFO",
        choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"],
        help="Logging level",
    )
    args = parser.parse_args()

    # Translate the level name into the numeric logging constant and configure.
    setup_logging(getattr(logging, args.log_level))
    logger = logging.getLogger(__name__)
    logger.info(f"Starting agent server on {args.host}:{args.port}")

    # Build the agent and hand it to the blocking RPC server.
    agent = RLAgent()
    try:
        start_server(agent, args.host, args.port)
    except KeyboardInterrupt:
        logger.info("Server stopped by user")
    except Exception as e:
        logger.error(f"Error starting server: {e}", exc_info=True)
        sys.exit(1)
63
+
64
+
65
+ if __name__ == "__main__":
66
+ main()
pyproject.toml ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Project manifest for the Storb RL miner submission.
[project]
name = "storb-rl-miner"
version = "0.0.1"
description = "Storb RL Subnet Miner CLI"
readme = "README.md"
license = { file = "LICENSE" }
requires-python = ">=3.13"
dependencies = [
    # Chain-interaction client, pinned to the storb-tech fork.
    "fiber @ git+https://github.com/storb-tech/fiber.git#egg=fiber[chain]",
    "metaworld>=3.0.0",
    "torch>=2.8.0",
    # Fix: declared below are packages the code imports directly
    # (agent.py: gymnasium, numpy; agent_server.py: capnp/pycapnp) but that
    # were missing from the original dependency list.
    "gymnasium",
    "numpy",
    "pycapnp",
]

# Development-only tooling (PEP 735 dependency group).
[dependency-groups]
dev = ["debugpy>=1.8.9", "py-spy>=0.4.0", "pytest>=8.3.4", "ruff>=0.8.2"]
uv.lock ADDED
The diff for this file is too large to render. See raw diff