Upload submission from kinitro-agent-template

Browse files

Files changed (9) hide show

.gitignore +216 -0
.python-version +1 -0
README.md +5 -0
agent.capnp +13 -0
agent.py +165 -0
agent_interface.py +53 -0
agent_server.py +114 -0
main.py +66 -0
pyproject.toml +13 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,216 @@

+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[codz]
+*$py.class
+# C extensions
+*.so
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py.cover
+.hypothesis/
+.pytest_cache/
+cover/
+# Translations
+*.mo
+*.pot
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+# Flask stuff:
+instance/
+.webassets-cache
+# Scrapy stuff:
+.scrapy
+# Sphinx documentation
+docs/_build/
+# PyBuilder
+.pybuilder/
+target/
+# Jupyter Notebook
+.ipynb_checkpoints
+# IPython
+profile_default/
+ipython_config.py
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+# UV
+#   Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#uv.lock
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+#poetry.toml
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#   pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
+#   https://pdm-project.org/en/latest/usage/project/#working-with-version-control
+#pdm.lock
+#pdm.toml
+.pdm-python
+.pdm-build/
+# pixi
+#   Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
+#pixi.lock
+#   Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
+#   in the .venv directory. It is recommended not to include this directory in version control.
+.pixi
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+# Redis
+*.rdb
+*.aof
+*.pid
+# RabbitMQ
+mnesia/
+rabbitmq/
+rabbitmq-data/
+# ActiveMQ
+activemq-data/
+# SageMath parsed files
+*.sage.py
+# Environments
+.env
+.envrc
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+# Spyder project settings
+.spyderproject
+.spyproject
+# Rope project settings
+.ropeproject
+# mkdocs documentation
+/site
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+# Pyre type checker
+.pyre/
+# pytype static type analyzer
+.pytype/
+# Cython debug symbols
+cython_debug/
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
+# Abstra
+# Abstra is an AI-powered process automation framework.
+# Ignore directories containing user credentials, local state, and settings.
+# Learn more at https://abstra.io/docs
+.abstra/
+# Visual Studio Code
+#  Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
+#  that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
+#  and can be added to the global gitignore or merged into this file. However, if you prefer,
+#  you could uncomment the following to ignore the entire vscode folder
+# .vscode/
+# Ruff stuff:
+.ruff_cache/
+# PyPI configuration file
+.pypirc
+# Marimo
+marimo/_static/
+marimo/_lsp/
+__marimo__/
+# Streamlit
+.streamlit/secrets.toml

.python-version ADDED Viewed

	@@ -0,0 +1 @@


1	+ 3.12

README.md ADDED Viewed

	@@ -0,0 +1,5 @@

+# Kinitro Agent Template
+A template to help you get started with submitting agents to Kinitro.
+The main Kinitro repository can be found [here](https://github.com/threetau/kinitro).

agent.capnp ADDED Viewed

	@@ -0,0 +1,13 @@

+@0x893bac407c81b48c;
+# RPC contract served by agent_server.py.
+interface Agent {
+    # Flat encoding of an n-dimensional array.
+    struct Tensor {
+        data @0   :Data; # raw array bytes, e.g. tensor.numpy().tobytes()
+        shape @1  :List(UInt64); # array dimensions, e.g. list(tensor.shape)
+        dtype @2  :Text; # element type name, e.g. str(tensor.numpy().dtype)
+    }
+    # obs is a serialized observation; agent_server.py decodes it with pickle.loads.
+    act @0 (obs :Data) -> (action :Tensor);
+    # Tells the agent to reset its state (agent_server.py forwards this to agent.reset()).
+    reset @1 () -> ();
+}

agent.py ADDED Viewed

	@@ -0,0 +1,165 @@

+"""
+Implementation of the AgentInterface for MetaWorld tasks.
+This agent uses the SawyerReachV3Policy from MetaWorld as an expert policy.
+"""
+import logging
+from typing import Any, Dict
+import gymnasium as gym
+import metaworld
+import numpy as np
+import torch
+from agent_interface import AgentInterface
+from metaworld.policies.sawyer_reach_v3_policy import SawyerReachV3Policy
+class RLAgent(AgentInterface):
+    """
+    MetaWorld agent implementation using the SawyerReachV3Policy expert policy.
+    This agent uses the expert policy from MetaWorld for reach tasks.
+    """
+    def __init__(
+        self,
+        observation_space: gym.Space | None = None,
+        action_space: gym.Space | None = None,
+        seed: int | None = None,
+        **kwargs,
+    ):
+        super().__init__(observation_space, action_space, seed, **kwargs)
+        self.logger = logging.getLogger(__name__)
+        self.logger.info(f"Initializing MetaWorld agent with seed {self.seed}")
+        self.policy = SawyerReachV3Policy()
+        self.logger.info("Successfully initialized SawyerReachV3Policy")
+        # Track episode state
+        self.episode_step = 0
+        self.max_episode_steps = kwargs.get("max_episode_steps", 200)
+        self.logger.info("MetaWorld agent initialized successfully")
+    def act(self, obs: Dict[str, Any], **kwargs) -> torch.Tensor:
+        """
+        Process the observation and return an action using the MetaWorld expert policy.
+        Args:
+            obs: Observation from the environment
+            kwargs: Additional arguments
+        Returns:
+            action: Action tensor to take in the environment
+        """
+        try:
+            # Process observation to extract the format needed by the expert policy
+            processed_obs = self._process_observation(obs)
+            # Use the expert policy (MetaWorld is always available)
+            # MetaWorld policies expect numpy arrays
+            action_numpy = self.policy.get_action(processed_obs)
+            action_tensor = torch.from_numpy(np.array(action_numpy)).float()
+            # Log occasionally
+            if self.episode_step % 50 == 0:
+                self.logger.debug(f"Using expert policy action: {action_numpy}")
+            # Increment episode step
+            self.episode_step += 1
+            # Occasionally log actions to avoid spam
+            if self.episode_step % 50 == 0:
+                self.logger.debug(
+                    f"Step {self.episode_step}: Action shape {action_tensor.shape}"
+                )
+            return action_tensor
+        except Exception as e:
+            self.logger.error(f"Error in act method: {e}", exc_info=True)
+            # Return zeros as a fallback
+            if isinstance(self.action_space, gym.spaces.Box):
+                return torch.zeros(self.action_space.shape[0], dtype=torch.float32)
+            else:
+                return torch.zeros(4, dtype=torch.float32)
+    def _process_observation(self, obs):
+        """
+        Helper method to process observations for the MetaWorld expert policy.
+        MetaWorld policies typically expect a specific observation format.
+        """
+        if isinstance(obs, dict):
+            # MetaWorld environment can return observations in different formats
+            if "observation" in obs:
+                # Standard format for goal-observable environments
+                processed_obs = obs["observation"]
+            elif "obs" in obs:
+                processed_obs = obs["obs"]
+            elif "state_observation" in obs:
+                # Some MetaWorld environments use this key
+                processed_obs = obs["state_observation"]
+            elif "goal_achieved" in obs:
+                # If we have information about goal achievement
+                # This might be needed for certain policy decisions
+                achievement = obs.get("goal_achieved", False)
+                base_obs = next(iter(obs.values()))
+                self.logger.debug(f"Goal achieved: {achievement}")
+                processed_obs = base_obs
+            else:
+                # If structure is unknown, use the first value
+                processed_obs = next(iter(obs.values()))
+                self.logger.debug(f"Using observation key: {next(iter(obs.keys()))}")
+        else:
+            # If already a numpy array or similar, use directly
+            processed_obs = obs
+        # Ensure we're returning a numpy array as expected by MetaWorld policies
+        if not isinstance(processed_obs, np.ndarray):
+            try:
+                processed_obs = np.array(processed_obs, dtype=np.float32)
+            except Exception as e:
+                self.logger.error(f"Failed to convert observation to numpy array: {e}")
+                # Return a dummy observation if conversion fails
+                if (
+                    self.observation_space
+                    and hasattr(self.observation_space, "shape")
+                    and self.observation_space.shape is not None
+                ):
+                    processed_obs = np.zeros(
+                        self.observation_space.shape, dtype=np.float32
+                    )
+                else:
+                    # Typical MetaWorld observation dimension if all else fails
+                    processed_obs = np.zeros(39, dtype=np.float32)
+        return processed_obs
+    def reset(self) -> None:
+        """
+        Reset agent state between episodes.
+        """
+        self.logger.debug("Resetting agent")
+        self.episode_step = 0
+        # Any other stateful components would be reset here
+    def _build_model(self):
+        """
+        Build a neural network model for the agent.
+        This is a placeholder for where you would define your neural network
+        architecture using PyTorch, TensorFlow, or another framework.
+        """
+        # Example of where you might build a simple PyTorch model
+        # model = torch.nn.Sequential(
+        #     torch.nn.Linear(self.observation_space.shape[0], 128),
+        #     torch.nn.ReLU(),
+        #     torch.nn.Linear(128, 64),
+        #     torch.nn.ReLU(),
+        #     torch.nn.Linear(64, self.action_space.shape[0]),
+        # )
+        # return model
+        pass

agent_interface.py ADDED Viewed

	@@ -0,0 +1,53 @@

+"""
+Abstract base class defining the standard interface for all agents.
+All miner-submitted agents must implement this interface to be evaluated.
+"""
+from abc import ABC, abstractmethod
+import gymnasium as gym
+import numpy as np
+import torch
+class AgentInterface(ABC):
+    """
+    Standard interface that all miner implementations must follow.
+    This ensures a consistent contract between the evaluator and any submitted agent,
+    regardless of the underlying model architecture or implementation details.
+    """
+    def __init__(
+        self,
+        observation_space: gym.Space | None = None,
+        action_space: gym.Space | None = None,
+        seed: int | None = None,
+        **kwargs,
+    ):
+        self.observation_space = observation_space or gym.spaces.Box(
+            low=-1, high=1, shape=(100,), dtype=np.float32
+        )
+        self.action_space = action_space or gym.spaces.Box(
+            low=-1, high=1, shape=(4,), dtype=np.float32
+        )
+        self.seed = seed or np.random.randint(0, 1000000)
+        self.rng = np.random.default_rng(seed)
+    @abstractmethod
+    def act(self, obs: dict, **kwargs) -> torch.Tensor:
+        """
+        Take action given current observation and any additional arguments.
+        """
+        pass
+    def reset(self) -> None:
+        """
+        Reset agent state for new episode.
+        This is called at the beginning of each episode. Stateless agents
+        can implement this as a no-op. Agents with internal memory/history
+        should reset their state here.
+        """
+        pass

agent_server.py ADDED Viewed

	@@ -0,0 +1,114 @@

+#!/usr/bin/env python3
+"""
+Cap'n Proto RPC Server for Agent Interface
+"""
+import asyncio
+import logging
+import os
+import pickle
+import numpy as np
+import torch
+import capnp
+# Load the schema
+schema_file = os.path.join(os.path.dirname(__file__), "agent.capnp")
+agent_capnp = capnp.load(schema_file)
+logger = logging.getLogger(__name__)
+class AgentServer(agent_capnp.Agent.Server):
+    """Cap'n Proto server implementation for AgentInterface"""
+    def __init__(self, agent):
+        self.agent = agent
+        self.logger = logging.getLogger(__name__)
+        self.logger.info("AgentServer initialized with agent: %s", type(agent).__name__)
+    async def act(self, obs, **kwargs):
+        """Handle act RPC call"""
+        try:
+            # Deserialize observation from bytes
+            observation = pickle.loads(obs)
+            # Call the agent's act method
+            action_tensor = self.agent.act(observation)
+            # Convert to numpy if it's a torch tensor
+            if isinstance(action_tensor, torch.Tensor):
+                action_numpy = action_tensor.detach().cpu().numpy()
+            else:
+                action_numpy = np.array(action_tensor)
+            # Prepare tensor response
+            response = agent_capnp.Agent.Tensor.new_message()
+            response.data = action_numpy.tobytes()
+            response.shape = list(action_numpy.shape)
+            response.dtype = str(action_numpy.dtype)
+            return response
+        except Exception as e:
+            self.logger.error(f"Error in act: {e}", exc_info=True)
+            raise
+    async def reset(self, **kwargs):
+        """Handle reset RPC call"""
+        try:
+            self.agent.reset()
+        except Exception as e:
+            self.logger.error(f"Error in reset: {e}", exc_info=True)
+            raise
+async def serve(agent, address="127.0.0.1", port=8000):
+    """Serve the agent using asyncio approach"""
+    async def new_connection(stream):
+        """Handler for each new client connection"""
+        try:
+            # Create TwoPartyServer for this connection
+            server = capnp.TwoPartyServer(stream, bootstrap=AgentServer(agent))
+            # Wait for the connection to disconnect
+            await server.on_disconnect()
+        except Exception as e:
+            logger.error(f"Error handling connection: {e}", exc_info=True)
+    # Create the server
+    server = await capnp.AsyncIoStream.create_server(new_connection, address, port)
+    logger.info(f"Agent RPC server listening on {address}:{port}")
+    try:
+        # Keep the server running
+        async with server:
+            await server.serve_forever()
+    except Exception as e:
+        logger.error(f"Server error: {e}", exc_info=True)
+    finally:
+        logger.info("Server shutting down")
+def start_server(agent, address="127.0.0.1", port=8000):
+    """Start server with proper asyncio event loop handling"""
+    async def run_server_with_kj():
+        async with capnp.kj_loop():
+            await serve(agent, address, port)
+    try:
+        asyncio.run(run_server_with_kj())
+    except KeyboardInterrupt:
+        logger.info("Server stopped by user")
+def run_server_in_process(agent, address="127.0.0.1", port=8000):
+    """Entry point for running server in a separate process"""
+    async def run_with_kj():
+        async with capnp.kj_loop():
+            await serve(agent, address, port)
+    asyncio.run(run_with_kj())

main.py ADDED Viewed

	@@ -0,0 +1,66 @@

+#!/usr/bin/env python3
+"""
+Main entry point for the agent server.
+This script creates an agent implementation and starts the RPC server
+to handle requests from the evaluator.
+"""
+import argparse
+import logging
+import sys
+from agent import RLAgent
+from agent_server import start_server
+def setup_logging(level=logging.INFO):
+    """Configure logging."""
+    logging.basicConfig(
+        level=level,
+        format="%(asctime)s | %(levelname)s | %(name)s | %(message)s",
+        handlers=[logging.StreamHandler(sys.stdout)],
+    )
+def main():
+    """Main entry point."""
+    parser = argparse.ArgumentParser(description="Start the agent server")
+    parser.add_argument(
+        "--host", type=str, default="*", help="Host to bind the server to"
+    )
+    parser.add_argument(
+        "--port", type=int, default=8000, help="Port to bind the server to"
+    )
+    parser.add_argument(
+        "--log-level",
+        type=str,
+        default="INFO",
+        choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"],
+        help="Logging level",
+    )
+    args = parser.parse_args()
+    # Setup logging
+    log_level = getattr(logging, args.log_level)
+    setup_logging(log_level)
+    logger = logging.getLogger(__name__)
+    logger.info(f"Starting agent server on {args.host}:{args.port}")
+    # Create the RLAgent
+    agent = RLAgent()
+    # Start the server
+    try:
+        start_server(agent, args.host, args.port)
+    except KeyboardInterrupt:
+        logger.info("Server stopped by user")
+    except Exception as e:
+        logger.error(f"Error starting server: {e}", exc_info=True)
+        sys.exit(1)
+if __name__ == "__main__":
+    main()

pyproject.toml ADDED Viewed

	@@ -0,0 +1,13 @@

+[project]
+name = "kinitro-agent"
+version = "0.0.1"
+description = "Kinitro Agent"
+readme = "README.md"
+requires-python = ">=3.12"
+# Lists every third-party package the modules import directly
+# (gymnasium, metaworld, numpy, capnp via pycapnp, torch).
+dependencies = [
+    "gymnasium",
+    "metaworld>=3.0.0",
+    "numpy",
+    "pycapnp>=2.0.0",
+    "torch>=2.8.0"
+]
+[dependency-groups]
+dev = ["debugpy>=1.8.9", "py-spy>=0.4.0", "pytest>=8.3.4", "ruff>=0.8.2"]