Add files using upload-large-folder tool
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- .venv/lib/python3.11/site-packages/ray/rllib/connectors/__pycache__/connector.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/ray/rllib/connectors/__pycache__/connector_v2.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/ray/rllib/connectors/__pycache__/registry.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/ray/rllib/connectors/__pycache__/util.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/ray/rllib/connectors/agent/__init__.py +0 -0
- .venv/lib/python3.11/site-packages/ray/rllib/connectors/agent/__pycache__/lambdas.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/ray/rllib/connectors/agent/__pycache__/mean_std_filter.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/ray/rllib/connectors/agent/__pycache__/obs_preproc.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/ray/rllib/connectors/agent/__pycache__/state_buffer.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/ray/rllib/connectors/agent/clip_reward.py +56 -0
- .venv/lib/python3.11/site-packages/ray/rllib/connectors/agent/env_sampling.py +30 -0
- .venv/lib/python3.11/site-packages/ray/rllib/connectors/agent/lambdas.py +86 -0
- .venv/lib/python3.11/site-packages/ray/rllib/connectors/agent/mean_std_filter.py +187 -0
- .venv/lib/python3.11/site-packages/ray/rllib/connectors/agent/obs_preproc.py +69 -0
- .venv/lib/python3.11/site-packages/ray/rllib/connectors/agent/pipeline.py +72 -0
- .venv/lib/python3.11/site-packages/ray/rllib/connectors/agent/state_buffer.py +120 -0
- .venv/lib/python3.11/site-packages/ray/rllib/connectors/agent/synced_filter.py +52 -0
- .venv/lib/python3.11/site-packages/ray/rllib/connectors/agent/view_requirement.py +135 -0
- .venv/lib/python3.11/site-packages/ray/rllib/connectors/learner/__init__.py +43 -0
- .venv/lib/python3.11/site-packages/ray/rllib/connectors/learner/__pycache__/__init__.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/ray/rllib/connectors/learner/__pycache__/add_columns_from_episodes_to_train_batch.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/ray/rllib/connectors/learner/__pycache__/add_next_observations_from_episodes_to_train_batch.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/ray/rllib/connectors/learner/__pycache__/compute_returns_to_go.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/ray/rllib/connectors/learner/__pycache__/general_advantage_estimation.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/ray/rllib/connectors/module_to_env/__pycache__/__init__.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/ray/rllib/connectors/module_to_env/__pycache__/get_actions.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/ray/rllib/connectors/module_to_env/__pycache__/module_to_env_pipeline.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/ray/rllib/connectors/module_to_env/__pycache__/normalize_and_clip_actions.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/ray/rllib/connectors/module_to_env/__pycache__/remove_single_ts_time_rank_from_batch.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/ray/rllib/connectors/module_to_env/__pycache__/unbatch_to_individual_items.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/ray/rllib/models/__pycache__/__init__.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/ray/rllib/models/__pycache__/action_dist.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/ray/rllib/models/__pycache__/catalog.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/ray/rllib/models/__pycache__/distributions.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/ray/rllib/models/__pycache__/modelv2.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/ray/rllib/models/__pycache__/preprocessors.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/ray/rllib/models/__pycache__/repeated_values.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/ray/rllib/models/__pycache__/utils.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/ray/rllib/models/tf/__pycache__/__init__.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/ray/rllib/models/tf/__pycache__/attention_net.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/ray/rllib/models/tf/__pycache__/fcnet.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/ray/rllib/models/tf/__pycache__/recurrent_net.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/ray/rllib/models/tf/__pycache__/tf_action_dist.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/ray/rllib/models/tf/__pycache__/tf_distributions.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/ray/rllib/models/tf/__pycache__/tf_modelv2.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/ray/rllib/models/tf/__pycache__/visionnet.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/ray/rllib/models/tf/layers/__init__.py +17 -0
- .venv/lib/python3.11/site-packages/ray/rllib/models/tf/layers/__pycache__/__init__.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/ray/rllib/models/tf/layers/__pycache__/gru_gate.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/ray/rllib/models/tf/layers/__pycache__/multi_head_attention.cpython-311.pyc +0 -0
.venv/lib/python3.11/site-packages/ray/rllib/connectors/__pycache__/connector.cpython-311.pyc
ADDED
|
Binary file (21.6 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/ray/rllib/connectors/__pycache__/connector_v2.cpython-311.pyc
ADDED
|
Binary file (47.9 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/ray/rllib/connectors/__pycache__/registry.cpython-311.pyc
ADDED
|
Binary file (1.85 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/ray/rllib/connectors/__pycache__/util.cpython-311.pyc
ADDED
|
Binary file (8.07 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/ray/rllib/connectors/agent/__init__.py
ADDED
|
File without changes
|
.venv/lib/python3.11/site-packages/ray/rllib/connectors/agent/__pycache__/lambdas.cpython-311.pyc
ADDED
|
Binary file (4.16 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/ray/rllib/connectors/agent/__pycache__/mean_std_filter.cpython-311.pyc
ADDED
|
Binary file (10.2 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/ray/rllib/connectors/agent/__pycache__/obs_preproc.cpython-311.pyc
ADDED
|
Binary file (4.11 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/ray/rllib/connectors/agent/__pycache__/state_buffer.cpython-311.pyc
ADDED
|
Binary file (6.36 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/ray/rllib/connectors/agent/clip_reward.py
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import Any
|
| 2 |
+
|
| 3 |
+
import numpy as np
|
| 4 |
+
|
| 5 |
+
from ray.rllib.connectors.connector import (
|
| 6 |
+
AgentConnector,
|
| 7 |
+
ConnectorContext,
|
| 8 |
+
)
|
| 9 |
+
from ray.rllib.connectors.registry import register_connector
|
| 10 |
+
from ray.rllib.policy.sample_batch import SampleBatch
|
| 11 |
+
from ray.rllib.utils.typing import AgentConnectorDataType
|
| 12 |
+
from ray.rllib.utils.annotations import OldAPIStack
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
@OldAPIStack
|
| 16 |
+
class ClipRewardAgentConnector(AgentConnector):
|
| 17 |
+
def __init__(self, ctx: ConnectorContext, sign=False, limit=None):
|
| 18 |
+
super().__init__(ctx)
|
| 19 |
+
assert (
|
| 20 |
+
not sign or not limit
|
| 21 |
+
), "should not enable both sign and limit reward clipping."
|
| 22 |
+
self.sign = sign
|
| 23 |
+
self.limit = limit
|
| 24 |
+
|
| 25 |
+
def transform(self, ac_data: AgentConnectorDataType) -> AgentConnectorDataType:
|
| 26 |
+
d = ac_data.data
|
| 27 |
+
assert (
|
| 28 |
+
type(d) is dict
|
| 29 |
+
), "Single agent data must be of type Dict[str, TensorStructType]"
|
| 30 |
+
|
| 31 |
+
if SampleBatch.REWARDS not in d:
|
| 32 |
+
# Nothing to clip. May happen for initial obs.
|
| 33 |
+
return ac_data
|
| 34 |
+
|
| 35 |
+
if self.sign:
|
| 36 |
+
d[SampleBatch.REWARDS] = np.sign(d[SampleBatch.REWARDS])
|
| 37 |
+
elif self.limit:
|
| 38 |
+
d[SampleBatch.REWARDS] = np.clip(
|
| 39 |
+
d[SampleBatch.REWARDS],
|
| 40 |
+
a_min=-self.limit,
|
| 41 |
+
a_max=self.limit,
|
| 42 |
+
)
|
| 43 |
+
return ac_data
|
| 44 |
+
|
| 45 |
+
def to_state(self):
|
| 46 |
+
return ClipRewardAgentConnector.__name__, {
|
| 47 |
+
"sign": self.sign,
|
| 48 |
+
"limit": self.limit,
|
| 49 |
+
}
|
| 50 |
+
|
| 51 |
+
@staticmethod
|
| 52 |
+
def from_state(ctx: ConnectorContext, params: Any):
|
| 53 |
+
return ClipRewardAgentConnector(ctx, **params)
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
register_connector(ClipRewardAgentConnector.__name__, ClipRewardAgentConnector)
|
.venv/lib/python3.11/site-packages/ray/rllib/connectors/agent/env_sampling.py
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import Any
|
| 2 |
+
|
| 3 |
+
from ray.rllib.connectors.connector import (
|
| 4 |
+
AgentConnector,
|
| 5 |
+
ConnectorContext,
|
| 6 |
+
)
|
| 7 |
+
from ray.rllib.connectors.registry import register_connector
|
| 8 |
+
from ray.rllib.utils.typing import AgentConnectorDataType
|
| 9 |
+
from ray.rllib.utils.annotations import OldAPIStack
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
@OldAPIStack
|
| 13 |
+
class EnvSamplingAgentConnector(AgentConnector):
|
| 14 |
+
def __init__(self, ctx: ConnectorContext, sign=False, limit=None):
|
| 15 |
+
super().__init__(ctx)
|
| 16 |
+
self.observation_space = ctx.observation_space
|
| 17 |
+
|
| 18 |
+
def transform(self, ac_data: AgentConnectorDataType) -> AgentConnectorDataType:
|
| 19 |
+
# EnvSamplingAgentConnector is a no-op connector.
|
| 20 |
+
return ac_data
|
| 21 |
+
|
| 22 |
+
def to_state(self):
|
| 23 |
+
return EnvSamplingAgentConnector.__name__, {}
|
| 24 |
+
|
| 25 |
+
@staticmethod
|
| 26 |
+
def from_state(ctx: ConnectorContext, params: Any):
|
| 27 |
+
return EnvSamplingAgentConnector(ctx, **params)
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
register_connector(EnvSamplingAgentConnector.__name__, EnvSamplingAgentConnector)
|
.venv/lib/python3.11/site-packages/ray/rllib/connectors/agent/lambdas.py
ADDED
|
@@ -0,0 +1,86 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import Any, Callable, Type
|
| 2 |
+
|
| 3 |
+
import numpy as np
|
| 4 |
+
import tree # dm_tree
|
| 5 |
+
|
| 6 |
+
from ray.rllib.connectors.connector import (
|
| 7 |
+
AgentConnector,
|
| 8 |
+
ConnectorContext,
|
| 9 |
+
)
|
| 10 |
+
from ray.rllib.connectors.registry import register_connector
|
| 11 |
+
from ray.rllib.policy.sample_batch import SampleBatch
|
| 12 |
+
from ray.rllib.utils.typing import (
|
| 13 |
+
AgentConnectorDataType,
|
| 14 |
+
AgentConnectorsOutput,
|
| 15 |
+
)
|
| 16 |
+
from ray.rllib.utils.annotations import OldAPIStack
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
@OldAPIStack
|
| 20 |
+
def register_lambda_agent_connector(
|
| 21 |
+
name: str, fn: Callable[[Any], Any]
|
| 22 |
+
) -> Type[AgentConnector]:
|
| 23 |
+
"""A util to register any simple transforming function as an AgentConnector
|
| 24 |
+
|
| 25 |
+
The only requirement is that fn should take a single data object and return
|
| 26 |
+
a single data object.
|
| 27 |
+
|
| 28 |
+
Args:
|
| 29 |
+
name: Name of the resulting actor connector.
|
| 30 |
+
fn: The function that transforms env / agent data.
|
| 31 |
+
|
| 32 |
+
Returns:
|
| 33 |
+
A new AgentConnector class that transforms data using fn.
|
| 34 |
+
"""
|
| 35 |
+
|
| 36 |
+
class LambdaAgentConnector(AgentConnector):
|
| 37 |
+
def transform(self, ac_data: AgentConnectorDataType) -> AgentConnectorDataType:
|
| 38 |
+
return AgentConnectorDataType(
|
| 39 |
+
ac_data.env_id, ac_data.agent_id, fn(ac_data.data)
|
| 40 |
+
)
|
| 41 |
+
|
| 42 |
+
def to_state(self):
|
| 43 |
+
return name, None
|
| 44 |
+
|
| 45 |
+
@staticmethod
|
| 46 |
+
def from_state(ctx: ConnectorContext, params: Any):
|
| 47 |
+
return LambdaAgentConnector(ctx)
|
| 48 |
+
|
| 49 |
+
LambdaAgentConnector.__name__ = name
|
| 50 |
+
LambdaAgentConnector.__qualname__ = name
|
| 51 |
+
|
| 52 |
+
register_connector(name, LambdaAgentConnector)
|
| 53 |
+
|
| 54 |
+
return LambdaAgentConnector
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
@OldAPIStack
|
| 58 |
+
def flatten_data(data: AgentConnectorsOutput):
|
| 59 |
+
assert isinstance(
|
| 60 |
+
data, AgentConnectorsOutput
|
| 61 |
+
), "Single agent data must be of type AgentConnectorsOutput"
|
| 62 |
+
|
| 63 |
+
raw_dict = data.raw_dict
|
| 64 |
+
sample_batch = data.sample_batch
|
| 65 |
+
|
| 66 |
+
flattened = {}
|
| 67 |
+
for k, v in sample_batch.items():
|
| 68 |
+
if k in [SampleBatch.INFOS, SampleBatch.ACTIONS] or k.startswith("state_out_"):
|
| 69 |
+
# Do not flatten infos, actions, and state_out_ columns.
|
| 70 |
+
flattened[k] = v
|
| 71 |
+
continue
|
| 72 |
+
if v is None:
|
| 73 |
+
# Keep the same column shape.
|
| 74 |
+
flattened[k] = None
|
| 75 |
+
continue
|
| 76 |
+
flattened[k] = np.array(tree.flatten(v))
|
| 77 |
+
flattened = SampleBatch(flattened, is_training=False)
|
| 78 |
+
|
| 79 |
+
return AgentConnectorsOutput(raw_dict, flattened)
|
| 80 |
+
|
| 81 |
+
|
| 82 |
+
# Agent connector to build and return a flattened observation SampleBatch
|
| 83 |
+
# in addition to the original input dict.
|
| 84 |
+
FlattenDataAgentConnector = OldAPIStack(
|
| 85 |
+
register_lambda_agent_connector("FlattenDataAgentConnector", flatten_data)
|
| 86 |
+
)
|
.venv/lib/python3.11/site-packages/ray/rllib/connectors/agent/mean_std_filter.py
ADDED
|
@@ -0,0 +1,187 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import Any, List
|
| 2 |
+
from gymnasium.spaces import Discrete, MultiDiscrete
|
| 3 |
+
|
| 4 |
+
import numpy as np
|
| 5 |
+
import tree
|
| 6 |
+
|
| 7 |
+
from ray.rllib.connectors.agent.synced_filter import SyncedFilterAgentConnector
|
| 8 |
+
from ray.rllib.connectors.connector import AgentConnector
|
| 9 |
+
from ray.rllib.connectors.connector import (
|
| 10 |
+
ConnectorContext,
|
| 11 |
+
)
|
| 12 |
+
from ray.rllib.connectors.registry import register_connector
|
| 13 |
+
from ray.rllib.policy.sample_batch import SampleBatch
|
| 14 |
+
from ray.rllib.utils.annotations import OldAPIStack
|
| 15 |
+
from ray.rllib.utils.filter import Filter
|
| 16 |
+
from ray.rllib.utils.filter import MeanStdFilter, ConcurrentMeanStdFilter
|
| 17 |
+
from ray.rllib.utils.spaces.space_utils import get_base_struct_from_space
|
| 18 |
+
from ray.rllib.utils.typing import AgentConnectorDataType
|
| 19 |
+
from ray.rllib.utils.filter import RunningStat
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
@OldAPIStack
|
| 23 |
+
class MeanStdObservationFilterAgentConnector(SyncedFilterAgentConnector):
|
| 24 |
+
"""A connector used to mean-std-filter observations.
|
| 25 |
+
|
| 26 |
+
Incoming observations are filtered such that the output of this filter is on
|
| 27 |
+
average zero and has a standard deviation of 1. This filtering is applied
|
| 28 |
+
separately per element of the observation space.
|
| 29 |
+
"""
|
| 30 |
+
|
| 31 |
+
def __init__(
|
| 32 |
+
self,
|
| 33 |
+
ctx: ConnectorContext,
|
| 34 |
+
demean: bool = True,
|
| 35 |
+
destd: bool = True,
|
| 36 |
+
clip: float = 10.0,
|
| 37 |
+
):
|
| 38 |
+
SyncedFilterAgentConnector.__init__(self, ctx)
|
| 39 |
+
# We simply use the old MeanStdFilter until non-connector env_runner is fully
|
| 40 |
+
# deprecated to avoid duplicate code
|
| 41 |
+
|
| 42 |
+
filter_shape = tree.map_structure(
|
| 43 |
+
lambda s: (
|
| 44 |
+
None
|
| 45 |
+
if isinstance(s, (Discrete, MultiDiscrete)) # noqa
|
| 46 |
+
else np.array(s.shape)
|
| 47 |
+
),
|
| 48 |
+
get_base_struct_from_space(ctx.observation_space),
|
| 49 |
+
)
|
| 50 |
+
self.filter = MeanStdFilter(filter_shape, demean=demean, destd=destd, clip=clip)
|
| 51 |
+
|
| 52 |
+
def transform(self, ac_data: AgentConnectorDataType) -> AgentConnectorDataType:
|
| 53 |
+
d = ac_data.data
|
| 54 |
+
assert (
|
| 55 |
+
type(d) is dict
|
| 56 |
+
), "Single agent data must be of type Dict[str, TensorStructType]"
|
| 57 |
+
if SampleBatch.OBS in d:
|
| 58 |
+
d[SampleBatch.OBS] = self.filter(
|
| 59 |
+
d[SampleBatch.OBS], update=self._is_training
|
| 60 |
+
)
|
| 61 |
+
if SampleBatch.NEXT_OBS in d:
|
| 62 |
+
d[SampleBatch.NEXT_OBS] = self.filter(
|
| 63 |
+
d[SampleBatch.NEXT_OBS], update=self._is_training
|
| 64 |
+
)
|
| 65 |
+
|
| 66 |
+
return ac_data
|
| 67 |
+
|
| 68 |
+
def to_state(self):
|
| 69 |
+
# Flattening is deterministic
|
| 70 |
+
flattened_rs = tree.flatten(self.filter.running_stats)
|
| 71 |
+
flattened_buffer = tree.flatten(self.filter.buffer)
|
| 72 |
+
return MeanStdObservationFilterAgentConnector.__name__, {
|
| 73 |
+
"shape": self.filter.shape,
|
| 74 |
+
"no_preprocessor": self.filter.no_preprocessor,
|
| 75 |
+
"demean": self.filter.demean,
|
| 76 |
+
"destd": self.filter.destd,
|
| 77 |
+
"clip": self.filter.clip,
|
| 78 |
+
"running_stats": [s.to_state() for s in flattened_rs],
|
| 79 |
+
"buffer": [s.to_state() for s in flattened_buffer],
|
| 80 |
+
}
|
| 81 |
+
|
| 82 |
+
# demean, destd, clip, and a state dict
|
| 83 |
+
@staticmethod
|
| 84 |
+
def from_state(
|
| 85 |
+
ctx: ConnectorContext,
|
| 86 |
+
params: List[Any] = None,
|
| 87 |
+
demean: bool = True,
|
| 88 |
+
destd: bool = True,
|
| 89 |
+
clip: float = 10.0,
|
| 90 |
+
):
|
| 91 |
+
connector = MeanStdObservationFilterAgentConnector(ctx, demean, destd, clip)
|
| 92 |
+
if params:
|
| 93 |
+
connector.filter.shape = params["shape"]
|
| 94 |
+
connector.filter.no_preprocessor = params["no_preprocessor"]
|
| 95 |
+
connector.filter.demean = params["demean"]
|
| 96 |
+
connector.filter.destd = params["destd"]
|
| 97 |
+
connector.filter.clip = params["clip"]
|
| 98 |
+
|
| 99 |
+
# Unflattening is deterministic
|
| 100 |
+
running_stats = [RunningStat.from_state(s) for s in params["running_stats"]]
|
| 101 |
+
connector.filter.running_stats = tree.unflatten_as(
|
| 102 |
+
connector.filter.shape, running_stats
|
| 103 |
+
)
|
| 104 |
+
|
| 105 |
+
# Unflattening is deterministic
|
| 106 |
+
buffer = [RunningStat.from_state(s) for s in params["buffer"]]
|
| 107 |
+
connector.filter.buffer = tree.unflatten_as(connector.filter.shape, buffer)
|
| 108 |
+
|
| 109 |
+
return connector
|
| 110 |
+
|
| 111 |
+
def reset_state(self) -> None:
|
| 112 |
+
"""Creates copy of current state and resets accumulated state"""
|
| 113 |
+
if not self._is_training:
|
| 114 |
+
raise ValueError(
|
| 115 |
+
"State of {} can only be changed when trainin.".format(self.__name__)
|
| 116 |
+
)
|
| 117 |
+
self.filter.reset_buffer()
|
| 118 |
+
|
| 119 |
+
def apply_changes(self, other: "Filter", *args, **kwargs) -> None:
|
| 120 |
+
"""Updates self with state from other filter."""
|
| 121 |
+
# inline this as soon as we deprecate ordinary filter with non-connector
|
| 122 |
+
# env_runner
|
| 123 |
+
if not self._is_training:
|
| 124 |
+
raise ValueError(
|
| 125 |
+
"Changes can only be applied to {} when trainin.".format(self.__name__)
|
| 126 |
+
)
|
| 127 |
+
return self.filter.apply_changes(other, *args, **kwargs)
|
| 128 |
+
|
| 129 |
+
def copy(self) -> "Filter":
|
| 130 |
+
"""Creates a new object with same state as self.
|
| 131 |
+
|
| 132 |
+
This is a legacy Filter method that we need to keep around for now
|
| 133 |
+
|
| 134 |
+
Returns:
|
| 135 |
+
A copy of self.
|
| 136 |
+
"""
|
| 137 |
+
# inline this as soon as we deprecate ordinary filter with non-connector
|
| 138 |
+
# env_runner
|
| 139 |
+
return self.filter.copy()
|
| 140 |
+
|
| 141 |
+
def sync(self, other: "AgentConnector") -> None:
|
| 142 |
+
"""Copies all state from other filter to self."""
|
| 143 |
+
# inline this as soon as we deprecate ordinary filter with non-connector
|
| 144 |
+
# env_runner
|
| 145 |
+
if not self._is_training:
|
| 146 |
+
raise ValueError(
|
| 147 |
+
"{} can only be synced when trainin.".format(self.__name__)
|
| 148 |
+
)
|
| 149 |
+
return self.filter.sync(other.filter)
|
| 150 |
+
|
| 151 |
+
|
| 152 |
+
@OldAPIStack
|
| 153 |
+
class ConcurrentMeanStdObservationFilterAgentConnector(
|
| 154 |
+
MeanStdObservationFilterAgentConnector
|
| 155 |
+
):
|
| 156 |
+
"""A concurrent version of the MeanStdObservationFilterAgentConnector.
|
| 157 |
+
|
| 158 |
+
This version's filter has all operations wrapped by a threading.RLock.
|
| 159 |
+
It can therefore be safely used by multiple threads.
|
| 160 |
+
"""
|
| 161 |
+
|
| 162 |
+
def __init__(self, ctx: ConnectorContext, demean=True, destd=True, clip=10.0):
|
| 163 |
+
SyncedFilterAgentConnector.__init__(self, ctx)
|
| 164 |
+
# We simply use the old MeanStdFilter until non-connector env_runner is fully
|
| 165 |
+
# deprecated to avoid duplicate code
|
| 166 |
+
|
| 167 |
+
filter_shape = tree.map_structure(
|
| 168 |
+
lambda s: (
|
| 169 |
+
None
|
| 170 |
+
if isinstance(s, (Discrete, MultiDiscrete)) # noqa
|
| 171 |
+
else np.array(s.shape)
|
| 172 |
+
),
|
| 173 |
+
get_base_struct_from_space(ctx.observation_space),
|
| 174 |
+
)
|
| 175 |
+
self.filter = ConcurrentMeanStdFilter(
|
| 176 |
+
filter_shape, demean=True, destd=True, clip=10.0
|
| 177 |
+
)
|
| 178 |
+
|
| 179 |
+
|
| 180 |
+
register_connector(
|
| 181 |
+
MeanStdObservationFilterAgentConnector.__name__,
|
| 182 |
+
MeanStdObservationFilterAgentConnector,
|
| 183 |
+
)
|
| 184 |
+
register_connector(
|
| 185 |
+
ConcurrentMeanStdObservationFilterAgentConnector.__name__,
|
| 186 |
+
ConcurrentMeanStdObservationFilterAgentConnector,
|
| 187 |
+
)
|
.venv/lib/python3.11/site-packages/ray/rllib/connectors/agent/obs_preproc.py
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import Any
|
| 2 |
+
|
| 3 |
+
from ray.rllib.connectors.connector import (
|
| 4 |
+
AgentConnector,
|
| 5 |
+
ConnectorContext,
|
| 6 |
+
)
|
| 7 |
+
from ray.rllib.connectors.registry import register_connector
|
| 8 |
+
from ray.rllib.models.preprocessors import get_preprocessor, NoPreprocessor
|
| 9 |
+
from ray.rllib.policy.sample_batch import SampleBatch
|
| 10 |
+
from ray.rllib.utils.typing import AgentConnectorDataType
|
| 11 |
+
from ray.rllib.utils.annotations import OldAPIStack
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
@OldAPIStack
|
| 15 |
+
class ObsPreprocessorConnector(AgentConnector):
|
| 16 |
+
"""A connector that wraps around existing RLlib observation preprocessors.
|
| 17 |
+
|
| 18 |
+
This includes:
|
| 19 |
+
- OneHotPreprocessor for Discrete and Multi-Discrete spaces.
|
| 20 |
+
- GenericPixelPreprocessor and AtariRamPreprocessor for Atari spaces.
|
| 21 |
+
- TupleFlatteningPreprocessor and DictFlatteningPreprocessor for flattening
|
| 22 |
+
arbitrary nested input observations.
|
| 23 |
+
- RepeatedValuesPreprocessor for padding observations from RLlib Repeated
|
| 24 |
+
observation space.
|
| 25 |
+
"""
|
| 26 |
+
|
| 27 |
+
def __init__(self, ctx: ConnectorContext):
|
| 28 |
+
super().__init__(ctx)
|
| 29 |
+
|
| 30 |
+
if hasattr(ctx.observation_space, "original_space"):
|
| 31 |
+
# ctx.observation_space is the space this Policy deals with.
|
| 32 |
+
# We need to preprocess data from the original observation space here.
|
| 33 |
+
obs_space = ctx.observation_space.original_space
|
| 34 |
+
else:
|
| 35 |
+
obs_space = ctx.observation_space
|
| 36 |
+
|
| 37 |
+
self._preprocessor = get_preprocessor(obs_space)(
|
| 38 |
+
obs_space, ctx.config.get("model", {})
|
| 39 |
+
)
|
| 40 |
+
|
| 41 |
+
def is_identity(self):
|
| 42 |
+
"""Returns whether this preprocessor connector is a no-op preprocessor."""
|
| 43 |
+
return isinstance(self._preprocessor, NoPreprocessor)
|
| 44 |
+
|
| 45 |
+
def transform(self, ac_data: AgentConnectorDataType) -> AgentConnectorDataType:
|
| 46 |
+
d = ac_data.data
|
| 47 |
+
assert type(d) is dict, (
|
| 48 |
+
"Single agent data must be of type Dict[str, TensorStructType] but is of "
|
| 49 |
+
"type {}".format(type(d))
|
| 50 |
+
)
|
| 51 |
+
|
| 52 |
+
if SampleBatch.OBS in d:
|
| 53 |
+
d[SampleBatch.OBS] = self._preprocessor.transform(d[SampleBatch.OBS])
|
| 54 |
+
if SampleBatch.NEXT_OBS in d:
|
| 55 |
+
d[SampleBatch.NEXT_OBS] = self._preprocessor.transform(
|
| 56 |
+
d[SampleBatch.NEXT_OBS]
|
| 57 |
+
)
|
| 58 |
+
|
| 59 |
+
return ac_data
|
| 60 |
+
|
| 61 |
+
def to_state(self):
|
| 62 |
+
return ObsPreprocessorConnector.__name__, None
|
| 63 |
+
|
| 64 |
+
@staticmethod
|
| 65 |
+
def from_state(ctx: ConnectorContext, params: Any):
|
| 66 |
+
return ObsPreprocessorConnector(ctx)
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
register_connector(ObsPreprocessorConnector.__name__, ObsPreprocessorConnector)
|
.venv/lib/python3.11/site-packages/ray/rllib/connectors/agent/pipeline.py
ADDED
|
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import logging
|
| 2 |
+
from typing import Any, List
|
| 3 |
+
from collections import defaultdict
|
| 4 |
+
|
| 5 |
+
from ray.rllib.connectors.connector import (
|
| 6 |
+
AgentConnector,
|
| 7 |
+
Connector,
|
| 8 |
+
ConnectorContext,
|
| 9 |
+
ConnectorPipeline,
|
| 10 |
+
)
|
| 11 |
+
from ray.rllib.connectors.registry import get_connector, register_connector
|
| 12 |
+
from ray.rllib.utils.typing import ActionConnectorDataType, AgentConnectorDataType
|
| 13 |
+
from ray.rllib.utils.annotations import OldAPIStack
|
| 14 |
+
from ray.util.timer import _Timer
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
logger = logging.getLogger(__name__)
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
@OldAPIStack
|
| 21 |
+
class AgentConnectorPipeline(ConnectorPipeline, AgentConnector):
|
| 22 |
+
def __init__(self, ctx: ConnectorContext, connectors: List[Connector]):
|
| 23 |
+
super().__init__(ctx, connectors)
|
| 24 |
+
self.timers = defaultdict(_Timer)
|
| 25 |
+
|
| 26 |
+
def reset(self, env_id: str):
|
| 27 |
+
for c in self.connectors:
|
| 28 |
+
c.reset(env_id)
|
| 29 |
+
|
| 30 |
+
def on_policy_output(self, output: ActionConnectorDataType):
|
| 31 |
+
for c in self.connectors:
|
| 32 |
+
c.on_policy_output(output)
|
| 33 |
+
|
| 34 |
+
def __call__(
|
| 35 |
+
self, acd_list: List[AgentConnectorDataType]
|
| 36 |
+
) -> List[AgentConnectorDataType]:
|
| 37 |
+
ret = acd_list
|
| 38 |
+
for c in self.connectors:
|
| 39 |
+
timer = self.timers[str(c)]
|
| 40 |
+
with timer:
|
| 41 |
+
ret = c(ret)
|
| 42 |
+
return ret
|
| 43 |
+
|
| 44 |
+
def to_state(self):
|
| 45 |
+
children = []
|
| 46 |
+
for c in self.connectors:
|
| 47 |
+
state = c.to_state()
|
| 48 |
+
assert isinstance(state, tuple) and len(state) == 2, (
|
| 49 |
+
"Serialized connector state must be in the format of "
|
| 50 |
+
f"Tuple[name: str, params: Any]. Instead we got {state}"
|
| 51 |
+
f"for connector {c.__name__}."
|
| 52 |
+
)
|
| 53 |
+
children.append(state)
|
| 54 |
+
return AgentConnectorPipeline.__name__, children
|
| 55 |
+
|
| 56 |
+
@staticmethod
|
| 57 |
+
def from_state(ctx: ConnectorContext, params: List[Any]):
|
| 58 |
+
assert (
|
| 59 |
+
type(params) is list
|
| 60 |
+
), "AgentConnectorPipeline takes a list of connector params."
|
| 61 |
+
connectors = []
|
| 62 |
+
for state in params:
|
| 63 |
+
try:
|
| 64 |
+
name, subparams = state
|
| 65 |
+
connectors.append(get_connector(name, ctx, subparams))
|
| 66 |
+
except Exception as e:
|
| 67 |
+
logger.error(f"Failed to de-serialize connector state: {state}")
|
| 68 |
+
raise e
|
| 69 |
+
return AgentConnectorPipeline(ctx, connectors)
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
register_connector(AgentConnectorPipeline.__name__, AgentConnectorPipeline)
|
.venv/lib/python3.11/site-packages/ray/rllib/connectors/agent/state_buffer.py
ADDED
|
@@ -0,0 +1,120 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from collections import defaultdict
|
| 2 |
+
import logging
|
| 3 |
+
import pickle
|
| 4 |
+
from typing import Any
|
| 5 |
+
|
| 6 |
+
import numpy as np
|
| 7 |
+
from ray.rllib.utils.annotations import override
|
| 8 |
+
import tree # dm_tree
|
| 9 |
+
|
| 10 |
+
from ray.rllib.connectors.connector import (
|
| 11 |
+
AgentConnector,
|
| 12 |
+
Connector,
|
| 13 |
+
ConnectorContext,
|
| 14 |
+
)
|
| 15 |
+
from ray import cloudpickle
|
| 16 |
+
from ray.rllib.connectors.registry import register_connector
|
| 17 |
+
from ray.rllib.core.columns import Columns
|
| 18 |
+
from ray.rllib.policy.sample_batch import SampleBatch
|
| 19 |
+
from ray.rllib.utils.spaces.space_utils import get_base_struct_from_space
|
| 20 |
+
from ray.rllib.utils.typing import ActionConnectorDataType, AgentConnectorDataType
|
| 21 |
+
from ray.rllib.utils.annotations import OldAPIStack
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
logger = logging.getLogger(__name__)
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
@OldAPIStack
class StateBufferConnector(AgentConnector):
    """Buffers per-agent policy outputs and feeds them into the next input.

    For every (env_id, agent_id) pair this connector stashes the latest
    policy output tuple ``(action, states, fetches)`` (see
    ``on_policy_output``). On the next ``transform`` call it injects:

    - the previous action (or a zero action if none exists yet),
    - the previous RNN state outputs (or ``ctx.initial_states``),
    - any extra action fetches,

    into the agent's data dict so the policy can compute the next action.
    """

    def __init__(self, ctx: ConnectorContext, states: Any = None):
        """Initializes a StateBufferConnector.

        Args:
            ctx: Connector context; provides initial RNN states and the
                action space used to build zero actions.
            states: Optional cloudpickle-serialized buffered states, as
                returned by ``to_state()``. Only needed when restoring a
                stashed policy during the rollout of a single episode.
        """
        super().__init__(ctx)

        self._initial_states = ctx.initial_states
        self._action_space_struct = get_base_struct_from_space(ctx.action_space)

        # env_id -> agent_id -> (action, states, fetches); unseen agents
        # default to a tuple of Nones.
        self._states = defaultdict(lambda: defaultdict(lambda: (None, None, None)))
        self._enable_new_api_stack = False
        # TODO(jungong) : we would not need this if policies are never stashed
        # during the rollout of a single episode.
        if states:
            try:
                self._states = cloudpickle.loads(states)
            except pickle.UnpicklingError:
                # StateBufferConnector states are only needed for rare cases
                # like stashing then restoring a policy during the rollout of
                # a single episode.
                # It is ok to ignore the error for most of the cases here.
                logger.info(
                    "Can not restore StateBufferConnector states. This warning can "
                    "usually be ignore, unless it is from restoring a stashed policy."
                )

    @override(Connector)
    def in_eval(self):
        # No eval-specific behavior; just forward to the base class.
        super().in_eval()

    def reset(self, env_id: str):
        """Drops buffered states for ``env_id``.

        States should not be carried over between episodes.
        """
        if env_id in self._states:
            del self._states[env_id]

    def on_policy_output(self, ac_data: ActionConnectorDataType):
        """Buffers the latest policy output for the next ``transform`` call."""
        self._states[ac_data.env_id][ac_data.agent_id] = ac_data.output

    def transform(self, ac_data: AgentConnectorDataType) -> AgentConnectorDataType:
        """Injects buffered action / states / fetches into the agent data dict."""
        d = ac_data.data
        assert (
            type(d) is dict
        ), "Single agent data must be of type Dict[str, TensorStructType]"

        env_id = ac_data.env_id
        agent_id = ac_data.agent_id
        # Fixed: the message used to contain stray "f" characters inside the
        # f-string ("env_id(f{env_id})"), garbling the rendered output.
        assert (
            env_id is not None and agent_id is not None
        ), f"StateBufferConnector requires env_id({env_id}) and agent_id({agent_id})"

        action, states, fetches = self._states[env_id][agent_id]

        if action is not None:
            d[SampleBatch.ACTIONS] = action  # Last action
        else:
            # Default zero action, matching the action space structure.
            d[SampleBatch.ACTIONS] = tree.map_structure(
                lambda s: np.zeros_like(s.sample(), s.dtype)
                if hasattr(s, "dtype")
                else np.zeros_like(s.sample()),
                self._action_space_struct,
            )

        if states is None:
            states = self._initial_states
        if self._enable_new_api_stack:
            if states:
                d[Columns.STATE_OUT] = states
        else:
            for i, v in enumerate(states):
                d["state_out_{}".format(i)] = v

        # Also add extra fetches if available.
        if fetches:
            d.update(fetches)

        return ac_data

    def to_state(self):
        """Serializes buffered states with cloudpickle.

        Note(jungong) : it is ok to use cloudpickle here for states because:
        1. self._states may contain arbitrary data objects, and will be hard
           to serialize otherwise.
        2. serialized states are only useful if a policy is stashed and
           restored during the rollout of a single episode. So it is ok to
           use cloudpickle for such non-persistent data bits.
        """
        states = cloudpickle.dumps(self._states)
        return StateBufferConnector.__name__, states

    @staticmethod
    def from_state(ctx: ConnectorContext, params: Any):
        """Re-creates a StateBufferConnector from serialized ``params``."""
        return StateBufferConnector(ctx, params)


register_connector(StateBufferConnector.__name__, StateBufferConnector)
|
.venv/lib/python3.11/site-packages/ray/rllib/connectors/agent/synced_filter.py
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from ray.rllib.connectors.connector import (
|
| 2 |
+
AgentConnector,
|
| 3 |
+
ConnectorContext,
|
| 4 |
+
)
|
| 5 |
+
from ray.rllib.utils.annotations import OldAPIStack
|
| 6 |
+
from ray.rllib.utils.filter import Filter
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
@OldAPIStack
class SyncedFilterAgentConnector(AgentConnector):
    """An agent connector that filters with synchronized parameters.

    Every filter-related call is delegated to ``self.filter``, which a
    concrete subclass is expected to provide.
    """

    def __init__(self, ctx: ConnectorContext, *args, **kwargs):
        super().__init__(ctx)
        # This connector accepts no extra positional or keyword arguments;
        # reject anything beyond the context explicitly.
        if args or kwargs:
            raise ValueError(
                "SyncedFilterAgentConnector does not take any additional arguments, "
                "but got args=`{}` and kwargs={}.".format(args, kwargs)
            )

    def apply_changes(self, other: "Filter", *args, **kwargs) -> None:
        """Updates self with state from other filter."""
        # TODO: (artur) inline this as soon as we deprecate ordinary filter
        # with non-connector env_runner.
        return self.filter.apply_changes(other, *args, **kwargs)

    def copy(self) -> "Filter":
        """Creates a new object with same state as self.

        This is a legacy Filter method that we need to keep around for now.

        Returns:
            A copy of self.
        """
        # Delegate to the wrapped filter; inline this once the ordinary
        # (non-connector) filter code path is deprecated.
        duplicate = self.filter.copy()
        return duplicate

    def sync(self, other: "AgentConnector") -> None:
        """Copies all state from other filter to self."""
        # TODO: (artur) inline this as soon as we deprecate ordinary filter
        # with non-connector env_runner.
        return self.filter.sync(other.filter)

    def reset_state(self) -> None:
        """Creates copy of current state and resets accumulated state."""
        raise NotImplementedError

    def as_serializable(self) -> "Filter":
        """Returns a serializable version of the underlying filter."""
        # TODO: (artur) inline this as soon as we deprecate ordinary filter
        # with non-connector env_runner.
        return self.filter.as_serializable()
|
.venv/lib/python3.11/site-packages/ray/rllib/connectors/agent/view_requirement.py
ADDED
|
@@ -0,0 +1,135 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from collections import defaultdict
|
| 2 |
+
from typing import Any
|
| 3 |
+
|
| 4 |
+
from ray.rllib.connectors.connector import (
|
| 5 |
+
AgentConnector,
|
| 6 |
+
ConnectorContext,
|
| 7 |
+
)
|
| 8 |
+
from ray.rllib.connectors.registry import register_connector
|
| 9 |
+
from ray.rllib.policy.sample_batch import SampleBatch
|
| 10 |
+
from ray.rllib.utils.typing import (
|
| 11 |
+
AgentConnectorDataType,
|
| 12 |
+
AgentConnectorsOutput,
|
| 13 |
+
)
|
| 14 |
+
from ray.rllib.utils.annotations import OldAPIStack
|
| 15 |
+
from ray.rllib.evaluation.collectors.agent_collector import AgentCollector
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
@OldAPIStack
class ViewRequirementAgentConnector(AgentConnector):
    """This connector does 2 things:

    1. It filters data columns based on view_requirements for training and
       inference.
    2. It buffers the right amount of history for computing the sample batch
       for action computation.

    The output of this connector is an ``AgentConnectorsOutput``, which
    basically is a tuple of 2 things:
    {
        "raw_dict": {"obs": ...}
        "sample_batch": SampleBatch
    }
    raw_dict, which contains raw data for the latest time slice,
    can be used to construct a complete episode by Sampler for training purpose.
    The "for_action" SampleBatch can be used to directly call the policy.
    """

    def __init__(self, ctx: ConnectorContext):
        """Initializes a ViewRequirementAgentConnector from ``ctx``."""
        super().__init__(ctx)

        self._view_requirements = ctx.view_requirements
        _enable_new_api_stack = False

        # a dict of env_id to a dict of agent_id to an AgentCollector object
        self.agent_collectors = defaultdict(
            lambda: defaultdict(
                lambda: AgentCollector(
                    self._view_requirements,
                    max_seq_len=ctx.config["model"]["max_seq_len"],
                    # NOTE(review): keyword deliberately spelled
                    # "intial_states" (sic) — it must match AgentCollector's
                    # parameter name; do not "fix" the spelling here alone.
                    intial_states=ctx.initial_states,
                    disable_action_flattening=ctx.config.get(
                        "_disable_action_flattening", False
                    ),
                    is_policy_recurrent=ctx.is_policy_recurrent,
                    # Note(jungong): We only leverage AgentCollector for building sample
                    # batches for computing actions.
                    # So regardless of whether this ViewRequirement connector is in
                    # training or inference mode, we should tell these AgentCollectors
                    # to behave in inference mode, so they don't accumulate episode data
                    # that is not useful for inference.
                    is_training=False,
                    _enable_new_api_stack=_enable_new_api_stack,
                )
            )
        )

    def reset(self, env_id: str):
        """Drops all agent collectors for ``env_id`` (episode boundary)."""
        if env_id in self.agent_collectors:
            del self.agent_collectors[env_id]

    def transform(self, ac_data: AgentConnectorDataType) -> AgentConnectorDataType:
        """Buffers the incoming step and builds an inference sample batch."""
        d = ac_data.data
        assert (
            type(d) is dict
        ), "Single agent data must be of type Dict[str, TensorStructType]"

        env_id = ac_data.env_id
        agent_id = ac_data.agent_id
        # TODO: we don't keep episode_id around so use env_id as episode_id ?
        episode_id = env_id if SampleBatch.EPS_ID not in d else d[SampleBatch.EPS_ID]

        # Fixed: the second line of this message was missing its f-prefix,
        # so "{agent_id}" was rendered literally instead of the actual id.
        assert env_id is not None and agent_id is not None, (
            f"ViewRequirementAgentConnector requires env_id({env_id}) "
            f"and agent_id({agent_id})"
        )

        assert (
            self._view_requirements
        ), "ViewRequirements required by ViewRequirementAgentConnector"

        # Note(jungong) : we need to keep the entire input dict here.
        # A column may be used by postprocessing (GAE) even if its
        # view_requirement.used_for_training is False.
        training_dict = d

        agent_collector = self.agent_collectors[env_id][agent_id]

        if SampleBatch.NEXT_OBS not in d:
            raise ValueError(f"connector data {d} should contain next_obs.")
        # TODO(avnishn; kourosh) Unsure how agent_index is necessary downstream
        # since there is no mapping from agent_index to agent_id that exists.
        # need to remove this from the SampleBatch later.
        # fall back to using dummy index if no index is available
        if SampleBatch.AGENT_INDEX in d:
            agent_index = d[SampleBatch.AGENT_INDEX]
        else:
            try:
                agent_index = float(agent_id)
            except ValueError:
                agent_index = -1
        if agent_collector.is_empty():
            agent_collector.add_init_obs(
                episode_id=episode_id,
                agent_index=agent_index,
                env_id=env_id,
                init_obs=d[SampleBatch.NEXT_OBS],
                init_infos=d.get(SampleBatch.INFOS),
            )
        else:
            agent_collector.add_action_reward_next_obs(d)
        sample_batch = agent_collector.build_for_inference()

        return AgentConnectorDataType(
            env_id, agent_id, AgentConnectorsOutput(training_dict, sample_batch)
        )

    def to_state(self):
        # Collector buffers are transient; nothing needs serializing.
        return ViewRequirementAgentConnector.__name__, None

    @staticmethod
    def from_state(ctx: ConnectorContext, params: Any):
        """Re-creates a ViewRequirementAgentConnector from ``ctx`` only."""
        return ViewRequirementAgentConnector(ctx)


register_connector(
    ViewRequirementAgentConnector.__name__, ViewRequirementAgentConnector
)
|
.venv/lib/python3.11/site-packages/ray/rllib/connectors/learner/__init__.py
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from ray.rllib.connectors.common.add_observations_from_episodes_to_batch import (
|
| 2 |
+
AddObservationsFromEpisodesToBatch,
|
| 3 |
+
)
|
| 4 |
+
from ray.rllib.connectors.common.add_states_from_episodes_to_batch import (
|
| 5 |
+
AddStatesFromEpisodesToBatch,
|
| 6 |
+
)
|
| 7 |
+
from ray.rllib.connectors.common.add_time_dim_to_batch_and_zero_pad import (
|
| 8 |
+
AddTimeDimToBatchAndZeroPad,
|
| 9 |
+
)
|
| 10 |
+
from ray.rllib.connectors.common.agent_to_module_mapping import AgentToModuleMapping
|
| 11 |
+
from ray.rllib.connectors.common.batch_individual_items import BatchIndividualItems
|
| 12 |
+
from ray.rllib.connectors.common.numpy_to_tensor import NumpyToTensor
|
| 13 |
+
from ray.rllib.connectors.learner.add_columns_from_episodes_to_train_batch import (
|
| 14 |
+
AddColumnsFromEpisodesToTrainBatch,
|
| 15 |
+
)
|
| 16 |
+
from ray.rllib.connectors.learner.add_next_observations_from_episodes_to_train_batch import ( # noqa
|
| 17 |
+
AddNextObservationsFromEpisodesToTrainBatch,
|
| 18 |
+
)
|
| 19 |
+
from ray.rllib.connectors.learner.add_one_ts_to_episodes_and_truncate import (
|
| 20 |
+
AddOneTsToEpisodesAndTruncate,
|
| 21 |
+
)
|
| 22 |
+
from ray.rllib.connectors.learner.compute_returns_to_go import ComputeReturnsToGo
|
| 23 |
+
from ray.rllib.connectors.learner.general_advantage_estimation import (
|
| 24 |
+
GeneralAdvantageEstimation,
|
| 25 |
+
)
|
| 26 |
+
from ray.rllib.connectors.learner.learner_connector_pipeline import (
|
| 27 |
+
LearnerConnectorPipeline,
|
| 28 |
+
)
|
| 29 |
+
|
| 30 |
+
__all__ = [
|
| 31 |
+
"AddColumnsFromEpisodesToTrainBatch",
|
| 32 |
+
"AddNextObservationsFromEpisodesToTrainBatch",
|
| 33 |
+
"AddObservationsFromEpisodesToBatch",
|
| 34 |
+
"AddOneTsToEpisodesAndTruncate",
|
| 35 |
+
"AddStatesFromEpisodesToBatch",
|
| 36 |
+
"AddTimeDimToBatchAndZeroPad",
|
| 37 |
+
"AgentToModuleMapping",
|
| 38 |
+
"BatchIndividualItems",
|
| 39 |
+
"ComputeReturnsToGo",
|
| 40 |
+
"GeneralAdvantageEstimation",
|
| 41 |
+
"LearnerConnectorPipeline",
|
| 42 |
+
"NumpyToTensor",
|
| 43 |
+
]
|
.venv/lib/python3.11/site-packages/ray/rllib/connectors/learner/__pycache__/__init__.cpython-311.pyc
ADDED
|
Binary file (1.91 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/ray/rllib/connectors/learner/__pycache__/add_columns_from_episodes_to_train_batch.cpython-311.pyc
ADDED
|
Binary file (7.62 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/ray/rllib/connectors/learner/__pycache__/add_next_observations_from_episodes_to_train_batch.cpython-311.pyc
ADDED
|
Binary file (4.92 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/ray/rllib/connectors/learner/__pycache__/compute_returns_to_go.cpython-311.pyc
ADDED
|
Binary file (3.14 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/ray/rllib/connectors/learner/__pycache__/general_advantage_estimation.cpython-311.pyc
ADDED
|
Binary file (8.74 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/ray/rllib/connectors/module_to_env/__pycache__/__init__.cpython-311.pyc
ADDED
|
Binary file (1.29 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/ray/rllib/connectors/module_to_env/__pycache__/get_actions.cpython-311.pyc
ADDED
|
Binary file (4.55 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/ray/rllib/connectors/module_to_env/__pycache__/module_to_env_pipeline.cpython-311.pyc
ADDED
|
Binary file (714 Bytes). View file
|
|
|
.venv/lib/python3.11/site-packages/ray/rllib/connectors/module_to_env/__pycache__/normalize_and_clip_actions.cpython-311.pyc
ADDED
|
Binary file (7.29 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/ray/rllib/connectors/module_to_env/__pycache__/remove_single_ts_time_rank_from_batch.cpython-311.pyc
ADDED
|
Binary file (3.6 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/ray/rllib/connectors/module_to_env/__pycache__/unbatch_to_individual_items.cpython-311.pyc
ADDED
|
Binary file (4.47 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/ray/rllib/models/__pycache__/__init__.cpython-311.pyc
ADDED
|
Binary file (596 Bytes). View file
|
|
|
.venv/lib/python3.11/site-packages/ray/rllib/models/__pycache__/action_dist.cpython-311.pyc
ADDED
|
Binary file (5.15 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/ray/rllib/models/__pycache__/catalog.cpython-311.pyc
ADDED
|
Binary file (35.1 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/ray/rllib/models/__pycache__/distributions.cpython-311.pyc
ADDED
|
Binary file (10.8 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/ray/rllib/models/__pycache__/modelv2.cpython-311.pyc
ADDED
|
Binary file (21.7 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/ray/rllib/models/__pycache__/preprocessors.cpython-311.pyc
ADDED
|
Binary file (26.7 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/ray/rllib/models/__pycache__/repeated_values.cpython-311.pyc
ADDED
|
Binary file (10.5 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/ray/rllib/models/__pycache__/utils.cpython-311.pyc
ADDED
|
Binary file (9.93 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/ray/rllib/models/tf/__pycache__/__init__.cpython-311.pyc
ADDED
|
Binary file (584 Bytes). View file
|
|
|
.venv/lib/python3.11/site-packages/ray/rllib/models/tf/__pycache__/attention_net.cpython-311.pyc
ADDED
|
Binary file (28.4 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/ray/rllib/models/tf/__pycache__/fcnet.cpython-311.pyc
ADDED
|
Binary file (7.03 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/ray/rllib/models/tf/__pycache__/recurrent_net.cpython-311.pyc
ADDED
|
Binary file (14.4 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/ray/rllib/models/tf/__pycache__/tf_action_dist.cpython-311.pyc
ADDED
|
Binary file (51.8 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/ray/rllib/models/tf/__pycache__/tf_distributions.cpython-311.pyc
ADDED
|
Binary file (34.1 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/ray/rllib/models/tf/__pycache__/tf_modelv2.cpython-311.pyc
ADDED
|
Binary file (8.26 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/ray/rllib/models/tf/__pycache__/visionnet.cpython-311.pyc
ADDED
|
Binary file (10.5 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/ray/rllib/models/tf/layers/__init__.py
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from ray.rllib.models.tf.layers.gru_gate import GRUGate
|
| 2 |
+
from ray.rllib.models.tf.layers.noisy_layer import NoisyLayer
|
| 3 |
+
from ray.rllib.models.tf.layers.relative_multi_head_attention import (
|
| 4 |
+
PositionalEmbedding,
|
| 5 |
+
RelativeMultiHeadAttention,
|
| 6 |
+
)
|
| 7 |
+
from ray.rllib.models.tf.layers.skip_connection import SkipConnection
|
| 8 |
+
from ray.rllib.models.tf.layers.multi_head_attention import MultiHeadAttention
|
| 9 |
+
|
| 10 |
+
__all__ = [
|
| 11 |
+
"GRUGate",
|
| 12 |
+
"MultiHeadAttention",
|
| 13 |
+
"NoisyLayer",
|
| 14 |
+
"PositionalEmbedding",
|
| 15 |
+
"RelativeMultiHeadAttention",
|
| 16 |
+
"SkipConnection",
|
| 17 |
+
]
|
.venv/lib/python3.11/site-packages/ray/rllib/models/tf/layers/__pycache__/__init__.cpython-311.pyc
ADDED
|
Binary file (814 Bytes). View file
|
|
|
.venv/lib/python3.11/site-packages/ray/rllib/models/tf/layers/__pycache__/gru_gate.cpython-311.pyc
ADDED
|
Binary file (4.21 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/ray/rllib/models/tf/layers/__pycache__/multi_head_attention.cpython-311.pyc
ADDED
|
Binary file (4.23 kB). View file
|
|
|