koichi12 committed on Feb 12, 2025

Commit

54753b9

verified ·

1 Parent(s): 1f700b9

Add files using upload-large-folder tool

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

.venv/lib/python3.11/site-packages/ray/rllib/__pycache__/__init__.cpython-311.pyc +0 -0
.venv/lib/python3.11/site-packages/ray/rllib/connectors/action/__init__.py +0 -0
.venv/lib/python3.11/site-packages/ray/rllib/connectors/action/__pycache__/__init__.cpython-311.pyc +0 -0
.venv/lib/python3.11/site-packages/ray/rllib/connectors/action/__pycache__/clip.cpython-311.pyc +0 -0
.venv/lib/python3.11/site-packages/ray/rllib/connectors/action/__pycache__/immutable.cpython-311.pyc +0 -0
.venv/lib/python3.11/site-packages/ray/rllib/connectors/action/__pycache__/lambdas.cpython-311.pyc +0 -0
.venv/lib/python3.11/site-packages/ray/rllib/connectors/action/__pycache__/normalize.cpython-311.pyc +0 -0
.venv/lib/python3.11/site-packages/ray/rllib/connectors/action/__pycache__/pipeline.cpython-311.pyc +0 -0
.venv/lib/python3.11/site-packages/ray/rllib/connectors/action/clip.py +41 -0
.venv/lib/python3.11/site-packages/ray/rllib/connectors/action/immutable.py +40 -0
.venv/lib/python3.11/site-packages/ray/rllib/connectors/action/lambdas.py +76 -0
.venv/lib/python3.11/site-packages/ray/rllib/connectors/action/normalize.py +44 -0
.venv/lib/python3.11/site-packages/ray/rllib/connectors/action/pipeline.py +61 -0
.venv/lib/python3.11/site-packages/ray/rllib/connectors/agent/__pycache__/__init__.cpython-311.pyc +0 -0
.venv/lib/python3.11/site-packages/ray/rllib/connectors/agent/__pycache__/clip_reward.cpython-311.pyc +0 -0
.venv/lib/python3.11/site-packages/ray/rllib/connectors/agent/__pycache__/env_sampling.cpython-311.pyc +0 -0
.venv/lib/python3.11/site-packages/ray/rllib/connectors/agent/__pycache__/pipeline.cpython-311.pyc +0 -0
.venv/lib/python3.11/site-packages/ray/rllib/connectors/agent/__pycache__/synced_filter.cpython-311.pyc +0 -0
.venv/lib/python3.11/site-packages/ray/rllib/connectors/agent/__pycache__/view_requirement.cpython-311.pyc +0 -0
.venv/lib/python3.11/site-packages/ray/rllib/connectors/common/__init__.py +22 -0
.venv/lib/python3.11/site-packages/ray/rllib/connectors/common/__pycache__/__init__.cpython-311.pyc +0 -0
.venv/lib/python3.11/site-packages/ray/rllib/connectors/common/__pycache__/add_observations_from_episodes_to_batch.cpython-311.pyc +0 -0
.venv/lib/python3.11/site-packages/ray/rllib/connectors/common/__pycache__/add_states_from_episodes_to_batch.cpython-311.pyc +0 -0
.venv/lib/python3.11/site-packages/ray/rllib/connectors/common/__pycache__/add_time_dim_to_batch_and_zero_pad.cpython-311.pyc +0 -0
.venv/lib/python3.11/site-packages/ray/rllib/connectors/common/__pycache__/agent_to_module_mapping.cpython-311.pyc +0 -0
.venv/lib/python3.11/site-packages/ray/rllib/connectors/common/__pycache__/batch_individual_items.cpython-311.pyc +0 -0
.venv/lib/python3.11/site-packages/ray/rllib/connectors/common/__pycache__/frame_stacking.cpython-311.pyc +0 -0
.venv/lib/python3.11/site-packages/ray/rllib/connectors/common/__pycache__/module_to_agent_unmapping.cpython-311.pyc +0 -0
.venv/lib/python3.11/site-packages/ray/rllib/connectors/common/__pycache__/numpy_to_tensor.cpython-311.pyc +0 -0
.venv/lib/python3.11/site-packages/ray/rllib/connectors/common/__pycache__/tensor_to_numpy.cpython-311.pyc +0 -0
.venv/lib/python3.11/site-packages/ray/rllib/connectors/common/add_observations_from_episodes_to_batch.py +180 -0
.venv/lib/python3.11/site-packages/ray/rllib/connectors/common/add_states_from_episodes_to_batch.py +348 -0
.venv/lib/python3.11/site-packages/ray/rllib/connectors/common/add_time_dim_to_batch_and_zero_pad.py +302 -0
.venv/lib/python3.11/site-packages/ray/rllib/connectors/common/agent_to_module_mapping.py +291 -0
.venv/lib/python3.11/site-packages/ray/rllib/connectors/common/batch_individual_items.py +200 -0
.venv/lib/python3.11/site-packages/ray/rllib/connectors/common/frame_stacking.py +147 -0
.venv/lib/python3.11/site-packages/ray/rllib/connectors/common/module_to_agent_unmapping.py +48 -0
.venv/lib/python3.11/site-packages/ray/rllib/connectors/common/numpy_to_tensor.py +125 -0
.venv/lib/python3.11/site-packages/ray/rllib/connectors/common/tensor_to_numpy.py +26 -0
.venv/lib/python3.11/site-packages/ray/rllib/connectors/env_to_module/__pycache__/__init__.cpython-311.pyc +0 -0
.venv/lib/python3.11/site-packages/ray/rllib/connectors/env_to_module/__pycache__/env_to_module_pipeline.cpython-311.pyc +0 -0
.venv/lib/python3.11/site-packages/ray/rllib/connectors/env_to_module/__pycache__/flatten_observations.cpython-311.pyc +0 -0
.venv/lib/python3.11/site-packages/ray/rllib/connectors/env_to_module/__pycache__/frame_stacking.cpython-311.pyc +0 -0
.venv/lib/python3.11/site-packages/ray/rllib/connectors/env_to_module/__pycache__/observation_preprocessor.cpython-311.pyc +0 -0
.venv/lib/python3.11/site-packages/ray/rllib/connectors/env_to_module/__pycache__/write_observations_to_episodes.cpython-311.pyc +0 -0
.venv/lib/python3.11/site-packages/ray/rllib/connectors/learner/__pycache__/add_one_ts_to_episodes_and_truncate.cpython-311.pyc +0 -0
.venv/lib/python3.11/site-packages/ray/rllib/connectors/learner/__pycache__/frame_stacking.cpython-311.pyc +0 -0
.venv/lib/python3.11/site-packages/ray/rllib/connectors/learner/__pycache__/learner_connector_pipeline.cpython-311.pyc +0 -0
.venv/lib/python3.11/site-packages/ray/rllib/connectors/learner/add_columns_from_episodes_to_train_batch.py +166 -0
.venv/lib/python3.11/site-packages/ray/rllib/connectors/learner/add_next_observations_from_episodes_to_train_batch.py +103 -0

.venv/lib/python3.11/site-packages/ray/rllib/__pycache__/__init__.cpython-311.pyc ADDED Viewed

Binary file (2.5 kB). View file

.venv/lib/python3.11/site-packages/ray/rllib/connectors/action/__init__.py ADDED Viewed

File without changes

.venv/lib/python3.11/site-packages/ray/rllib/connectors/action/__pycache__/__init__.cpython-311.pyc ADDED Viewed

Binary file (200 Bytes). View file

.venv/lib/python3.11/site-packages/ray/rllib/connectors/action/__pycache__/clip.cpython-311.pyc ADDED Viewed

Binary file (2.7 kB). View file

.venv/lib/python3.11/site-packages/ray/rllib/connectors/action/__pycache__/immutable.cpython-311.pyc ADDED Viewed

Binary file (2.3 kB). View file

.venv/lib/python3.11/site-packages/ray/rllib/connectors/action/__pycache__/lambdas.cpython-311.pyc ADDED Viewed

Binary file (3.73 kB). View file

.venv/lib/python3.11/site-packages/ray/rllib/connectors/action/__pycache__/normalize.cpython-311.pyc ADDED Viewed

Binary file (2.75 kB). View file

.venv/lib/python3.11/site-packages/ray/rllib/connectors/action/__pycache__/pipeline.cpython-311.pyc ADDED Viewed

Binary file (4.02 kB). View file

.venv/lib/python3.11/site-packages/ray/rllib/connectors/action/clip.py ADDED Viewed

	@@ -0,0 +1,41 @@

+from typing import Any
+from ray.rllib.connectors.connector import (
+    ActionConnector,
+    ConnectorContext,
+)
+from ray.rllib.connectors.registry import register_connector
+from ray.rllib.utils.spaces.space_utils import clip_action, get_base_struct_from_space
+from ray.rllib.utils.typing import ActionConnectorDataType
+from ray.rllib.utils.annotations import OldAPIStack
+@OldAPIStack
+class ClipActionsConnector(ActionConnector):
+    """Action connector that passes policy actions through `clip_action`.
+    The base struct of the context's action space is derived once at construction
+    time and reused on every `transform` call.
+    """
+    def __init__(self, ctx: ConnectorContext):
+        """Stores the base struct obtained from `ctx.action_space`."""
+        super().__init__(ctx)
+        self._action_space_struct = get_base_struct_from_space(ctx.action_space)
+    def transform(self, ac_data: ActionConnectorDataType) -> ActionConnectorDataType:
+        """Returns new connector data whose actions went through `clip_action`.
+        States and fetches of the policy output are forwarded untouched.
+        """
+        policy_output = ac_data.output
+        assert isinstance(
+            policy_output, tuple
+        ), "Action connector requires PolicyOutputType data."
+        actions, states, fetches = policy_output
+        return ActionConnectorDataType(
+            ac_data.env_id,
+            ac_data.agent_id,
+            ac_data.input_dict,
+            (clip_action(actions, self._action_space_struct), states, fetches),
+        )
+    def to_state(self):
+        """Serializes this connector as `(class name, None)`; it has no params."""
+        return (ClipActionsConnector.__name__, None)
+    @staticmethod
+    def from_state(ctx: ConnectorContext, params: Any):
+        """Rebuilds the connector from `ctx`; `params` is not used."""
+        return ClipActionsConnector(ctx)
+# Make the connector retrievable by its class name.
+register_connector(ClipActionsConnector.__name__, ClipActionsConnector)

.venv/lib/python3.11/site-packages/ray/rllib/connectors/action/immutable.py ADDED Viewed

	@@ -0,0 +1,40 @@

+from typing import Any
+import tree  # pip install dm_tree
+from ray.rllib.connectors.connector import (
+    ActionConnector,
+    ConnectorContext,
+)
+from ray.rllib.connectors.registry import register_connector
+from ray.rllib.utils.numpy import make_action_immutable
+from ray.rllib.utils.typing import ActionConnectorDataType
+from ray.rllib.utils.annotations import OldAPIStack
+@OldAPIStack
+class ImmutableActionsConnector(ActionConnector):
+    """Action connector that applies `make_action_immutable` to all action leaves."""
+    def transform(self, ac_data: ActionConnectorDataType) -> ActionConnectorDataType:
+        """Visits every node of the actions struct and forwards the policy output.
+        The result of the traversal is discarded and the original `actions` object
+        is passed on, so `make_action_immutable` is relied upon for its effect on
+        the visited items (presumably marking arrays read-only; confirm in
+        `ray.rllib.utils.numpy`).
+        """
+        policy_output = ac_data.output
+        assert isinstance(
+            policy_output, tuple
+        ), "Action connector requires PolicyOutputType data."
+        actions, states, fetches = policy_output
+        # Bottom-up traversal (children are visited before their parents).
+        tree.traverse(make_action_immutable, actions, top_down=False)
+        return ActionConnectorDataType(
+            ac_data.env_id,
+            ac_data.agent_id,
+            ac_data.input_dict,
+            (actions, states, fetches),
+        )
+    def to_state(self):
+        """Serializes this connector as `(class name, None)`; it has no params."""
+        return (ImmutableActionsConnector.__name__, None)
+    @staticmethod
+    def from_state(ctx: ConnectorContext, params: Any):
+        """Rebuilds the connector from `ctx`; `params` is not used."""
+        return ImmutableActionsConnector(ctx)
+# Make the connector retrievable by its class name.
+register_connector(ImmutableActionsConnector.__name__, ImmutableActionsConnector)

.venv/lib/python3.11/site-packages/ray/rllib/connectors/action/lambdas.py ADDED Viewed

	@@ -0,0 +1,76 @@

+from typing import Any, Callable, Dict, Type
+from ray.rllib.connectors.connector import (
+    ActionConnector,
+    ConnectorContext,
+)
+from ray.rllib.connectors.registry import register_connector
+from ray.rllib.utils.numpy import convert_to_numpy
+from ray.rllib.utils.typing import (
+    ActionConnectorDataType,
+    PolicyOutputType,
+    StateBatches,
+    TensorStructType,
+)
+from ray.rllib.utils.annotations import OldAPIStack
+@OldAPIStack
+def register_lambda_action_connector(
+    name: str, fn: Callable[[TensorStructType, StateBatches, Dict], PolicyOutputType]
+) -> Type[ActionConnector]:
+    """Wraps a plain function into a registered ActionConnector class.
+    `fn` is called with the three elements of the policy output (actions, states,
+    and fetches); whatever it returns becomes the output of the connector data.
+    Args:
+        name: Name under which the resulting action connector is registered. It is
+            also used as the generated class' `__name__` and `__qualname__`.
+        fn: The function that transforms PolicyOutputType.
+    Returns:
+        The newly created (and already registered) ActionConnector subclass that
+        transforms PolicyOutputType using fn.
+    """
+    class LambdaActionConnector(ActionConnector):
+        def transform(
+            self, ac_data: ActionConnectorDataType
+        ) -> ActionConnectorDataType:
+            policy_output = ac_data.output
+            assert isinstance(
+                policy_output, tuple
+            ), "Action connector requires PolicyOutputType data."
+            actions, states, fetches = policy_output
+            return ActionConnectorDataType(
+                ac_data.env_id,
+                ac_data.agent_id,
+                ac_data.input_dict,
+                fn(actions, states, fetches),
+            )
+        def to_state(self):
+            # Nothing to serialize besides the registered name.
+            return (name, None)
+        @staticmethod
+        def from_state(ctx: ConnectorContext, params: Any):
+            # `params` is ignored, mirroring the `None` emitted by `to_state`.
+            return LambdaActionConnector(ctx)
+    # Let the generated class report the caller-chosen name instead of the
+    # generic "LambdaActionConnector".
+    for attr in ("__name__", "__qualname__"):
+        setattr(LambdaActionConnector, attr, name)
+    register_connector(name, LambdaActionConnector)
+    return LambdaActionConnector
+# Action connector that runs `convert_to_numpy` on the actions and the states of
+# the policy output; fetches are passed through as they are.
+# The class is created and registered under the name "ConvertToNumpyConnector" by
+# `register_lambda_action_connector` and then additionally wrapped in `OldAPIStack`.
+ConvertToNumpyConnector = OldAPIStack(
+    register_lambda_action_connector(
+        "ConvertToNumpyConnector",
+        lambda actions, states, fetches: (
+            convert_to_numpy(actions),
+            convert_to_numpy(states),
+            fetches,
+        ),
+    ),
+)

.venv/lib/python3.11/site-packages/ray/rllib/connectors/action/normalize.py ADDED Viewed

	@@ -0,0 +1,44 @@

+from typing import Any
+from ray.rllib.connectors.connector import (
+    ActionConnector,
+    ConnectorContext,
+)
+from ray.rllib.connectors.registry import register_connector
+from ray.rllib.utils.spaces.space_utils import (
+    get_base_struct_from_space,
+    unsquash_action,
+)
+from ray.rllib.utils.typing import ActionConnectorDataType
+from ray.rllib.utils.annotations import OldAPIStack
+@OldAPIStack
+class NormalizeActionsConnector(ActionConnector):
+    """Action connector that passes policy actions through `unsquash_action`.
+    The base struct of the context's action space is derived once at construction
+    time and reused on every `transform` call.
+    """
+    def __init__(self, ctx: ConnectorContext):
+        """Stores the base struct obtained from `ctx.action_space`."""
+        super().__init__(ctx)
+        self._action_space_struct = get_base_struct_from_space(ctx.action_space)
+    def transform(self, ac_data: ActionConnectorDataType) -> ActionConnectorDataType:
+        """Returns new connector data whose actions went through `unsquash_action`.
+        States and fetches of the policy output are forwarded untouched.
+        """
+        policy_output = ac_data.output
+        assert isinstance(
+            policy_output, tuple
+        ), "Action connector requires PolicyOutputType data."
+        actions, states, fetches = policy_output
+        return ActionConnectorDataType(
+            ac_data.env_id,
+            ac_data.agent_id,
+            ac_data.input_dict,
+            (unsquash_action(actions, self._action_space_struct), states, fetches),
+        )
+    def to_state(self):
+        """Serializes this connector as `(class name, None)`; it has no params."""
+        return (NormalizeActionsConnector.__name__, None)
+    @staticmethod
+    def from_state(ctx: ConnectorContext, params: Any):
+        """Rebuilds the connector from `ctx`; `params` is not used."""
+        return NormalizeActionsConnector(ctx)
+# Make the connector retrievable by its class name.
+register_connector(NormalizeActionsConnector.__name__, NormalizeActionsConnector)

.venv/lib/python3.11/site-packages/ray/rllib/connectors/action/pipeline.py ADDED Viewed

	@@ -0,0 +1,61 @@

+import logging
+from typing import Any, List
+from collections import defaultdict
+from ray.rllib.connectors.connector import (
+    ActionConnector,
+    Connector,
+    ConnectorContext,
+    ConnectorPipeline,
+)
+from ray.rllib.connectors.registry import get_connector, register_connector
+from ray.rllib.utils.annotations import OldAPIStack
+from ray.rllib.utils.typing import ActionConnectorDataType
+from ray.util.timer import _Timer
+logger = logging.getLogger(__name__)
+@OldAPIStack
+class ActionConnectorPipeline(ConnectorPipeline, ActionConnector):
+    """Applies a list of action connectors in order, timing each of them."""
+    def __init__(self, ctx: ConnectorContext, connectors: List[Connector]):
+        """Initializes the pipeline.
+        Args:
+            ctx: The connector context, forwarded to the parent constructor.
+            connectors: The connectors to run, forwarded to the parent constructor.
+        """
+        super().__init__(ctx, connectors)
+        # One `_Timer` per connector, keyed by `str(connector)`, created on first use.
+        self.timers = defaultdict(_Timer)
+    def __call__(self, ac_data: ActionConnectorDataType) -> ActionConnectorDataType:
+        """Feeds `ac_data` through all connectors, each one seeing the previous output.
+        Args:
+            ac_data: The action connector data to transform.
+        Returns:
+            The data as returned by the last connector (or `ac_data` itself if the
+            pipeline holds no connectors).
+        """
+        for c in self.connectors:
+            # Time spent in each connector is tracked under the connector's str().
+            with self.timers[str(c)]:
+                ac_data = c(ac_data)
+        return ac_data
+    def to_state(self):
+        """Serializes the pipeline as `(class name, [child states])`.
+        Returns:
+            A 2-tuple of this class' name and the list of `(name, params)` tuples
+            returned by the `to_state()` of each child connector.
+        Raises:
+            AssertionError: If a child connector returns a state that is not a
+                2-tuple.
+        """
+        children = []
+        for c in self.connectors:
+            state = c.to_state()
+            # Use `type(c).__name__`: `c` is an instance and instances do not expose
+            # `__name__`, which would turn this assertion into an AttributeError.
+            assert isinstance(state, tuple) and len(state) == 2, (
+                "Serialized connector state must be in the format of "
+                f"Tuple[name: str, params: Any]. Instead we got {state} "
+                f"for connector {type(c).__name__}."
+            )
+            children.append(state)
+        return ActionConnectorPipeline.__name__, children
+    @staticmethod
+    def from_state(ctx: ConnectorContext, params: Any):
+        """Restores a pipeline from the child states produced by `to_state`.
+        Args:
+            ctx: The connector context handed to every restored connector.
+            params: List of `(name, params)` tuples, one per child connector.
+        Returns:
+            A new ActionConnectorPipeline holding the restored connectors.
+        Raises:
+            AssertionError: If `params` is not a list.
+            Exception: Whatever is raised while unpacking or restoring a child
+                state; it is logged and then re-raised unchanged.
+        """
+        assert (
+            type(params) is list
+        ), "ActionConnectorPipeline takes a list of connector params."
+        connectors = []
+        for state in params:
+            try:
+                name, subparams = state
+                connectors.append(get_connector(name, ctx, subparams))
+            except Exception:
+                logger.error(f"Failed to de-serialize connector state: {state}")
+                # Bare `raise` re-raises the active exception with its traceback.
+                raise
+        return ActionConnectorPipeline(ctx, connectors)
+# Make the pipeline retrievable by its class name.
+register_connector(ActionConnectorPipeline.__name__, ActionConnectorPipeline)

.venv/lib/python3.11/site-packages/ray/rllib/connectors/agent/__pycache__/__init__.cpython-311.pyc ADDED Viewed

Binary file (199 Bytes). View file

.venv/lib/python3.11/site-packages/ray/rllib/connectors/agent/__pycache__/clip_reward.cpython-311.pyc ADDED Viewed

Binary file (3.06 kB). View file

.venv/lib/python3.11/site-packages/ray/rllib/connectors/agent/__pycache__/env_sampling.cpython-311.pyc ADDED Viewed

Binary file (2.11 kB). View file

.venv/lib/python3.11/site-packages/ray/rllib/connectors/agent/__pycache__/pipeline.cpython-311.pyc ADDED Viewed

Binary file (4.74 kB). View file

.venv/lib/python3.11/site-packages/ray/rllib/connectors/agent/__pycache__/synced_filter.cpython-311.pyc ADDED Viewed

Binary file (3.05 kB). View file

.venv/lib/python3.11/site-packages/ray/rllib/connectors/agent/__pycache__/view_requirement.cpython-311.pyc ADDED Viewed

Binary file (6.24 kB). View file

.venv/lib/python3.11/site-packages/ray/rllib/connectors/common/__init__.py ADDED Viewed

	@@ -0,0 +1,22 @@

+from ray.rllib.connectors.common.add_observations_from_episodes_to_batch import (
+    AddObservationsFromEpisodesToBatch,
+)
+from ray.rllib.connectors.common.add_states_from_episodes_to_batch import (
+    AddStatesFromEpisodesToBatch,
+)
+from ray.rllib.connectors.common.add_time_dim_to_batch_and_zero_pad import (
+    AddTimeDimToBatchAndZeroPad,
+)
+from ray.rllib.connectors.common.agent_to_module_mapping import AgentToModuleMapping
+from ray.rllib.connectors.common.batch_individual_items import BatchIndividualItems
+from ray.rllib.connectors.common.numpy_to_tensor import NumpyToTensor
+# Names exported by `from ray.rllib.connectors.common import *`; each one is
+# imported from its submodule above.
+__all__ = [
+    "AddObservationsFromEpisodesToBatch",
+    "AddStatesFromEpisodesToBatch",
+    "AddTimeDimToBatchAndZeroPad",
+    "AgentToModuleMapping",
+    "BatchIndividualItems",
+    "NumpyToTensor",
+]

.venv/lib/python3.11/site-packages/ray/rllib/connectors/common/__pycache__/__init__.cpython-311.pyc ADDED Viewed

Binary file (1.02 kB). View file

.venv/lib/python3.11/site-packages/ray/rllib/connectors/common/__pycache__/add_observations_from_episodes_to_batch.cpython-311.pyc ADDED Viewed

Binary file (7.36 kB). View file

.venv/lib/python3.11/site-packages/ray/rllib/connectors/common/__pycache__/add_states_from_episodes_to_batch.cpython-311.pyc ADDED Viewed

Binary file (13.7 kB). View file

.venv/lib/python3.11/site-packages/ray/rllib/connectors/common/__pycache__/add_time_dim_to_batch_and_zero_pad.cpython-311.pyc ADDED Viewed

Binary file (12 kB). View file

.venv/lib/python3.11/site-packages/ray/rllib/connectors/common/__pycache__/agent_to_module_mapping.cpython-311.pyc ADDED Viewed

Binary file (12.1 kB). View file

.venv/lib/python3.11/site-packages/ray/rllib/connectors/common/__pycache__/batch_individual_items.cpython-311.pyc ADDED Viewed

Binary file (8.01 kB). View file

.venv/lib/python3.11/site-packages/ray/rllib/connectors/common/__pycache__/frame_stacking.cpython-311.pyc ADDED Viewed

Binary file (7.19 kB). View file

.venv/lib/python3.11/site-packages/ray/rllib/connectors/common/__pycache__/module_to_agent_unmapping.cpython-311.pyc ADDED Viewed

Binary file (2.83 kB). View file

.venv/lib/python3.11/site-packages/ray/rllib/connectors/common/__pycache__/numpy_to_tensor.cpython-311.pyc ADDED Viewed

Binary file (6.02 kB). View file

.venv/lib/python3.11/site-packages/ray/rllib/connectors/common/__pycache__/tensor_to_numpy.cpython-311.pyc ADDED Viewed

Binary file (1.79 kB). View file

.venv/lib/python3.11/site-packages/ray/rllib/connectors/common/add_observations_from_episodes_to_batch.py ADDED Viewed

	@@ -0,0 +1,180 @@

+from typing import Any, Dict, List, Optional
+import gymnasium as gym
+from ray.rllib.core.columns import Columns
+from ray.rllib.connectors.connector_v2 import ConnectorV2
+from ray.rllib.core.rl_module.rl_module import RLModule
+from ray.rllib.utils.annotations import override
+from ray.rllib.utils.typing import EpisodeType
+from ray.util.annotations import PublicAPI
+@PublicAPI(stability="alpha")
+class AddObservationsFromEpisodesToBatch(ConnectorV2):
+    """Gets the last observation from a running episode and adds it to the batch.
+    When constructed with `as_learner_connector=True`, all observations of each
+    episode except the very last one are added instead.
+    Note: This is one of the default env-to-module or Learner ConnectorV2 pieces that
+    are added automatically by RLlib into every env-to-module/Learner connector
+    pipeline, unless `config.add_default_connectors_to_env_to_module_pipeline` or
+    `config.add_default_connectors_to_learner_pipeline` are set to
+    False.
+    The default env-to-module connector pipeline is:
+    [
+        [0 or more user defined ConnectorV2 pieces],
+        AddObservationsFromEpisodesToBatch,
+        AddTimeDimToBatchAndZeroPad,
+        AddStatesFromEpisodesToBatch,
+        AgentToModuleMapping,  # only in multi-agent setups!
+        BatchIndividualItems,
+        NumpyToTensor,
+    ]
+    The default Learner connector pipeline is:
+    [
+        [0 or more user defined ConnectorV2 pieces],
+        AddObservationsFromEpisodesToBatch,
+        AddColumnsFromEpisodesToTrainBatch,
+        AddTimeDimToBatchAndZeroPad,
+        AddStatesFromEpisodesToBatch,
+        AgentToModuleMapping,  # only in multi-agent setups!
+        BatchIndividualItems,
+        NumpyToTensor,
+    ]
+    This ConnectorV2:
+    - Operates on a list of Episode objects (single- or multi-agent).
+    - Gets the most recent observation(s) from all the given episodes and adds them
+    to the batch under construction (as a list of individual observations).
+    - Does NOT alter any observations in the given episodes. Note, however, that as
+    a Learner connector it currently appends a `_<index>` suffix to the `id_` of
+    every episode whose ID contains no underscore (see the TODO in `__call__`).
+    - Can be used in EnvToModule and Learner connector pipelines.
+    .. testcode::
+        import gymnasium as gym
+        import numpy as np
+        from ray.rllib.connectors.common import AddObservationsFromEpisodesToBatch
+        from ray.rllib.env.single_agent_episode import SingleAgentEpisode
+        from ray.rllib.utils.test_utils import check
+        # Create two dummy SingleAgentEpisodes, each containing 2 observations,
+        # 1 action and 1 reward (both are length=1).
+        obs_space = gym.spaces.Box(-1.0, 1.0, (2,), np.float32)
+        act_space = gym.spaces.Discrete(2)
+        episodes = [SingleAgentEpisode(
+            observations=[obs_space.sample(), obs_space.sample()],
+            actions=[act_space.sample()],
+            rewards=[1.0],
+            len_lookback_buffer=0,
+        ) for _ in range(2)]
+        eps_1_last_obs = episodes[0].get_observations(-1)
+        eps_2_last_obs = episodes[1].get_observations(-1)
+        print(f"1st Episode's last obs is {eps_1_last_obs}")
+        print(f"2nd Episode's last obs is {eps_2_last_obs}")
+        # Create an instance of this class.
+        connector = AddObservationsFromEpisodesToBatch()
+        # Call the connector with the two created episodes.
+        # Note that this particular connector works without an RLModule, so we
+        # simplify here for the sake of this example.
+        output_batch = connector(
+            rl_module=None,
+            batch={},
+            episodes=episodes,
+            explore=True,
+            shared_data={},
+        )
+        # The output data should now contain the last observations of both episodes,
+        # in a "per-episode organized" fashion.
+        check(
+            output_batch,
+            {
+                "obs": {
+                    (episodes[0].id_,): [eps_1_last_obs],
+                    (episodes[1].id_,): [eps_2_last_obs],
+                },
+            },
+        )
+    """
+    def __init__(
+        self,
+        input_observation_space: Optional[gym.Space] = None,
+        input_action_space: Optional[gym.Space] = None,
+        *,
+        as_learner_connector: bool = False,
+        **kwargs,
+    ):
+        """Initializes an AddObservationsFromEpisodesToBatch instance.
+        Args:
+            input_observation_space: Forwarded unchanged to `ConnectorV2.__init__`.
+            input_action_space: Forwarded unchanged to `ConnectorV2.__init__`.
+            as_learner_connector: Whether this connector is part of a Learner connector
+                pipeline, as opposed to a env-to-module pipeline. As a Learner
+                connector, it will add an entire Episode's observations (each timestep
+                except the very last one) to the batch.
+            **kwargs: Additional keyword arguments forwarded to
+                `ConnectorV2.__init__`.
+        """
+        super().__init__(
+            input_observation_space=input_observation_space,
+            input_action_space=input_action_space,
+            **kwargs,
+        )
+        self._as_learner_connector = as_learner_connector
+    @override(ConnectorV2)
+    def __call__(
+        self,
+        *,
+        rl_module: RLModule,
+        batch: Dict[str, Any],
+        episodes: List[EpisodeType],
+        explore: Optional[bool] = None,
+        shared_data: Optional[dict] = None,
+        **kwargs,
+    ) -> Any:
+        """Adds observations from `episodes` to `batch` under `Columns.OBS`.
+        Args:
+            rl_module: Not used by this connector.
+            batch: The batch under construction. Returned unchanged if it already
+                contains `Columns.OBS`; otherwise filled in place.
+            episodes: The episodes to read the observations from.
+            explore: Not used by this connector.
+            shared_data: Not used by this connector.
+            **kwargs: Not used by this connector.
+        Returns:
+            The same `batch` object that was passed in.
+        """
+        # If the observations column is already in the batch, early out.
+        if Columns.OBS in batch:
+            return batch
+        for i, sa_episode in enumerate(
+            self.single_agent_episode_iterator(
+                episodes,
+                # If Learner connector, get all episodes (for train batch).
+                # If EnvToModule, get only those ongoing episodes that just had their
+                # agent step (b/c those are the ones we need to compute actions for
+                # next).
+                agents_that_stepped_only=not self._as_learner_connector,
+            )
+        ):
+            if self._as_learner_connector:
+                # TODO (sven): Resolve this hack by adding a new connector piece that
+                #  performs this very task.
+                # NOTE: This mutates the episode's ID in place: IDs without an
+                #  underscore get the enumeration index appended as a suffix.
+                if "_" not in sa_episode.id_:
+                    sa_episode.id_ += "_" + str(i)
+                self.add_n_batch_items(
+                    batch,
+                    Columns.OBS,
+                    # Add all observations, except the very last one.
+                    # For a terminated episode, this is the terminal observation that
+                    # has no value for training.
+                    # For a truncated episode, algorithms either add an extra NEXT_OBS
+                    # column to the batch (ex. DQN) or extend the episode length by one
+                    # (using a separate connector piece and this truncated last obs),
+                    # then bootstrap the value estimation for that extra timestep.
+                    items_to_add=sa_episode.get_observations(slice(0, len(sa_episode))),
+                    num_items=len(sa_episode),
+                    single_agent_episode=sa_episode,
+                )
+            else:
+                # Env-to-module path: the episode must not be in numpy format
+                # (presumably because it is still being extended; confirm).
+                assert not sa_episode.is_numpy
+                self.add_batch_item(
+                    batch,
+                    Columns.OBS,
+                    # Only the most recent observation is needed here.
+                    item_to_add=sa_episode.get_observations(-1),
+                    single_agent_episode=sa_episode,
+                )
+        return batch

.venv/lib/python3.11/site-packages/ray/rllib/connectors/common/add_states_from_episodes_to_batch.py ADDED Viewed

	@@ -0,0 +1,348 @@

+import math
+from typing import Any, Dict, List, Optional
+import gymnasium as gym
+import numpy as np
+import tree  # pip install dm_tree
+from ray.rllib.connectors.connector_v2 import ConnectorV2
+from ray.rllib.core import DEFAULT_MODULE_ID
+from ray.rllib.core.columns import Columns
+from ray.rllib.core.rl_module.multi_rl_module import MultiRLModule
+from ray.rllib.core.rl_module.rl_module import RLModule
+from ray.rllib.utils.annotations import override
+from ray.rllib.utils.numpy import convert_to_numpy
+from ray.rllib.utils.typing import EpisodeType
+from ray.util.annotations import PublicAPI
+@PublicAPI(stability="alpha")
+class AddStatesFromEpisodesToBatch(ConnectorV2):
+    """Gets last STATE_OUT from running episode and adds it as STATE_IN to the batch.
+    Note: This is one of the default env-to-module or Learner ConnectorV2 pieces that
+    are added automatically by RLlib into every env-to-module/Learner connector
+    pipeline, unless `config.add_default_connectors_to_env_to_module_pipeline` or
+    `config.add_default_connectors_to_learner_pipeline ` are set to
+    False.
+    The default env-to-module connector pipeline is:
+    [
+        [0 or more user defined ConnectorV2 pieces],
+        AddObservationsFromEpisodesToBatch,
+        AddTimeDimToBatchAndZeroPad,
+        AddStatesFromEpisodesToBatch,
+        AgentToModuleMapping,  # only in multi-agent setups!
+        BatchIndividualItems,
+        NumpyToTensor,
+    ]
+    The default Learner connector pipeline is:
+    [
+        [0 or more user defined ConnectorV2 pieces],
+        AddObservationsFromEpisodesToBatch,
+        AddColumnsFromEpisodesToTrainBatch,
+        AddTimeDimToBatchAndZeroPad,
+        AddStatesFromEpisodesToBatch,
+        AgentToModuleMapping,  # only in multi-agent setups!
+        BatchIndividualItems,
+        NumpyToTensor,
+    ]
+    If the RLModule is stateful, the episodes' STATE_OUTS will be extracted
+    and restructured under a new STATE_IN key.
+    As a Learner connector, the resulting STATE_IN batch has the shape (B', ...).
+    Here, B' is the sum of splits we have to do over the given episodes, such that each
+    chunk is at most `max_seq_len` long (T-axis).
+    As a EnvToModule connector, the resulting STATE_IN batch simply consists of n
+    states coming from n vectorized environments/episodes.
+    Also, all other data (observations, rewards, etc.. if applicable) will be properly
+    reshaped into (B, T=max_seq_len (learner) or 1 (env-to-module), ...) and will be
+    zero-padded, if necessary.
+    This ConnectorV2:
+    - Operates on a list of Episode objects.
+    - Gets the most recent STATE_OUT from all the given episodes and adds them under
+    the STATE_IN key to the batch under construction.
+    - Does NOT alter any data in the given episodes.
+    - Can be used in EnvToModule and Learner connector pipelines.
+    .. testcode::
+        from ray.rllib.connectors.common import AddStatesFromEpisodesToBatch
+        from ray.rllib.core.columns import Columns
+        from ray.rllib.env.single_agent_episode import SingleAgentEpisode
+        from ray.rllib.utils.test_utils import check
+        # Create a simple dummy class, pretending to be an RLModule with
+        # `get_initial_state`, `is_stateful` and `model_config` property defined:
+        class MyStateModule:
+            # dummy config
+            model_config = {"max_seq_len": 2}
+            def is_stateful(self):
+                return True
+            def get_initial_state(self):
+                return 0.0
+        # Create an empty episode. The connector should use the RLModule's initial state
+        # to populate STATE_IN for the next forward pass.
+        episode = SingleAgentEpisode()
+        rl_module = MyStateModule()
+        rl_module_init_state = rl_module.get_initial_state()
+        # Create an instance of this class (as a env-to-module connector).
+        connector = AddStatesFromEpisodesToBatch(as_learner_connector=False)
+        # Call the connector.
+        output_batch = connector(
+            rl_module=rl_module,
+            batch={},
+            episodes=[episode],
+            shared_data={},
+        )
+        # The output data's STATE_IN key should now contain the RLModule's initial state
+        # plus the one state out found in the episode in a "per-episode organized"
+        # fashion.
+        check(
+            output_batch[Columns.STATE_IN],
+            {
+                (episode.id_,): [rl_module_init_state],
+            },
+        )
+        # Create a SingleAgentEpisodes containing 5 observations,
+        # 4 actions and 4 rewards, and 4 STATE_OUTs.
+        # The same connector should now use the episode-stored last STATE_OUT as
+        # STATE_IN for the next forward pass.
+        episode = SingleAgentEpisode(
+            observations=[0, 1, 2, 3, 4],
+            actions=[1, 2, 3, 4],
+            rewards=[1.0, 2.0, 3.0, 4.0],
+            # STATE_OUT in episode will show up under STATE_IN in the batch.
+            extra_model_outputs={
+                Columns.STATE_OUT: [-4.0, -3.0, -2.0, -1.0],
+            },
+            len_lookback_buffer = 0,
+        )
+        # Call the connector.
+        output_batch = connector(
+            rl_module=rl_module,
+            batch={},
+            episodes=[episode],
+            shared_data={},
+        )
+        # The output data's STATE_IN key should now contain the episode's last
+        # STATE_OUT, NOT the RLModule's initial state in a "per-episode organized"
+        # fashion.
+        check(
+            output_batch[Columns.STATE_IN],
+            {
+                # Expect the episode's last STATE_OUT.
+                (episode.id_,): [-1.0],
+            },
+        )
+        # Create a new connector as a learner connector. With the module's RNN seq len
+        # of 2 (`max_seq_len` in `model_config`; for testing purposes only), passing
+        # the same data through this learner connector should yield STATE_IN data
+        # containing the initial module state, followed by every 2nd STATE_OUT stored
+        # in the episode.
+        connector = AddStatesFromEpisodesToBatch(as_learner_connector=True)
+        # Call the connector.
+        output_batch = connector(
+            rl_module=rl_module,
+            batch={},
+            episodes=[episode],
+            shared_data={},
+        )
+        check(
+            output_batch[Columns.STATE_IN],
+            {
+                # Expect initial module state + every 2nd STATE_OUT from episode, but
+                # not the very last one (just like the very last observation, this data
+                # is NOT passed through the forward_train, b/c there is nothing to learn
+                # at that timestep, unless we need to compute e.g. bootstrap value
+                # predictions).
+                # Also note that the different STATE_IN timesteps are already present
+                # as one batched item per episode in the list.
+                (episode.id_,): [rl_module_init_state, -3.0],
+            },
+        )
+    """
+    def __init__(
+        self,
+        input_observation_space: Optional[gym.Space] = None,
+        input_action_space: Optional[gym.Space] = None,
+        *,
+        as_learner_connector: bool = False,
+        **kwargs,
+    ):
+        """Initializes an AddStatesFromEpisodesToBatch instance.
+        Args:
+            input_observation_space: Optional observation space, forwarded unchanged
+                to the parent constructor.
+            input_action_space: Optional action space, forwarded unchanged to the
+                parent constructor.
+            as_learner_connector: Whether this connector is part of a Learner connector
+                pipeline, as opposed to an env-to-module pipeline. As a Learner
+                connector, it adds one STATE_IN item per `max_seq_len` chunk of each
+                episode to the batch. Otherwise, it adds a single state per episode
+                (the module's initial state or the episode's most recent STATE_OUT).
+            **kwargs: Forwarded unchanged to the parent constructor.
+        """
+        super().__init__(
+            input_observation_space=input_observation_space,
+            input_action_space=input_action_space,
+            **kwargs,
+        )
+        # Selects the Learner vs. env-to-module code path in `__call__`.
+        self._as_learner_connector = as_learner_connector
+    @override(ConnectorV2)
+    def __call__(
+        self,
+        *,
+        rl_module: RLModule,
+        batch: Dict[str, Any],
+        episodes: List[EpisodeType],
+        explore: Optional[bool] = None,
+        shared_data: Optional[dict] = None,
+        **kwargs,
+    ) -> Any:
+        """Adds recurrent state inputs taken from `episodes` to `batch`.
+        Does nothing if `rl_module` is not stateful or if `batch` already has a
+        STATE_IN column.
+        As a Learner connector, one STATE_IN item is added per `max_seq_len` chunk of
+        each episode (and, if NEXT_OBS is in `batch`, one NEXT_STATE_IN item per
+        chunk). As an env-to-module connector, a single STATE_IN item is added per
+        episode: the module's initial state or the episode's most recent STATE_OUT.
+        Args:
+            rl_module: The (possibly multi-agent) RLModule providing `is_stateful()`,
+                `get_initial_state()`, and `model_config["max_seq_len"]`.
+            batch: The batch that the state columns are added to.
+            episodes: The episodes from which STATE_OUT data is read.
+            explore: Not used by this connector.
+            shared_data: Not used by this connector.
+            **kwargs: Not used by this connector.
+        Returns:
+            The `batch` object that was passed in.
+        """
+        # If not stateful OR STATE_IN already in data, early out.
+        if not rl_module.is_stateful() or Columns.STATE_IN in batch:
+            return batch
+        for sa_episode in self.single_agent_episode_iterator(
+            episodes,
+            # If Learner connector, get all episodes (for train batch).
+            # If EnvToModule, get only those ongoing episodes that just had their
+            # agent step (b/c those are the ones we need to compute actions for next).
+            agents_that_stepped_only=not self._as_learner_connector,
+        ):
+            if self._as_learner_connector:
+                # Multi-agent case: Extract correct single agent RLModule (to get its
+                # individual state).
+                if sa_episode.module_id is not None:
+                    sa_module = rl_module[sa_episode.module_id]
+                else:
+                    sa_module = (
+                        rl_module[DEFAULT_MODULE_ID]
+                        if isinstance(rl_module, MultiRLModule)
+                        else rl_module
+                    )
+                # This single-agent RLModule is NOT stateful -> Skip.
+                if not sa_module.is_stateful():
+                    continue
+                max_seq_len = sa_module.model_config["max_seq_len"]
+                # look_back_state.shape=([state-dim],)
+                look_back_state = (
+                    # Episode has a (reset) beginning OR does not carry any STATE_OUTs
+                    # -> Use the module's initial state (converted to numpy).
+                    convert_to_numpy(sa_module.get_initial_state())
+                    if sa_episode.t_started == 0
+                    or (Columns.STATE_OUT not in sa_episode.extra_model_outputs)
+                    # Episode starts somewhere in the middle (is a cut
+                    # continuation chunk) -> Use previous chunk's last
+                    # STATE_OUT as initial state.
+                    else sa_episode.get_extra_model_outputs(
+                        key=Columns.STATE_OUT,
+                        indices=-1,
+                        neg_index_as_lookback=True,
+                    )
+                )
+                # If we have `"state_out"`s (e.g. from rollouts) use them for the
+                # `"state_in"`s.
+                if Columns.STATE_OUT in sa_episode.extra_model_outputs:
+                    # state_outs.shape=(T,[state-dim])  T=episode len
+                    state_outs = sa_episode.get_extra_model_outputs(
+                        key=Columns.STATE_OUT
+                    )
+                # Otherwise, we have no `"state_out"` (e.g. because we are sampling
+                # from offline data and the expert policy was not stateful).
+                else:
+                    # Then simply use the `look_back_state`, i.e. in this case the
+                    # initial state, as `"state_in"` in training (repeated once per
+                    # episode timestep).
+                    if sa_episode.is_numpy:
+                        state_outs = tree.map_structure(
+                            lambda a, _sae=sa_episode: np.repeat(
+                                a[np.newaxis, ...], len(_sae), axis=0
+                            ),
+                            look_back_state,
+                        )
+                    else:
+                        state_outs = [look_back_state for _ in range(len(sa_episode))]
+                # Explanation:
+                # B=ceil(episode len / max_seq_len) (see `num_items` below).
+                # [:-1]: Shift state outs by one; ignore very last
+                # STATE_OUT, but therefore add the lookback/init state at
+                # the beginning.
+                # [::max_seq_len]: only keep every `max_seq_len`-th state, i.e. the
+                # state at the start of each chunk.
+                items_to_add = (
+                    tree.map_structure(
+                        lambda i, o, m=max_seq_len: np.concatenate([[i], o[:-1]])[::m],
+                        look_back_state,
+                        state_outs,
+                    )
+                    if sa_episode.is_numpy
+                    else ([look_back_state] + state_outs[:-1])[::max_seq_len]
+                )
+                self.add_n_batch_items(
+                    batch=batch,
+                    column=Columns.STATE_IN,
+                    items_to_add=items_to_add,
+                    num_items=int(math.ceil(len(sa_episode) / max_seq_len)),
+                    single_agent_episode=sa_episode,
+                )
+                # If the batch has a NEXT_OBS column, also add NEXT_STATE_IN: every
+                # `max_seq_len`-th STATE_OUT, without the one-step shift used above.
+                if Columns.NEXT_OBS in batch:
+                    items_to_add = (
+                        tree.map_structure(
+                            lambda i, m=max_seq_len: i[::m],
+                            state_outs,
+                        )
+                        if sa_episode.is_numpy
+                        else state_outs[::max_seq_len]
+                    )
+                    self.add_n_batch_items(
+                        batch=batch,
+                        column=Columns.NEXT_STATE_IN,
+                        items_to_add=items_to_add,
+                        num_items=int(math.ceil(len(sa_episode) / max_seq_len)),
+                        single_agent_episode=sa_episode,
+                    )
+            else:
+                # The env-to-module path only handles episodes that have not been
+                # converted to numpy.
+                assert not sa_episode.is_numpy
+                # Multi-agent case: Extract correct single agent RLModule (to get its
+                # individual state).
+                # NOTE(review): Unlike the Learner branch above, there is no
+                # `MultiRLModule`/`DEFAULT_MODULE_ID` fallback here when `module_id` is
+                # None -> `rl_module` itself is used. Confirm this is intended.
+                sa_module = rl_module
+                if sa_episode.module_id is not None:
+                    sa_module = rl_module[sa_episode.module_id]
+                # This single-agent RLModule is NOT stateful -> Skip.
+                if not sa_module.is_stateful():
+                    continue
+                # Episode just started or has no `"state_out"` (e.g. in offline
+                # sampling) -> Get initial state from our RLModule.
+                if (sa_episode.t_started == 0 and len(sa_episode) == 0) or (
+                    Columns.STATE_OUT not in sa_episode.extra_model_outputs
+                ):
+                    state = sa_module.get_initial_state()
+                # Episode is already ongoing -> Use most recent STATE_OUT.
+                else:
+                    state = sa_episode.get_extra_model_outputs(
+                        key=Columns.STATE_OUT,
+                        indices=-1,
+                    )
+                self.add_batch_item(
+                    batch,
+                    Columns.STATE_IN,
+                    item_to_add=state,
+                    single_agent_episode=sa_episode,
+                )
+        return batch

.venv/lib/python3.11/site-packages/ray/rllib/connectors/common/add_time_dim_to_batch_and_zero_pad.py ADDED Viewed

	@@ -0,0 +1,302 @@

+from typing import Any, Dict, List, Optional
+import gymnasium as gym
+import numpy as np
+import tree  # pip install dm_tree
+from ray.rllib.connectors.connector_v2 import ConnectorV2
+from ray.rllib.core import DEFAULT_MODULE_ID
+from ray.rllib.core.columns import Columns
+from ray.rllib.core.rl_module.multi_rl_module import MultiRLModule
+from ray.rllib.core.rl_module.rl_module import RLModule
+from ray.rllib.utils.annotations import override
+from ray.rllib.utils.postprocessing.zero_padding import (
+    create_mask_and_seq_lens,
+    split_and_zero_pad,
+)
+from ray.rllib.utils.spaces.space_utils import BatchedNdArray
+from ray.rllib.utils.typing import EpisodeType
+from ray.util.annotations import PublicAPI
+@PublicAPI(stability="alpha")
+class AddTimeDimToBatchAndZeroPad(ConnectorV2):
+    """Adds an extra time dim (axis=1) to all data currently in the batch.
+    Note: This is one of the default env-to-module or Learner ConnectorV2 pieces that
+    are added automatically by RLlib into every env-to-module/Learner connector
+    pipeline, unless `config.add_default_connectors_to_env_to_module_pipeline` or
+    `config.add_default_connectors_to_learner_pipeline` are set to
+    False.
+    The default env-to-module connector pipeline is:
+    [
+        [0 or more user defined ConnectorV2 pieces],
+        AddObservationsFromEpisodesToBatch,
+        AddTimeDimToBatchAndZeroPad,
+        AddStatesFromEpisodesToBatch,
+        AgentToModuleMapping,  # only in multi-agent setups!
+        BatchIndividualItems,
+        NumpyToTensor,
+    ]
+    The default Learner connector pipeline is:
+    [
+        [0 or more user defined ConnectorV2 pieces],
+        AddObservationsFromEpisodesToBatch,
+        AddColumnsFromEpisodesToTrainBatch,
+        AddTimeDimToBatchAndZeroPad,
+        AddStatesFromEpisodesToBatch,
+        AgentToModuleMapping,  # only in multi-agent setups!
+        BatchIndividualItems,
+        NumpyToTensor,
+    ]
+    If the RLModule is stateful, an extra time dim at axis=1 is added to all data in the
+    batch.
+    Also, all data (observations, rewards, etc.. if applicable) will be properly
+    reshaped into (B, T=max_seq_len (learner) or 1 (env-to-module), ...) and will be
+    zero-padded, if necessary.
+    This ConnectorV2:
+    - Operates on a list of Episode objects.
+    - Adds a time dim at axis=1 to all columns already in the batch.
+    - In case of a learner connector pipeline, zero-pads the data according to the
+    module's `self.model_config["max_seq_len"]` setting and reshapes all data to
+    (B, T, ...). The connector also adds SEQ_LENS information and loss mask
+    information to the batch based on the added zero-padding.
+    - Does NOT alter any data in the given episodes.
+    - Can be used in EnvToModule and Learner connector pipelines.
+    .. testcode::
+        from ray.rllib.connectors.common import AddTimeDimToBatchAndZeroPad
+        from ray.rllib.core.columns import Columns
+        from ray.rllib.env.single_agent_episode import SingleAgentEpisode
+        from ray.rllib.utils.test_utils import check
+        # Create a simple dummy class, pretending to be an RLModule with
+        # `get_initial_state`, `is_stateful` and `model_config` property defined:
+        class MyStateModule:
+            # dummy config
+            model_config = {"max_seq_len": 3}
+            def is_stateful(self):
+                return True
+            def get_initial_state(self):
+                return 0.0
+        # Create an already reset episode. Expect the connector to add a time-dim to the
+        # reset observation.
+        episode = SingleAgentEpisode(observations=[0])
+        rl_module = MyStateModule()
+        # Create an instance of this class (as an env-to-module connector).
+        connector = AddTimeDimToBatchAndZeroPad(as_learner_connector=False)
+        # Call the connector.
+        output_batch = connector(
+            rl_module=rl_module,
+            batch={Columns.OBS: [0]},
+            episodes=[episode],
+            shared_data={},
+        )
+        # The output data's OBS key should now be reshaped to (B, T)
+        check(output_batch[Columns.OBS], [[0]])
+        # Create a SingleAgentEpisode containing 5 observations,
+        # 4 actions and 4 rewards.
+        episode = SingleAgentEpisode(
+            observations=[0, 1, 2, 3, 4],
+            actions=[1, 2, 3, 4],
+            rewards=[1.0, 2.0, 3.0, 4.0],
+            len_lookback_buffer=0,
+        )
+        # Call the connector.
+        output_batch = connector(
+            rl_module=rl_module,
+            batch={Columns.OBS: [4]},
+            episodes=[episode],
+            shared_data={},
+        )
+        # The output data's OBS key (the only column in the batch) should now have a
+        # time rank.
+        check(
+            # Expect the episode's last OBS.
+            output_batch[Columns.OBS], [[4]],
+        )
+        # Create a new connector as a learner connector. With the module's RNN seq len
+        # of 3 (`max_seq_len` in `model_config`; for testing purposes only), passing
+        # the same data through this learner connector, we expect the data to also be
+        # zero-padded.
+        connector = AddTimeDimToBatchAndZeroPad(as_learner_connector=True)
+        # Call the connector.
+        output_batch = connector(
+            rl_module=rl_module,
+            batch={Columns.OBS: {(episode.id_,): [0, 1, 2, 3]}},
+            episodes=[episode],
+            shared_data={},
+        )
+        check(output_batch[Columns.OBS], {(episode.id_,): [[0, 1, 2], [3, 0, 0]]})
+    """
+    def __init__(
+        self,
+        input_observation_space: Optional[gym.Space] = None,
+        input_action_space: Optional[gym.Space] = None,
+        *,
+        as_learner_connector: bool = False,
+        **kwargs,
+    ):
+        """Initializes an AddTimeDimToBatchAndZeroPad instance.
+        Args:
+            input_observation_space: Optional observation space, forwarded unchanged
+                to the parent constructor.
+            input_action_space: Optional action space, forwarded unchanged to the
+                parent constructor.
+            as_learner_connector: Whether this connector is part of a Learner connector
+                pipeline, as opposed to an env-to-module pipeline. As a Learner
+                connector, it zero-pads the batch data into `max_seq_len` chunks and
+                adds SEQ_LENS and LOSS_MASK columns. Otherwise, it only adds an extra
+                (time) axis to each item in the batch.
+            **kwargs: Forwarded unchanged to the parent constructor.
+        """
+        super().__init__(
+            input_observation_space=input_observation_space,
+            input_action_space=input_action_space,
+            **kwargs,
+        )
+        # Selects the Learner vs. env-to-module code path in `__call__`.
+        self._as_learner_connector = as_learner_connector
+    @override(ConnectorV2)
+    def __call__(
+        self,
+        *,
+        rl_module: RLModule,
+        batch: Dict[str, Any],
+        episodes: List[EpisodeType],
+        explore: Optional[bool] = None,
+        shared_data: Optional[dict] = None,
+        **kwargs,
+    ) -> Any:
+        """Adds a time axis to all batch items and, as a Learner connector, zero-pads.
+        Does nothing if `rl_module` is not stateful or if `batch` already has a
+        STATE_IN column.
+        As an env-to-module connector, each item in each column gets an extra axis
+        (items that belong to a non-stateful module are left as-is) and
+        `shared_data["_added_single_ts_time_rank"]` is set to True.
+        As a Learner connector, each per-episode item list (except those under the
+        INFOS column) is passed through `split_and_zero_pad`, and SEQ_LENS and
+        LOSS_MASK items are added per episode (LOSS_MASK only if
+        `shared_data["_added_loss_mask_for_valid_episode_ts"]` is not set).
+        Args:
+            rl_module: The (possibly multi-agent) RLModule providing `is_stateful()`
+                and `model_config["max_seq_len"]`.
+            batch: The batch whose columns are changed/extended.
+            episodes: The episodes used to compute SEQ_LENS and LOSS_MASK (Learner
+                connector only).
+            explore: Not used by this connector.
+            shared_data: Dict used to exchange flags with other connector pieces.
+            **kwargs: Not used by this connector.
+        Returns:
+            The `batch` object that was passed in.
+        """
+        # If not stateful OR STATE_IN already in data, early out.
+        if not rl_module.is_stateful() or Columns.STATE_IN in batch:
+            return batch
+        # Make all inputs (other than STATE_IN) have an additional T-axis.
+        # Since data has not been batched yet (we are still operating on lists in the
+        # batch), we add this time axis as 0 (not 1). When we batch, the batch axis will
+        # be 0 and the time axis will be 1.
+        # Also, let module-to-env pipeline know that we had added a single timestep
+        # time rank to the data (to remove it again).
+        if not self._as_learner_connector:
+            for column in batch.keys():
+                self.foreach_batch_item_change_in_place(
+                    batch=batch,
+                    column=column,
+                    func=lambda item, eps_id, aid, mid: (
+                        # Item belongs to a non-stateful module -> Leave as-is.
+                        item
+                        if mid is not None and not rl_module[mid].is_stateful()
+                        # Expand on axis 0 (the to-be-time-dim) if item has not been
+                        # batched yet, otherwise axis=1 (the time-dim).
+                        else tree.map_structure(
+                            lambda s: np.expand_dims(
+                                s, axis=(1 if isinstance(s, BatchedNdArray) else 0)
+                            ),
+                            item,
+                        )
+                    ),
+                )
+            # NOTE(review): `shared_data` defaults to None in the signature, but is
+            # written to here -> presumably callers always pass a dict; confirm.
+            shared_data["_added_single_ts_time_rank"] = True
+        else:
+            # Before adding STATE_IN to the `data`, zero-pad existing data and batch
+            # into max_seq_len chunks.
+            # Note that we iterate over a shallow copy of `batch` here.
+            for column, column_data in batch.copy().items():
+                # Do not zero-pad INFOS column.
+                if column == Columns.INFOS:
+                    continue
+                for key, item_list in column_data.items():
+                    # Multi-agent case AND RLModule is not stateful -> Do not zero-pad
+                    # for this model.
+                    assert isinstance(key, tuple)
+                    mid = None
+                    # 3-tuple keys are unpacked as (EpisodeID, AgentID, ModuleID).
+                    if len(key) == 3:
+                        eps_id, aid, mid = key
+                        if not rl_module[mid].is_stateful():
+                            continue
+                    column_data[key] = split_and_zero_pad(
+                        item_list,
+                        max_seq_len=self._get_max_seq_len(rl_module, module_id=mid),
+                    )
+                    # TODO (sven): Remove this hint/hack once we are not relying on
+                    #  SampleBatch anymore (which has to set its property
+                    #  zero_padded=True when shuffling).
+                    shared_data[
+                        (
+                            "_zero_padded_for_mid="
+                            f"{mid if mid is not None else DEFAULT_MODULE_ID}"
+                        )
+                    ] = True
+            for sa_episode in self.single_agent_episode_iterator(
+                # We are in the Learner connector branch here -> Get all episodes
+                # (for the train batch), not only those whose agents just stepped.
+                episodes,
+                agents_that_stepped_only=False,
+            ):
+                # Multi-agent case: Extract correct single agent RLModule (to get its
+                # individual state).
+                if sa_episode.module_id is not None:
+                    sa_module = rl_module[sa_episode.module_id]
+                else:
+                    sa_module = (
+                        rl_module[DEFAULT_MODULE_ID]
+                        if isinstance(rl_module, MultiRLModule)
+                        else rl_module
+                    )
+                # This single-agent RLModule is NOT stateful -> Skip.
+                if not sa_module.is_stateful():
+                    continue
+                # NOTE(review): Direct lookup here (KeyError if the key is missing),
+                # whereas the zero-padding above goes through `_get_max_seq_len()`
+                # (ValueError with a hint on how to set `max_seq_len`).
+                max_seq_len = sa_module.model_config["max_seq_len"]
+                # Also, create the loss mask (b/c of our now possibly zero-padded data)
+                # as well as the seq_lens array and add these to `data` as well.
+                mask, seq_lens = create_mask_and_seq_lens(len(sa_episode), max_seq_len)
+                self.add_n_batch_items(
+                    batch=batch,
+                    column=Columns.SEQ_LENS,
+                    items_to_add=seq_lens,
+                    num_items=len(seq_lens),
+                    single_agent_episode=sa_episode,
+                )
+                # Only add the loss mask if this flag has not been set in `shared_data`.
+                if not shared_data.get("_added_loss_mask_for_valid_episode_ts"):
+                    self.add_n_batch_items(
+                        batch=batch,
+                        column=Columns.LOSS_MASK,
+                        items_to_add=mask,
+                        num_items=len(mask),
+                        single_agent_episode=sa_episode,
+                    )
+        return batch
+    def _get_max_seq_len(self, rl_module, module_id=None):
+        """Returns the `max_seq_len` setting from a module's `model_config`.
+        Args:
+            rl_module: A single RLModule or a MultiRLModule.
+            module_id: The ID of the sub-module to look up, in case `rl_module` is a
+                MultiRLModule. If falsy, the first sub-module found in `rl_module`
+                is used instead.
+        Returns:
+            The value under the "max_seq_len" key in the module's `model_config`.
+        Raises:
+            ValueError: If the module's `model_config` has no "max_seq_len" key.
+        """
+        if not isinstance(rl_module, MultiRLModule):
+            mod = rl_module
+        elif module_id:
+            mod = rl_module[module_id]
+        else:
+            # No ModuleID given -> Fall back to the first sub-module.
+            mod = next(iter(rl_module.values()))
+        if "max_seq_len" not in mod.model_config:
+            raise ValueError(
+                "You are using a stateful RLModule and are not providing a "
+                "'max_seq_len' key inside your `model_config`. You can set this "
+                "dict and/or override keys in it via `config.rl_module("
+                "model_config={'max_seq_len': [some int]})`."
+            )
+        return mod.model_config["max_seq_len"]

.venv/lib/python3.11/site-packages/ray/rllib/connectors/common/agent_to_module_mapping.py ADDED Viewed

	@@ -0,0 +1,291 @@

+from collections import defaultdict
+from typing import Any, Dict, List, Optional
+import gymnasium as gym
+from ray.rllib.connectors.connector_v2 import ConnectorV2
+from ray.rllib.core.rl_module.rl_module import RLModule, RLModuleSpec
+from ray.rllib.env.multi_agent_episode import MultiAgentEpisode
+from ray.rllib.utils.annotations import override
+from ray.rllib.utils.typing import EpisodeType, ModuleID
+from ray.util.annotations import PublicAPI
+@PublicAPI(stability="alpha")
+class AgentToModuleMapping(ConnectorV2):
+    """ConnectorV2 that performs mapping of data from AgentID based to ModuleID based.
+    Note: This is one of the default env-to-module or Learner ConnectorV2 pieces that
+    are added automatically by RLlib into every env-to-module/Learner connector
+    pipeline, unless `config.add_default_connectors_to_env_to_module_pipeline` or
+    `config.add_default_connectors_to_learner_pipeline ` are set to
+    False.
+    The default env-to-module connector pipeline is:
+    [
+        [0 or more user defined ConnectorV2 pieces],
+        AddObservationsFromEpisodesToBatch,
+        AddTimeDimToBatchAndZeroPad,
+        AddStatesFromEpisodesToBatch,
+        AgentToModuleMapping,  # only in multi-agent setups!
+        BatchIndividualItems,
+        NumpyToTensor,
+    ]
+    The default Learner connector pipeline is:
+    [
+        [0 or more user defined ConnectorV2 pieces],
+        AddObservationsFromEpisodesToBatch,
+        AddColumnsFromEpisodesToTrainBatch,
+        AddTimeDimToBatchAndZeroPad,
+        AddStatesFromEpisodesToBatch,
+        AgentToModuleMapping,  # only in multi-agent setups!
+        BatchIndividualItems,
+        NumpyToTensor,
+    ]
+    This connector piece is only used by RLlib (as a default connector piece) in a
+    multi-agent setup.
+    Note that before the mapping, `data` is expected to have the following
+    structure:
+    [col0]:
+        (eps_id0, ag0, mod0): [list of individual batch items]
+        (eps_id0, ag1, mod2): [list of individual batch items]
+        (eps_id1, ag0, mod1): [list of individual batch items]
+    [col1]:
+        etc..
+    The target structure of the above `data` would then be:
+    [mod0]:
+        [col0]: [batched data -> batch_size_B will be the number of all items in the
+            input data under col0 that have mod0 as their ModuleID]
+        [col1]: [batched data]
+    [mod1]:
+        [col0]: etc.
+    Mapping happens in the following stages:
+    1) Under each column name, sort keys first by EpisodeID, then AgentID.
+    2) Add ModuleID keys under each column name (no cost/extra memory) and map these
+    new keys to empty lists.
+    [col0] -> [mod0] -> []: Then push items that belong to mod0 into these lists.
+    3) Perform batching on the per-module lists under each column:
+    [col0] -> [mod0]: [...] <- now batched data (numpy array or struct of numpy
+    arrays).
+    4) Flip column names with ModuleIDs (no cost/extra memory):
+    [mod0]:
+        [col0]: [batched data]
+    etc..
+    Note that in order to unmap the resulting batch back into an AgentID based one,
+    we have to store the EpisodeID AND AgentID of each module's batch item
+    under the `memorized_map_structure` key in `shared_data`.
+    .. testcode::
+        from ray.rllib.connectors.env_to_module import AgentToModuleMapping
+        from ray.rllib.utils.test_utils import check
+        batch = {
+            "obs": {
+                ("MA-EPS0", "agent0", "module0"): [0, 1, 2],
+                ("MA-EPS0", "agent1", "module1"): [3, 4, 5],
+            },
+            "actions": {
+                ("MA-EPS1", "agent2", "module0"): [8],
+                ("MA-EPS0", "agent1", "module1"): [9],
+            },
+        }
+        # Create our connector piece.
+        connector = AgentToModuleMapping(
+            rl_module_specs={"module0", "module1"},
+            agent_to_module_mapping_fn=(
+                lambda agent_id, eps: "module1" if agent_id == "agent1" else "module0"
+            ),
+        )
+        # Call the connector (and thereby flip from AgentID based to ModuleID based
+        # structure).
+        output_batch = connector(
+            rl_module=None,  # This particular connector works without an RLModule.
+            batch=batch,
+            episodes=[],  # This particular connector works without a list of episodes.
+            explore=True,
+            shared_data={},
+        )
+        # `data` should now be mapped from ModuleIDs to module data.
+        check(
+            output_batch,
+            {
+                "module0": {
+                    "obs": [0, 1, 2],
+                    "actions": [8],
+                },
+                "module1": {
+                    "obs": [3, 4, 5],
+                    "actions": [9],
+                },
+            },
+        )
+    """
+    @override(ConnectorV2)
+    def recompute_output_observation_space(
+        self,
+        input_observation_space: gym.Space,
+        input_action_space: gym.Space,
+    ) -> gym.Space:
+        """Returns the observation space after a possible agent-to-module mapping.
+        Args:
+            input_observation_space: The space that is passed on to
+                `_map_space_if_necessary` (with `which="obs"`).
+            input_action_space: Not used by this method.
+        Returns:
+            The result of `_map_space_if_necessary(input_observation_space, "obs")`.
+        """
+        return self._map_space_if_necessary(input_observation_space, "obs")
+    @override(ConnectorV2)
+    def recompute_output_action_space(
+        self,
+        input_observation_space: gym.Space,
+        input_action_space: gym.Space,
+    ) -> gym.Space:
+        """Returns the action space after a possible agent-to-module mapping.
+        Args:
+            input_observation_space: Not used by this method.
+            input_action_space: The space that is passed on to
+                `_map_space_if_necessary` (with `which="act"`).
+        Returns:
+            The result of `_map_space_if_necessary(input_action_space, "act")`.
+        """
+        return self._map_space_if_necessary(input_action_space, "act")
+    def __init__(
+        self,
+        input_observation_space: Optional[gym.Space] = None,
+        input_action_space: Optional[gym.Space] = None,
+        *,
+        rl_module_specs: Dict[ModuleID, RLModuleSpec],
+        agent_to_module_mapping_fn,
+    ):
+        """Initializes an AgentToModuleMapping instance.
+        Args:
+            input_observation_space: Optional observation space, passed (positionally)
+                to the parent constructor.
+            input_action_space: Optional action space, passed (positionally) to the
+                parent constructor.
+            rl_module_specs: The RLModuleSpecs, keyed by ModuleID. Stored on the
+                instance and read by `_map_space_if_necessary`.
+            agent_to_module_mapping_fn: Callable that `_map_space_if_necessary` invokes
+                as `fn(agent_id, episode)` and whose return value it compares against
+                the ModuleIDs in `rl_module_specs`.
+        """
+        super().__init__(input_observation_space, input_action_space)
+        self._rl_module_specs = rl_module_specs
+        self._agent_to_module_mapping_fn = agent_to_module_mapping_fn
+    @override(ConnectorV2)
+    def __call__(
+        self,
+        *,
+        rl_module: RLModule,
+        batch: Dict[str, Any],
+        episodes: List[EpisodeType],
+        explore: Optional[bool] = None,
+        shared_data: Optional[dict] = None,
+        **kwargs,
+    ) -> Any:
+        # Current agent to module mapping function.
+        # agent_to_module_mapping_fn = shared_data.get("agent_to_module_mapping_fn")
+        # Store in shared data, which module IDs map to which episode/agent, such
+        # that the module-to-env pipeline can map the data back to agents.
+        memorized_map_structure = defaultdict(list)
+        for column, agent_data in batch.items():
+            if rl_module is not None and column in rl_module:
+                continue
+            for eps_id, agent_id, module_id in agent_data.keys():
+                memorized_map_structure[module_id].append((eps_id, agent_id))
+            # TODO (sven): We should check that all columns have the same struct.
+            break
+        shared_data["memorized_map_structure"] = dict(memorized_map_structure)
+        # Mapping from ModuleID to column data.
+        data_by_module = {}
+        # Iterating over each column in the original data:
+        for column, agent_data in batch.items():
+            if rl_module is not None and column in rl_module:
+                if column in data_by_module:
+                    data_by_module[column].update(agent_data)
+                else:
+                    data_by_module[column] = agent_data
+                continue
+            for (
+                eps_id,
+                agent_id,
+                module_id,
+            ), values_batch_or_list in agent_data.items():
+                assert isinstance(values_batch_or_list, list)
+                for value in values_batch_or_list:
+                    if module_id not in data_by_module:
+                        data_by_module[module_id] = {column: []}
+                    elif column not in data_by_module[module_id]:
+                        data_by_module[module_id][column] = []
+                    # Append the data.
+                    data_by_module[module_id][column].append(value)
+        return data_by_module
+    def _map_space_if_necessary(self, space: gym.Space, which: str = "obs"):
+        # Analyze input observation space to check, whether the user has already taken
+        # care of the agent to module mapping.
+        if set(self._rl_module_specs) == set(space.spaces.keys()):
+            return space
+        # We need to take care of agent to module mapping. Figure out the resulting
+        # observation space here.
+        dummy_eps = MultiAgentEpisode()
+        ret_space = {}
+        for module_id in self._rl_module_specs:
+            # Easy way out, user has provided space in the RLModule spec dict.
+            if (
+                isinstance(self._rl_module_specs, dict)
+                and module_id in self._rl_module_specs
+            ):
+                if (
+                    which == "obs"
+                    and self._rl_module_specs[module_id].observation_space
+                ):
+                    ret_space[module_id] = self._rl_module_specs[
+                        module_id
+                    ].observation_space
+                    continue
+                elif which == "act" and self._rl_module_specs[module_id].action_space:
+                    ret_space[module_id] = self._rl_module_specs[module_id].action_space
+                    continue
+            # Need to reverse map spaces (for the different agents) to certain
+            # module IDs (using a dummy MultiAgentEpisode).
+            one_space = next(iter(space.spaces.values()))
+            # If all obs spaces are the same anyway, just use the first
+            # single-agent space.
+            if all(s == one_space for s in space.spaces.values()):
+                ret_space[module_id] = one_space
+            # Otherwise, we have to compare the ModuleID with all possible
+            # AgentIDs and find the agent ID that matches.
+            else:
+                match_aid = None
+                one_agent_for_module_found = False
+                for aid in space.spaces.keys():
+                    # Match: Assign spaces for this agentID to the PolicyID.
+                    if self._agent_to_module_mapping_fn(aid, dummy_eps) == module_id:
+                        # Make sure, different agents that map to the same
+                        # policy don't have different spaces.
+                        if (
+                            module_id in ret_space
+                            and space[aid] != ret_space[module_id]
+                        ):
+                            raise ValueError(
+                                f"Two agents ({aid} and {match_aid}) in your "
+                                "environment map to the same ModuleID (as per your "
+                                "`agent_to_module_mapping_fn`), however, these agents "
+                                "also have different observation spaces as per the env!"
+                            )
+                        ret_space[module_id] = space[aid]
+                        match_aid = aid
+                        one_agent_for_module_found = True
+                # Still no space found for this module ID -> Error out.
+                if not one_agent_for_module_found:
+                    raise ValueError(
+                        f"Could not find or derive any {which}-space for RLModule "
+                        f"{module_id}! This can happen if your `config.rl_module(rl_"
+                        f"module_spec=...)` does NOT contain space information for this"
+                        " particular single-agent module AND your agent-to-module-"
+                        "mapping function is stochastic (such that for some agent A, "
+                        "more than one ModuleID might be returned somewhat randomly). "
+                        f"Fix this error by providing {which}-space information using "
+                        "`config.rl_module(rl_module_spec=MultiRLModuleSpec("
+                        f"rl_module_specs={{'{module_id}': RLModuleSpec("
+                        "observation_space=..., action_space=...)}}))"
+                    )
+        return gym.spaces.Dict(ret_space)

.venv/lib/python3.11/site-packages/ray/rllib/connectors/common/batch_individual_items.py ADDED Viewed

	@@ -0,0 +1,200 @@

+from typing import Any, Dict, List, Optional
+import gymnasium as gym
+from ray.rllib.connectors.connector_v2 import ConnectorV2
+from ray.rllib.core import DEFAULT_MODULE_ID
+from ray.rllib.core.columns import Columns
+from ray.rllib.core.rl_module.multi_rl_module import MultiRLModule
+from ray.rllib.core.rl_module.rl_module import RLModule
+from ray.rllib.utils.annotations import override
+from ray.rllib.utils.spaces.space_utils import batch as batch_fn
+from ray.rllib.utils.typing import EpisodeType
+from ray.util.annotations import PublicAPI
+@PublicAPI(stability="alpha")
+class BatchIndividualItems(ConnectorV2):
+    """Batches individual data-items (in lists) into tensors (with batch dimension).
+    Note: This is one of the default env-to-module or Learner ConnectorV2 pieces that
+    are added automatically by RLlib into every env-to-module/Learner connector
+    pipeline, unless `config.add_default_connectors_to_env_to_module_pipeline` or
+    `config.add_default_connectors_to_learner_pipeline ` are set to
+    False.
+    The default env-to-module connector pipeline is:
+    [
+        [0 or more user defined ConnectorV2 pieces],
+        AddObservationsFromEpisodesToBatch,
+        AddTimeDimToBatchAndZeroPad,
+        AddStatesFromEpisodesToBatch,
+        AgentToModuleMapping,  # only in multi-agent setups!
+        BatchIndividualItems,
+        NumpyToTensor,
+    ]
+    The default Learner connector pipeline is:
+    [
+        [0 or more user defined ConnectorV2 pieces],
+        AddObservationsFromEpisodesToBatch,
+        AddColumnsFromEpisodesToTrainBatch,
+        AddTimeDimToBatchAndZeroPad,
+        AddStatesFromEpisodesToBatch,
+        AgentToModuleMapping,  # only in multi-agent setups!
+        BatchIndividualItems,
+        NumpyToTensor,
+    ]
+    This ConnectorV2:
+    - Operates only on the input `data`, NOT the incoming list of episode objects
+    (ignored).
+    - In the single-agent case, `data` must already be a dict, structured as follows by
+    prior connector pieces of the same pipeline:
+    [col0] -> {[(eps_id,)]: [list of individual batch items]}
+    - In the multi-agent case, `data` must already be a dict, structured as follows by
+    prior connector pieces of the same pipeline (in particular the
+    `AgentToModuleMapping` piece):
+    [module_id] -> [col0] -> [list of individual batch items]
+    - Translates the above data under the different columns (e.g. "obs") into final
+    (batched) structures. For the single-agent case, the output `data` looks like this:
+    [col0] -> [possibly complex struct of batches (at the leafs)].
+    For the multi-agent case, the output `data` looks like this:
+    [module_id] -> [col0] -> [possibly complex struct of batches (at the leafs)].
+    .. testcode::
+        from ray.rllib.connectors.common import BatchIndividualItems
+        from ray.rllib.utils.test_utils import check
+        single_agent_batch = {
+            "obs": {
+                # Note that at this stage, next-obs is not part of the data anymore ..
+                ("MA-EPS0",): [0, 1],
+                ("MA-EPS1",): [2, 3],
+            },
+            "actions": {
+                # .. so we have as many actions per episode as we have observations.
+                ("MA-EPS0",): [4, 5],
+                ("MA-EPS1",): [6, 7],
+            },
+        }
+        # Create our (single-agent) connector piece.
+        connector = BatchIndividualItems()
+        # Call the connector (and thereby batch the individual items).
+        output_batch = connector(
+            rl_module=None,  # This particular connector works without an RLModule.
+            batch=single_agent_batch,
+            episodes=[],  # This particular connector works without a list of episodes.
+            explore=True,
+            shared_data={},
+        )
+        # `output_batch` should now be batched (episode IDs should have been removed
+        # from the struct).
+        check(
+            output_batch,
+            {"obs": [0, 1, 2, 3], "actions": [4, 5, 6, 7]},
+        )
+    """
+    def __init__(
+        self,
+        input_observation_space: Optional[gym.Space] = None,
+        input_action_space: Optional[gym.Space] = None,
+        *,
+        multi_agent: bool = False,
+        **kwargs,
+    ):
+        """Initializes a BatchIndividualItems instance.
+        Args:
+            multi_agent: Whether this is a connector operating on a multi-agent
+                observation space mapping AgentIDs to individual agents' observations.
+        """
+        super().__init__(
+            input_observation_space=input_observation_space,
+            input_action_space=input_action_space,
+            **kwargs,
+        )
+        self._multi_agent = multi_agent
+    @override(ConnectorV2)
+    def __call__(
+        self,
+        *,
+        rl_module: RLModule,
+        batch: Dict[str, Any],
+        episodes: List[EpisodeType],
+        explore: Optional[bool] = None,
+        shared_data: Optional[dict] = None,
+        **kwargs,
+    ) -> Any:
+        is_multi_rl_module = isinstance(rl_module, MultiRLModule)
+        # Convert lists of individual items into properly batched data.
+        for column, column_data in batch.copy().items():
+            # Multi-agent case: This connector piece should only be used after(!)
+            # the AgentToModuleMapping connector has already been applied, leading
+            # to a batch structure of:
+            # [module_id] -> [col0] -> [list of individual batch items]
+            if is_multi_rl_module and column in rl_module:
+                # Case, in which a column has already been properly batched before this
+                # connector piece is called.
+                if not self._multi_agent:
+                    continue
+                # If MA Off-Policy and independent sampling we need to overcome this
+                # check.
+                module_data = column_data
+                for col, col_data in module_data.copy().items():
+                    if isinstance(col_data, list) and col != Columns.INFOS:
+                        module_data[col] = batch_fn(
+                            col_data,
+                            individual_items_already_have_batch_dim="auto",
+                        )
+            # Simple case: There is a list directly under `column`:
+            # Batch the list.
+            elif isinstance(column_data, list):
+                batch[column] = batch_fn(
+                    column_data,
+                    individual_items_already_have_batch_dim="auto",
+                )
+            # Single-agent case: There is a dict under `column` mapping
+            # `eps_id` to lists of items:
+            # Concat all these lists, then batch.
+            elif not self._multi_agent:
+                # TODO: only really need this in non-Learner connector pipeline
+                memorized_map_structure = []
+                list_to_be_batched = []
+                for (eps_id,) in column_data.keys():
+                    for item in column_data[(eps_id,)]:
+                        # Only record structure for OBS column.
+                        if column == Columns.OBS:
+                            memorized_map_structure.append(eps_id)
+                        list_to_be_batched.append(item)
+                # INFOS should not be batched (remain a list).
+                batch[column] = (
+                    list_to_be_batched
+                    if column == Columns.INFOS
+                    else batch_fn(
+                        list_to_be_batched,
+                        individual_items_already_have_batch_dim="auto",
+                    )
+                )
+                if is_multi_rl_module:
+                    if DEFAULT_MODULE_ID not in batch:
+                        batch[DEFAULT_MODULE_ID] = {}
+                    batch[DEFAULT_MODULE_ID][column] = batch.pop(column)
+                # Only record structure for OBS column.
+                if column == Columns.OBS:
+                    shared_data["memorized_map_structure"] = memorized_map_structure
+            # Multi-agent case: But Module ID not found in our RLModule -> Ignore this
+            # `module_id` entirely.
+            # else:
+            #    pass
+        return batch

.venv/lib/python3.11/site-packages/ray/rllib/connectors/common/frame_stacking.py ADDED Viewed

	@@ -0,0 +1,147 @@

+import numpy as np
+from typing import Any, Dict, List, Optional
+import gymnasium as gym
+import tree  # pip install dm_tree
+from ray.rllib.connectors.connector_v2 import ConnectorV2
+from ray.rllib.core.columns import Columns
+from ray.rllib.core.rl_module.rl_module import RLModule
+from ray.rllib.utils.annotations import override
+from ray.rllib.utils.typing import EpisodeType
+from ray.util.annotations import PublicAPI
+@PublicAPI(stability="alpha")
+class _FrameStacking(ConnectorV2):
+    """A connector piece that stacks the previous n observations into one."""
+    @override(ConnectorV2)
+    def recompute_output_observation_space(
+        self,
+        input_observation_space: gym.Space,
+        input_action_space: gym.Space,
+    ) -> gym.Space:
+        """Returns the observation space after frame stacking has been applied.
+        In multi-agent mode, each sub-space of the input Dict space is converted
+        individually; otherwise, the input space itself is converted. The action
+        space argument is not used.
+        """
+        # Change our observation space according to the given stacking settings.
+        if self._multi_agent:
+            ret = {}
+            for agent_id, obs_space in input_observation_space.spaces.items():
+                ret[agent_id] = self._convert_individual_space(obs_space)
+            return gym.spaces.Dict(ret)
+        else:
+            return self._convert_individual_space(input_observation_space)
+    def __init__(
+        self,
+        input_observation_space: Optional[gym.Space] = None,
+        input_action_space: Optional[gym.Space] = None,
+        *,
+        num_frames: int = 1,
+        multi_agent: bool = False,
+        as_learner_connector: bool = False,
+        **kwargs,
+    ):
+        """Initializes a _FrameStacking instance.
+        Args:
+            num_frames: The number of observation frames to stack up (into a single
+                observation) for the RLModule's forward pass.
+            multi_agent: Whether this is a connector operating on a multi-agent
+                observation space mapping AgentIDs to individual agents' observations.
+            as_learner_connector: Whether this connector is part of a Learner connector
+                pipeline, as opposed to an env-to-module pipeline.
+        """
+        super().__init__(
+            input_observation_space=input_observation_space,
+            input_action_space=input_action_space,
+            **kwargs,
+        )
+        self._multi_agent = multi_agent
+        self.num_frames = num_frames
+        self._as_learner_connector = as_learner_connector
+    @override(ConnectorV2)
+    def __call__(
+        self,
+        *,
+        rl_module: RLModule,
+        batch: Dict[str, Any],
+        episodes: List[EpisodeType],
+        explore: Optional[bool] = None,
+        shared_data: Optional[dict] = None,
+        **kwargs,
+    ) -> Any:
+        """Adds frame-stacked observations from `episodes` to `batch[Columns.OBS]`.
+        As a Learner connector, one stacked observation per episode timestep is
+        added; in the env-to-module case, one stacked observation per episode is
+        added. Returns the (altered) `batch`.
+        """
+        # Learner connector pipeline. Episodes have been numpy'ized.
+        if self._as_learner_connector:
+            for sa_episode in self.single_agent_episode_iterator(
+                episodes, agents_that_stepped_only=False
+            ):
+                # `sa_episode` is bound as a default arg so that `_map_fn` does not
+                # depend on the loop variable at call time.
+                def _map_fn(s, _sa_episode=sa_episode):
+                    # Squeeze out last dim.
+                    s = np.squeeze(s, axis=-1)
+                    # Calculate new shape and strides
+                    new_shape = (len(_sa_episode), self.num_frames) + s.shape[1:]
+                    # Repeating the stride of axis 0 makes `view[i, j]` alias
+                    # `s[i + j]`, i.e. a sliding window over the time axis without
+                    # copying any data.
+                    new_strides = (s.strides[0],) + s.strides
+                    # Create a strided view of the array.
+                    # The transpose moves the window axis (axis 1) to the end.
+                    # NOTE(review): `axes=[0, 2, 3, 1]` assumes a 4D view, i.e. that
+                    # `s` is 3D after the squeeze - confirm for non-image spaces.
+                    return np.transpose(
+                        np.lib.stride_tricks.as_strided(
+                            s, shape=new_shape, strides=new_strides
+                        ),
+                        axes=[0, 2, 3, 1],
+                    )
+                # Get all observations from the episode in one np array (except for
+                # the very last one, which is the final observation not needed for
+                # learning).
+                # NOTE(review): presumably, the negative slice start together with
+                # `neg_index_as_lookback=True` and `fill=0.0` yields
+                # `num_frames - 1` (possibly zero-filled) leading frames - confirm
+                # against `get_observations`.
+                self.add_n_batch_items(
+                    batch=batch,
+                    column=Columns.OBS,
+                    items_to_add=tree.map_structure(
+                        _map_fn,
+                        sa_episode.get_observations(
+                            indices=slice(-self.num_frames + 1, len(sa_episode)),
+                            neg_index_as_lookback=True,
+                            fill=0.0,
+                        ),
+                    ),
+                    num_items=len(sa_episode),
+                    single_agent_episode=sa_episode,
+                )
+        # Env-to-module pipeline. Episodes still operate on lists.
+        else:
+            for sa_episode in self.single_agent_episode_iterator(episodes):
+                assert not sa_episode.is_numpy
+                # Get the list of observations to stack.
+                obs_stack = sa_episode.get_observations(
+                    indices=slice(-self.num_frames, None),
+                    fill=0.0,
+                )
+                # Observation components are (w, h, 1)
+                # -> concatenate along axis=-1 to (w, h, [num_frames]).
+                # Note that the code concatenates along the fixed axis 2, which is
+                # the last axis only for 3D components.
+                stacked_obs = tree.map_structure(
+                    lambda *s: np.concatenate(s, axis=2),
+                    *obs_stack,
+                )
+                self.add_batch_item(
+                    batch=batch,
+                    column=Columns.OBS,
+                    item_to_add=stacked_obs,
+                    single_agent_episode=sa_episode,
+                )
+        return batch
+    def _convert_individual_space(self, obs_space):
+        """Returns a Box like `obs_space`, but with a last dim of size `num_frames`.
+        The low/high bounds are repeated `num_frames` times along the last axis.
+        Fails with an AssertionError, if `obs_space` is not a Box or its last
+        dimension is not 1.
+        """
+        # Some assumptions: Space is box AND last dim (the stacking one) is 1.
+        assert isinstance(obs_space, gym.spaces.Box), obs_space
+        assert obs_space.shape[-1] == 1, obs_space
+        return gym.spaces.Box(
+            low=np.repeat(obs_space.low, repeats=self.num_frames, axis=-1),
+            high=np.repeat(obs_space.high, repeats=self.num_frames, axis=-1),
+            shape=list(obs_space.shape)[:-1] + [self.num_frames],
+            dtype=obs_space.dtype,
+        )

.venv/lib/python3.11/site-packages/ray/rllib/connectors/common/module_to_agent_unmapping.py ADDED Viewed

	@@ -0,0 +1,48 @@

+from collections import defaultdict
+from typing import Any, Dict, List, Optional
+from ray.rllib.connectors.connector_v2 import ConnectorV2
+from ray.rllib.core.rl_module.rl_module import RLModule
+from ray.rllib.env.multi_agent_episode import MultiAgentEpisode
+from ray.rllib.utils.annotations import override
+from ray.rllib.utils.typing import EpisodeType
+from ray.util.annotations import PublicAPI
+@PublicAPI(stability="alpha")
+class ModuleToAgentUnmapping(ConnectorV2):
+    """Performs flipping of `data` from ModuleID- to AgentID based mapping.
+    Before mapping:
+    data[module1] -> [col, e.g. ACTIONS]
+    -> [dict mapping episode-identifying tuples to lists of data]
+    data[module2] -> ...
+    After mapping:
+    data[ACTIONS]: [dict mapping episode-identifying tuples to lists of data]
+    Note that episode-identifying tuples have the form of: (episode_id,) in the
+    single-agent case and (ma_episode_id, agent_id, module_id) in the multi-agent
+    case.
+    """
+    @override(ConnectorV2)
+    def __call__(
+        self,
+        *,
+        rl_module: RLModule,
+        batch: Dict[str, Any],
+        episodes: List[EpisodeType],
+        explore: Optional[bool] = None,
+        shared_data: Optional[dict] = None,
+        **kwargs,
+    ) -> Any:
+        # This Connector should only be used in a multi-agent setting.
+        assert isinstance(episodes[0], MultiAgentEpisode)
+        agent_data = defaultdict(dict)
+        for module_id, module_data in batch.items():
+            for column, values_dict in module_data.items():
+                agent_data[column].update(values_dict)
+        return dict(agent_data)

.venv/lib/python3.11/site-packages/ray/rllib/connectors/common/numpy_to_tensor.py ADDED Viewed

	@@ -0,0 +1,125 @@

+from typing import Any, Dict, List, Optional
+import gymnasium as gym
+from ray.rllib.connectors.connector_v2 import ConnectorV2
+from ray.rllib.core import DEFAULT_MODULE_ID
+from ray.rllib.core.columns import Columns
+from ray.rllib.core.rl_module.multi_rl_module import MultiRLModule
+from ray.rllib.core.rl_module.rl_module import RLModule
+from ray.rllib.utils.annotations import override
+from ray.rllib.utils.torch_utils import convert_to_torch_tensor
+from ray.rllib.utils.typing import EpisodeType
+from ray.util.annotations import PublicAPI
+@PublicAPI(stability="alpha")
+class NumpyToTensor(ConnectorV2):
+    """Converts numpy arrays across the entire input data into (framework) tensors.
+    The framework information is received via the provided `rl_module` arg in the
+    `__call__()` method.
+    Note: This is one of the default env-to-module or Learner ConnectorV2 pieces that
+    are added automatically by RLlib into every env-to-module/Learner connector
+    pipeline, unless `config.add_default_connectors_to_env_to_module_pipeline` or
+    `config.add_default_connectors_to_learner_pipeline ` are set to
+    False.
+    The default env-to-module connector pipeline is:
+    [
+        [0 or more user defined ConnectorV2 pieces],
+        AddObservationsFromEpisodesToBatch,
+        AddTimeDimToBatchAndZeroPad,
+        AddStatesFromEpisodesToBatch,
+        AgentToModuleMapping,  # only in multi-agent setups!
+        BatchIndividualItems,
+        NumpyToTensor,
+    ]
+    The default Learner connector pipeline is:
+    [
+        [0 or more user defined ConnectorV2 pieces],
+        AddObservationsFromEpisodesToBatch,
+        AddColumnsFromEpisodesToTrainBatch,
+        AddTimeDimToBatchAndZeroPad,
+        AddStatesFromEpisodesToBatch,
+        AgentToModuleMapping,  # only in multi-agent setups!
+        BatchIndividualItems,
+        NumpyToTensor,
+    ]
+    This ConnectorV2:
+    - Loops through the input `data` and converts all found numpy arrays into
+    framework-specific tensors (possibly on a GPU).
+    """
+    def __init__(
+        self,
+        input_observation_space: Optional[gym.Space] = None,
+        input_action_space: Optional[gym.Space] = None,
+        *,
+        as_learner_connector: bool = False,
+        pin_mempory: Optional[bool] = None,
+        device: Optional[str] = None,
+        **kwargs,
+    ):
+        """Initializes a NumpyToTensor instance.
+        Args:
+            as_learner_connector: Whether this ConnectorV2 piece is used inside a
+                LearnerConnectorPipeline or not.
+            pin_mempory: Whether to pin memory when creating (torch) tensors.
+                If None (default), pins memory if `as_learner_connector` is True,
+                otherwise doesn't pin memory.
+            device: An optional device to move the resulting tensors to. If not
+                provided, all data will be left on the CPU.
+            **kwargs:
+        """
+        super().__init__(
+            input_observation_space=input_observation_space,
+            input_action_space=input_action_space,
+            **kwargs,
+        )
+        self._as_learner_connector = as_learner_connector
+        self._pin_memory = (
+            pin_mempory if pin_mempory is not None else self._as_learner_connector
+        )
+        self._device = device
+    @override(ConnectorV2)
+    def __call__(
+        self,
+        *,
+        rl_module: RLModule,
+        batch: Dict[str, Any],
+        episodes: List[EpisodeType],
+        explore: Optional[bool] = None,
+        shared_data: Optional[dict] = None,
+        **kwargs,
+    ) -> Any:
+        is_single_agent = False
+        is_multi_rl_module = isinstance(rl_module, MultiRLModule)
+        # `data` already a ModuleID to batch mapping format.
+        if not (is_multi_rl_module and all(c in rl_module._rl_modules for c in batch)):
+            is_single_agent = True
+            batch = {DEFAULT_MODULE_ID: batch}
+        for module_id, module_data in batch.copy().items():
+            infos = module_data.pop(Columns.INFOS, None)
+            if rl_module.framework == "torch":
+                module_data = convert_to_torch_tensor(
+                    module_data, pin_memory=self._pin_memory, device=self._device
+                )
+            else:
+                raise ValueError(
+                    "`NumpyToTensor`does NOT support frameworks other than torch!"
+                )
+            if infos is not None:
+                module_data[Columns.INFOS] = infos
+            # Early out with data under(!) `DEFAULT_MODULE_ID`, b/c we are in plain
+            # single-agent mode.
+            if is_single_agent:
+                return module_data
+            batch[module_id] = module_data
+        return batch

.venv/lib/python3.11/site-packages/ray/rllib/connectors/common/tensor_to_numpy.py ADDED Viewed

	@@ -0,0 +1,26 @@

+from typing import Any, Dict, List, Optional
+from ray.rllib.connectors.connector_v2 import ConnectorV2
+from ray.rllib.core.rl_module.rl_module import RLModule
+from ray.rllib.utils.annotations import override
+from ray.rllib.utils.numpy import convert_to_numpy
+from ray.rllib.utils.typing import EpisodeType
+from ray.util.annotations import PublicAPI
+@PublicAPI(stability="alpha")
+class TensorToNumpy(ConnectorV2):
+    """Converts (framework) tensors across the entire input data into numpy arrays."""
+    @override(ConnectorV2)
+    def __call__(
+        self,
+        *,
+        rl_module: RLModule,
+        batch: Dict[str, Any],
+        episodes: List[EpisodeType],
+        explore: Optional[bool] = None,
+        shared_data: Optional[dict] = None,
+        **kwargs,
+    ) -> Any:
+        return convert_to_numpy(batch)

.venv/lib/python3.11/site-packages/ray/rllib/connectors/env_to_module/__pycache__/__init__.cpython-311.pyc ADDED Viewed

Binary file (1.7 kB). View file

.venv/lib/python3.11/site-packages/ray/rllib/connectors/env_to_module/__pycache__/env_to_module_pipeline.cpython-311.pyc ADDED Viewed

Binary file (2.61 kB). View file

.venv/lib/python3.11/site-packages/ray/rllib/connectors/env_to_module/__pycache__/flatten_observations.cpython-311.pyc ADDED Viewed

Binary file (9.32 kB). View file

.venv/lib/python3.11/site-packages/ray/rllib/connectors/env_to_module/__pycache__/frame_stacking.cpython-311.pyc ADDED Viewed

Binary file (465 Bytes). View file

.venv/lib/python3.11/site-packages/ray/rllib/connectors/env_to_module/__pycache__/observation_preprocessor.cpython-311.pyc ADDED Viewed

Binary file (3.77 kB). View file

.venv/lib/python3.11/site-packages/ray/rllib/connectors/env_to_module/__pycache__/write_observations_to_episodes.cpython-311.pyc ADDED Viewed

Binary file (5.87 kB). View file

.venv/lib/python3.11/site-packages/ray/rllib/connectors/learner/__pycache__/add_one_ts_to_episodes_and_truncate.cpython-311.pyc ADDED Viewed

Binary file (6.34 kB). View file

.venv/lib/python3.11/site-packages/ray/rllib/connectors/learner/__pycache__/frame_stacking.cpython-311.pyc ADDED Viewed

Binary file (455 Bytes). View file

.venv/lib/python3.11/site-packages/ray/rllib/connectors/learner/__pycache__/learner_connector_pipeline.cpython-311.pyc ADDED Viewed

Binary file (2.75 kB). View file

.venv/lib/python3.11/site-packages/ray/rllib/connectors/learner/add_columns_from_episodes_to_train_batch.py ADDED Viewed

	@@ -0,0 +1,166 @@

+from typing import Any, Dict, List, Optional
+from ray.rllib.connectors.connector_v2 import ConnectorV2
+from ray.rllib.core.columns import Columns
+from ray.rllib.core.rl_module.rl_module import RLModule
+from ray.rllib.utils.annotations import override
+from ray.rllib.utils.typing import EpisodeType
+from ray.util.annotations import PublicAPI
+@PublicAPI(stability="alpha")
+class AddColumnsFromEpisodesToTrainBatch(ConnectorV2):
+    """Adds infos/actions/rewards/terminateds/... to train batch.
+    Note: This is one of the default Learner ConnectorV2 pieces that are added
+    automatically by RLlib into every Learner connector pipeline, unless
+    `config.add_default_connectors_to_learner_pipeline` is set to False.
+    The default Learner connector pipeline is:
+    [
+        [0 or more user defined ConnectorV2 pieces],
+        AddObservationsFromEpisodesToBatch,
+        AddColumnsFromEpisodesToTrainBatch,
+        AddTimeDimToBatchAndZeroPad,
+        AddStatesFromEpisodesToBatch,
+        AgentToModuleMapping,  # only in multi-agent setups!
+        BatchIndividualItems,
+        NumpyToTensor,
+    ]
+    Does NOT add observations to train batch (these should have already been added
+    by another ConnectorV2 piece: `AddObservationsToTrainBatch` in the same pipeline).
+    If provided with `episodes` data, this connector piece makes sure that the final
+    train batch going into the RLModule for updating (`forward_train()` call) contains
+    at the minimum:
+    - Observations: From all episodes under the Columns.OBS key.
+    - Actions, rewards, terminal/truncation flags: From all episodes under the
+    respective keys.
+    - All data inside the episodes' `extra_model_outs` property, e.g. action logp and
+    action probs under the respective keys.
+    - Internal states: These will NOT be added to the batch by this connector piece
+    as this functionality is handled by a different default connector piece:
+    `AddStatesFromEpisodesToBatch`.
+    If the user wants to customize their own data under the given keys (e.g. obs,
+    actions, ...), they can extract from the episodes or recompute from `data`
+    their own data and store it in `data` under those keys. In this case, the default
+    connector will not change the data under these keys and simply act as a
+    pass-through.
+    """
+    @override(ConnectorV2)
+    def __call__(
+        self,
+        *,
+        rl_module: RLModule,
+        batch: Optional[Dict[str, Any]],
+        episodes: List[EpisodeType],
+        explore: Optional[bool] = None,
+        shared_data: Optional[dict] = None,
+        **kwargs,
+    ) -> Any:
+        # Infos.
+        if Columns.INFOS not in batch:
+            for sa_episode in self.single_agent_episode_iterator(
+                episodes,
+                agents_that_stepped_only=False,
+            ):
+                self.add_n_batch_items(
+                    batch,
+                    Columns.INFOS,
+                    items_to_add=sa_episode.get_infos(slice(0, len(sa_episode))),
+                    num_items=len(sa_episode),
+                    single_agent_episode=sa_episode,
+                )
+        # Actions.
+        if Columns.ACTIONS not in batch:
+            for sa_episode in self.single_agent_episode_iterator(
+                episodes,
+                agents_that_stepped_only=False,
+            ):
+                self.add_n_batch_items(
+                    batch,
+                    Columns.ACTIONS,
+                    items_to_add=[
+                        sa_episode.get_actions(indices=ts)
+                        for ts in range(len(sa_episode))
+                    ],
+                    num_items=len(sa_episode),
+                    single_agent_episode=sa_episode,
+                )
+        # Rewards.
+        if Columns.REWARDS not in batch:
+            for sa_episode in self.single_agent_episode_iterator(
+                episodes,
+                agents_that_stepped_only=False,
+            ):
+                self.add_n_batch_items(
+                    batch,
+                    Columns.REWARDS,
+                    items_to_add=[
+                        sa_episode.get_rewards(indices=ts)
+                        for ts in range(len(sa_episode))
+                    ],
+                    num_items=len(sa_episode),
+                    single_agent_episode=sa_episode,
+                )
+        # Terminateds.
+        if Columns.TERMINATEDS not in batch:
+            for sa_episode in self.single_agent_episode_iterator(
+                episodes,
+                agents_that_stepped_only=False,
+            ):
+                self.add_n_batch_items(
+                    batch,
+                    Columns.TERMINATEDS,
+                    items_to_add=(
+                        [False] * (len(sa_episode) - 1) + [sa_episode.is_terminated]
+                        if len(sa_episode) > 0
+                        else []
+                    ),
+                    num_items=len(sa_episode),
+                    single_agent_episode=sa_episode,
+                )
+        # Truncateds.
+        if Columns.TRUNCATEDS not in batch:
+            for sa_episode in self.single_agent_episode_iterator(
+                episodes,
+                agents_that_stepped_only=False,
+            ):
+                self.add_n_batch_items(
+                    batch,
+                    Columns.TRUNCATEDS,
+                    items_to_add=(
+                        [False] * (len(sa_episode) - 1) + [sa_episode.is_truncated]
+                        if len(sa_episode) > 0
+                        else []
+                    ),
+                    num_items=len(sa_episode),
+                    single_agent_episode=sa_episode,
+                )
+        # Extra model outputs (except for STATE_OUT, which will be handled by another
+        # default connector piece). Also, like with all the fields above, skip
+        # those that the user already seemed to have populated via custom connector
+        # pieces.
+        skip_columns = set(batch.keys()) | {Columns.STATE_IN, Columns.STATE_OUT}
+        for sa_episode in self.single_agent_episode_iterator(
+            episodes,
+            agents_that_stepped_only=False,
+        ):
+            for column in sa_episode.extra_model_outputs.keys():
+                if column not in skip_columns:
+                    self.add_n_batch_items(
+                        batch,
+                        column,
+                        items_to_add=[
+                            sa_episode.get_extra_model_outputs(key=column, indices=ts)
+                            for ts in range(len(sa_episode))
+                        ],
+                        num_items=len(sa_episode),
+                        single_agent_episode=sa_episode,
+                    )
+        return batch

.venv/lib/python3.11/site-packages/ray/rllib/connectors/learner/add_next_observations_from_episodes_to_train_batch.py ADDED Viewed

	@@ -0,0 +1,103 @@

+from typing import Any, Dict, List, Optional
+from ray.rllib.core.columns import Columns
+from ray.rllib.connectors.connector_v2 import ConnectorV2
+from ray.rllib.core.rl_module.rl_module import RLModule
+from ray.rllib.utils.annotations import override
+from ray.rllib.utils.typing import EpisodeType
+from ray.util.annotations import PublicAPI
+@PublicAPI(stability="alpha")
+class AddNextObservationsFromEpisodesToTrainBatch(ConnectorV2):
+    """Adds the NEXT_OBS column with the correct episode observations to train batch.
+    - Operates on a list of Episode objects.
+    - Gets all observation(s) from all the given episodes (except the very first ones)
+    and adds them to the batch under construction in the NEXT_OBS column (as a list of
+    individual observations).
+    - Does NOT alter any observations (or other data) in the given episodes.
+    - Can be used in Learner connector pipelines.
+    .. testcode::
+        import gymnasium as gym
+        import numpy as np
+        from ray.rllib.connectors.learner import (
+            AddNextObservationsFromEpisodesToTrainBatch
+        )
+        from ray.rllib.core.columns import Columns
+        from ray.rllib.env.single_agent_episode import SingleAgentEpisode
+        from ray.rllib.utils.test_utils import check
+        # Create two dummy SingleAgentEpisodes, each containing 3 observations,
+        # 2 actions and 2 rewards (both episodes are length=2).
+        obs_space = gym.spaces.Box(-1.0, 1.0, (2,), np.float32)
+        act_space = gym.spaces.Discrete(2)
+        episodes = [SingleAgentEpisode(
+            observations=[obs_space.sample(), obs_space.sample(), obs_space.sample()],
+            actions=[act_space.sample(), act_space.sample()],
+            rewards=[1.0, 2.0],
+            len_lookback_buffer=0,
+        ) for _ in range(2)]
+        eps_1_next_obses = episodes[0].get_observations([1, 2])
+        eps_2_next_obses = episodes[1].get_observations([1, 2])
+        print(f"1st Episode's next obses are {eps_1_next_obses}")
+        print(f"2nd Episode's next obses are {eps_2_next_obses}")
+        # Create an instance of this class.
+        connector = AddNextObservationsFromEpisodesToTrainBatch()
+        # Call the connector with the two created episodes.
+        # Note that this particular connector works without an RLModule, so we
+        # simplify here for the sake of this example.
+        output_data = connector(
+            rl_module=None,
+            batch={},
+            episodes=episodes,
+            explore=True,
+            shared_data={},
+        )
+        # The output data should now contain the last observations of both episodes,
+        # in a "per-episode organized" fashion.
+        check(
+            output_data,
+            {
+                Columns.NEXT_OBS: {
+                    (episodes[0].id_,): eps_1_next_obses,
+                    (episodes[1].id_,): eps_2_next_obses,
+                },
+            },
+        )
+    """
+    @override(ConnectorV2)
+    def __call__(
+        self,
+        *,
+        rl_module: RLModule,
+        batch: Dict[str, Any],
+        episodes: List[EpisodeType],
+        explore: Optional[bool] = None,
+        shared_data: Optional[dict] = None,
+        **kwargs,
+    ) -> Any:
+        # If "obs" already in `batch`, early out.
+        if Columns.NEXT_OBS in batch:
+            return batch
+        for sa_episode in self.single_agent_episode_iterator(
+            # This is a Learner-only connector -> Get all episodes (for train batch).
+            episodes,
+            agents_that_stepped_only=False,
+        ):
+            self.add_n_batch_items(
+                batch,
+                Columns.NEXT_OBS,
+                items_to_add=sa_episode.get_observations(slice(1, len(sa_episode) + 1)),
+                num_items=len(sa_episode),
+                single_agent_episode=sa_episode,
+            )
+        return batch