Add files using upload-large-folder tool
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- .venv/lib/python3.11/site-packages/ray/rllib/__pycache__/__init__.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/ray/rllib/connectors/action/__init__.py +0 -0
- .venv/lib/python3.11/site-packages/ray/rllib/connectors/action/__pycache__/__init__.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/ray/rllib/connectors/action/__pycache__/clip.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/ray/rllib/connectors/action/__pycache__/immutable.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/ray/rllib/connectors/action/__pycache__/lambdas.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/ray/rllib/connectors/action/__pycache__/normalize.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/ray/rllib/connectors/action/__pycache__/pipeline.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/ray/rllib/connectors/action/clip.py +41 -0
- .venv/lib/python3.11/site-packages/ray/rllib/connectors/action/immutable.py +40 -0
- .venv/lib/python3.11/site-packages/ray/rllib/connectors/action/lambdas.py +76 -0
- .venv/lib/python3.11/site-packages/ray/rllib/connectors/action/normalize.py +44 -0
- .venv/lib/python3.11/site-packages/ray/rllib/connectors/action/pipeline.py +61 -0
- .venv/lib/python3.11/site-packages/ray/rllib/connectors/agent/__pycache__/__init__.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/ray/rllib/connectors/agent/__pycache__/clip_reward.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/ray/rllib/connectors/agent/__pycache__/env_sampling.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/ray/rllib/connectors/agent/__pycache__/pipeline.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/ray/rllib/connectors/agent/__pycache__/synced_filter.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/ray/rllib/connectors/agent/__pycache__/view_requirement.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/ray/rllib/connectors/common/__init__.py +22 -0
- .venv/lib/python3.11/site-packages/ray/rllib/connectors/common/__pycache__/__init__.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/ray/rllib/connectors/common/__pycache__/add_observations_from_episodes_to_batch.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/ray/rllib/connectors/common/__pycache__/add_states_from_episodes_to_batch.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/ray/rllib/connectors/common/__pycache__/add_time_dim_to_batch_and_zero_pad.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/ray/rllib/connectors/common/__pycache__/agent_to_module_mapping.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/ray/rllib/connectors/common/__pycache__/batch_individual_items.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/ray/rllib/connectors/common/__pycache__/frame_stacking.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/ray/rllib/connectors/common/__pycache__/module_to_agent_unmapping.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/ray/rllib/connectors/common/__pycache__/numpy_to_tensor.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/ray/rllib/connectors/common/__pycache__/tensor_to_numpy.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/ray/rllib/connectors/common/add_observations_from_episodes_to_batch.py +180 -0
- .venv/lib/python3.11/site-packages/ray/rllib/connectors/common/add_states_from_episodes_to_batch.py +348 -0
- .venv/lib/python3.11/site-packages/ray/rllib/connectors/common/add_time_dim_to_batch_and_zero_pad.py +302 -0
- .venv/lib/python3.11/site-packages/ray/rllib/connectors/common/agent_to_module_mapping.py +291 -0
- .venv/lib/python3.11/site-packages/ray/rllib/connectors/common/batch_individual_items.py +200 -0
- .venv/lib/python3.11/site-packages/ray/rllib/connectors/common/frame_stacking.py +147 -0
- .venv/lib/python3.11/site-packages/ray/rllib/connectors/common/module_to_agent_unmapping.py +48 -0
- .venv/lib/python3.11/site-packages/ray/rllib/connectors/common/numpy_to_tensor.py +125 -0
- .venv/lib/python3.11/site-packages/ray/rllib/connectors/common/tensor_to_numpy.py +26 -0
- .venv/lib/python3.11/site-packages/ray/rllib/connectors/env_to_module/__pycache__/__init__.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/ray/rllib/connectors/env_to_module/__pycache__/env_to_module_pipeline.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/ray/rllib/connectors/env_to_module/__pycache__/flatten_observations.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/ray/rllib/connectors/env_to_module/__pycache__/frame_stacking.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/ray/rllib/connectors/env_to_module/__pycache__/observation_preprocessor.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/ray/rllib/connectors/env_to_module/__pycache__/write_observations_to_episodes.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/ray/rllib/connectors/learner/__pycache__/add_one_ts_to_episodes_and_truncate.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/ray/rllib/connectors/learner/__pycache__/frame_stacking.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/ray/rllib/connectors/learner/__pycache__/learner_connector_pipeline.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/ray/rllib/connectors/learner/add_columns_from_episodes_to_train_batch.py +166 -0
- .venv/lib/python3.11/site-packages/ray/rllib/connectors/learner/add_next_observations_from_episodes_to_train_batch.py +103 -0
.venv/lib/python3.11/site-packages/ray/rllib/__pycache__/__init__.cpython-311.pyc
ADDED
|
Binary file (2.5 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/ray/rllib/connectors/action/__init__.py
ADDED
|
File without changes
|
.venv/lib/python3.11/site-packages/ray/rllib/connectors/action/__pycache__/__init__.cpython-311.pyc
ADDED
|
Binary file (200 Bytes). View file
|
|
|
.venv/lib/python3.11/site-packages/ray/rllib/connectors/action/__pycache__/clip.cpython-311.pyc
ADDED
|
Binary file (2.7 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/ray/rllib/connectors/action/__pycache__/immutable.cpython-311.pyc
ADDED
|
Binary file (2.3 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/ray/rllib/connectors/action/__pycache__/lambdas.cpython-311.pyc
ADDED
|
Binary file (3.73 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/ray/rllib/connectors/action/__pycache__/normalize.cpython-311.pyc
ADDED
|
Binary file (2.75 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/ray/rllib/connectors/action/__pycache__/pipeline.cpython-311.pyc
ADDED
|
Binary file (4.02 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/ray/rllib/connectors/action/clip.py
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import Any
|
| 2 |
+
|
| 3 |
+
from ray.rllib.connectors.connector import (
|
| 4 |
+
ActionConnector,
|
| 5 |
+
ConnectorContext,
|
| 6 |
+
)
|
| 7 |
+
from ray.rllib.connectors.registry import register_connector
|
| 8 |
+
from ray.rllib.utils.spaces.space_utils import clip_action, get_base_struct_from_space
|
| 9 |
+
from ray.rllib.utils.typing import ActionConnectorDataType
|
| 10 |
+
from ray.rllib.utils.annotations import OldAPIStack
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
@OldAPIStack
|
| 14 |
+
class ClipActionsConnector(ActionConnector):
|
| 15 |
+
def __init__(self, ctx: ConnectorContext):
|
| 16 |
+
super().__init__(ctx)
|
| 17 |
+
|
| 18 |
+
self._action_space_struct = get_base_struct_from_space(ctx.action_space)
|
| 19 |
+
|
| 20 |
+
def transform(self, ac_data: ActionConnectorDataType) -> ActionConnectorDataType:
|
| 21 |
+
assert isinstance(
|
| 22 |
+
ac_data.output, tuple
|
| 23 |
+
), "Action connector requires PolicyOutputType data."
|
| 24 |
+
|
| 25 |
+
actions, states, fetches = ac_data.output
|
| 26 |
+
return ActionConnectorDataType(
|
| 27 |
+
ac_data.env_id,
|
| 28 |
+
ac_data.agent_id,
|
| 29 |
+
ac_data.input_dict,
|
| 30 |
+
(clip_action(actions, self._action_space_struct), states, fetches),
|
| 31 |
+
)
|
| 32 |
+
|
| 33 |
+
def to_state(self):
|
| 34 |
+
return ClipActionsConnector.__name__, None
|
| 35 |
+
|
| 36 |
+
@staticmethod
|
| 37 |
+
def from_state(ctx: ConnectorContext, params: Any):
|
| 38 |
+
return ClipActionsConnector(ctx)
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
register_connector(ClipActionsConnector.__name__, ClipActionsConnector)
|
.venv/lib/python3.11/site-packages/ray/rllib/connectors/action/immutable.py
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import Any
|
| 2 |
+
|
| 3 |
+
import tree # pip install dm_tree
|
| 4 |
+
|
| 5 |
+
from ray.rllib.connectors.connector import (
|
| 6 |
+
ActionConnector,
|
| 7 |
+
ConnectorContext,
|
| 8 |
+
)
|
| 9 |
+
from ray.rllib.connectors.registry import register_connector
|
| 10 |
+
from ray.rllib.utils.numpy import make_action_immutable
|
| 11 |
+
from ray.rllib.utils.typing import ActionConnectorDataType
|
| 12 |
+
from ray.rllib.utils.annotations import OldAPIStack
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
@OldAPIStack
|
| 16 |
+
class ImmutableActionsConnector(ActionConnector):
|
| 17 |
+
def transform(self, ac_data: ActionConnectorDataType) -> ActionConnectorDataType:
|
| 18 |
+
assert isinstance(
|
| 19 |
+
ac_data.output, tuple
|
| 20 |
+
), "Action connector requires PolicyOutputType data."
|
| 21 |
+
|
| 22 |
+
actions, states, fetches = ac_data.output
|
| 23 |
+
tree.traverse(make_action_immutable, actions, top_down=False)
|
| 24 |
+
|
| 25 |
+
return ActionConnectorDataType(
|
| 26 |
+
ac_data.env_id,
|
| 27 |
+
ac_data.agent_id,
|
| 28 |
+
ac_data.input_dict,
|
| 29 |
+
(actions, states, fetches),
|
| 30 |
+
)
|
| 31 |
+
|
| 32 |
+
def to_state(self):
|
| 33 |
+
return ImmutableActionsConnector.__name__, None
|
| 34 |
+
|
| 35 |
+
@staticmethod
|
| 36 |
+
def from_state(ctx: ConnectorContext, params: Any):
|
| 37 |
+
return ImmutableActionsConnector(ctx)
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
register_connector(ImmutableActionsConnector.__name__, ImmutableActionsConnector)
|
.venv/lib/python3.11/site-packages/ray/rllib/connectors/action/lambdas.py
ADDED
|
@@ -0,0 +1,76 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import Any, Callable, Dict, Type
|
| 2 |
+
|
| 3 |
+
from ray.rllib.connectors.connector import (
|
| 4 |
+
ActionConnector,
|
| 5 |
+
ConnectorContext,
|
| 6 |
+
)
|
| 7 |
+
from ray.rllib.connectors.registry import register_connector
|
| 8 |
+
from ray.rllib.utils.numpy import convert_to_numpy
|
| 9 |
+
from ray.rllib.utils.typing import (
|
| 10 |
+
ActionConnectorDataType,
|
| 11 |
+
PolicyOutputType,
|
| 12 |
+
StateBatches,
|
| 13 |
+
TensorStructType,
|
| 14 |
+
)
|
| 15 |
+
from ray.rllib.utils.annotations import OldAPIStack
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
@OldAPIStack
|
| 19 |
+
def register_lambda_action_connector(
|
| 20 |
+
name: str, fn: Callable[[TensorStructType, StateBatches, Dict], PolicyOutputType]
|
| 21 |
+
) -> Type[ActionConnector]:
|
| 22 |
+
"""A util to register any function transforming PolicyOutputType as an ActionConnector.
|
| 23 |
+
|
| 24 |
+
The only requirement is that fn should take actions, states, and fetches as input,
|
| 25 |
+
and return transformed actions, states, and fetches.
|
| 26 |
+
|
| 27 |
+
Args:
|
| 28 |
+
name: Name of the resulting actor connector.
|
| 29 |
+
fn: The function that transforms PolicyOutputType.
|
| 30 |
+
|
| 31 |
+
Returns:
|
| 32 |
+
A new ActionConnector class that transforms PolicyOutputType using fn.
|
| 33 |
+
"""
|
| 34 |
+
|
| 35 |
+
class LambdaActionConnector(ActionConnector):
|
| 36 |
+
def transform(
|
| 37 |
+
self, ac_data: ActionConnectorDataType
|
| 38 |
+
) -> ActionConnectorDataType:
|
| 39 |
+
assert isinstance(
|
| 40 |
+
ac_data.output, tuple
|
| 41 |
+
), "Action connector requires PolicyOutputType data."
|
| 42 |
+
|
| 43 |
+
actions, states, fetches = ac_data.output
|
| 44 |
+
return ActionConnectorDataType(
|
| 45 |
+
ac_data.env_id,
|
| 46 |
+
ac_data.agent_id,
|
| 47 |
+
ac_data.input_dict,
|
| 48 |
+
fn(actions, states, fetches),
|
| 49 |
+
)
|
| 50 |
+
|
| 51 |
+
def to_state(self):
|
| 52 |
+
return name, None
|
| 53 |
+
|
| 54 |
+
@staticmethod
|
| 55 |
+
def from_state(ctx: ConnectorContext, params: Any):
|
| 56 |
+
return LambdaActionConnector(ctx)
|
| 57 |
+
|
| 58 |
+
LambdaActionConnector.__name__ = name
|
| 59 |
+
LambdaActionConnector.__qualname__ = name
|
| 60 |
+
|
| 61 |
+
register_connector(name, LambdaActionConnector)
|
| 62 |
+
|
| 63 |
+
return LambdaActionConnector
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
# Convert actions and states into numpy arrays if necessary.
|
| 67 |
+
ConvertToNumpyConnector = OldAPIStack(
|
| 68 |
+
register_lambda_action_connector(
|
| 69 |
+
"ConvertToNumpyConnector",
|
| 70 |
+
lambda actions, states, fetches: (
|
| 71 |
+
convert_to_numpy(actions),
|
| 72 |
+
convert_to_numpy(states),
|
| 73 |
+
fetches,
|
| 74 |
+
),
|
| 75 |
+
),
|
| 76 |
+
)
|
.venv/lib/python3.11/site-packages/ray/rllib/connectors/action/normalize.py
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import Any
|
| 2 |
+
|
| 3 |
+
from ray.rllib.connectors.connector import (
|
| 4 |
+
ActionConnector,
|
| 5 |
+
ConnectorContext,
|
| 6 |
+
)
|
| 7 |
+
from ray.rllib.connectors.registry import register_connector
|
| 8 |
+
from ray.rllib.utils.spaces.space_utils import (
|
| 9 |
+
get_base_struct_from_space,
|
| 10 |
+
unsquash_action,
|
| 11 |
+
)
|
| 12 |
+
from ray.rllib.utils.typing import ActionConnectorDataType
|
| 13 |
+
from ray.rllib.utils.annotations import OldAPIStack
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
@OldAPIStack
|
| 17 |
+
class NormalizeActionsConnector(ActionConnector):
|
| 18 |
+
def __init__(self, ctx: ConnectorContext):
|
| 19 |
+
super().__init__(ctx)
|
| 20 |
+
|
| 21 |
+
self._action_space_struct = get_base_struct_from_space(ctx.action_space)
|
| 22 |
+
|
| 23 |
+
def transform(self, ac_data: ActionConnectorDataType) -> ActionConnectorDataType:
|
| 24 |
+
assert isinstance(
|
| 25 |
+
ac_data.output, tuple
|
| 26 |
+
), "Action connector requires PolicyOutputType data."
|
| 27 |
+
|
| 28 |
+
actions, states, fetches = ac_data.output
|
| 29 |
+
return ActionConnectorDataType(
|
| 30 |
+
ac_data.env_id,
|
| 31 |
+
ac_data.agent_id,
|
| 32 |
+
ac_data.input_dict,
|
| 33 |
+
(unsquash_action(actions, self._action_space_struct), states, fetches),
|
| 34 |
+
)
|
| 35 |
+
|
| 36 |
+
def to_state(self):
|
| 37 |
+
return NormalizeActionsConnector.__name__, None
|
| 38 |
+
|
| 39 |
+
@staticmethod
|
| 40 |
+
def from_state(ctx: ConnectorContext, params: Any):
|
| 41 |
+
return NormalizeActionsConnector(ctx)
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
register_connector(NormalizeActionsConnector.__name__, NormalizeActionsConnector)
|
.venv/lib/python3.11/site-packages/ray/rllib/connectors/action/pipeline.py
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import logging
|
| 2 |
+
from typing import Any, List
|
| 3 |
+
from collections import defaultdict
|
| 4 |
+
|
| 5 |
+
from ray.rllib.connectors.connector import (
|
| 6 |
+
ActionConnector,
|
| 7 |
+
Connector,
|
| 8 |
+
ConnectorContext,
|
| 9 |
+
ConnectorPipeline,
|
| 10 |
+
)
|
| 11 |
+
from ray.rllib.connectors.registry import get_connector, register_connector
|
| 12 |
+
from ray.rllib.utils.annotations import OldAPIStack
|
| 13 |
+
from ray.rllib.utils.typing import ActionConnectorDataType
|
| 14 |
+
from ray.util.timer import _Timer
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
logger = logging.getLogger(__name__)
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
@OldAPIStack
|
| 21 |
+
class ActionConnectorPipeline(ConnectorPipeline, ActionConnector):
|
| 22 |
+
def __init__(self, ctx: ConnectorContext, connectors: List[Connector]):
|
| 23 |
+
super().__init__(ctx, connectors)
|
| 24 |
+
self.timers = defaultdict(_Timer)
|
| 25 |
+
|
| 26 |
+
def __call__(self, ac_data: ActionConnectorDataType) -> ActionConnectorDataType:
|
| 27 |
+
for c in self.connectors:
|
| 28 |
+
timer = self.timers[str(c)]
|
| 29 |
+
with timer:
|
| 30 |
+
ac_data = c(ac_data)
|
| 31 |
+
return ac_data
|
| 32 |
+
|
| 33 |
+
def to_state(self):
|
| 34 |
+
children = []
|
| 35 |
+
for c in self.connectors:
|
| 36 |
+
state = c.to_state()
|
| 37 |
+
assert isinstance(state, tuple) and len(state) == 2, (
|
| 38 |
+
"Serialized connector state must be in the format of "
|
| 39 |
+
f"Tuple[name: str, params: Any]. Instead we got {state}"
|
| 40 |
+
f"for connector {c.__name__}."
|
| 41 |
+
)
|
| 42 |
+
children.append(state)
|
| 43 |
+
return ActionConnectorPipeline.__name__, children
|
| 44 |
+
|
| 45 |
+
@staticmethod
|
| 46 |
+
def from_state(ctx: ConnectorContext, params: Any):
|
| 47 |
+
assert (
|
| 48 |
+
type(params) is list
|
| 49 |
+
), "ActionConnectorPipeline takes a list of connector params."
|
| 50 |
+
connectors = []
|
| 51 |
+
for state in params:
|
| 52 |
+
try:
|
| 53 |
+
name, subparams = state
|
| 54 |
+
connectors.append(get_connector(name, ctx, subparams))
|
| 55 |
+
except Exception as e:
|
| 56 |
+
logger.error(f"Failed to de-serialize connector state: {state}")
|
| 57 |
+
raise e
|
| 58 |
+
return ActionConnectorPipeline(ctx, connectors)
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
register_connector(ActionConnectorPipeline.__name__, ActionConnectorPipeline)
|
.venv/lib/python3.11/site-packages/ray/rllib/connectors/agent/__pycache__/__init__.cpython-311.pyc
ADDED
|
Binary file (199 Bytes). View file
|
|
|
.venv/lib/python3.11/site-packages/ray/rllib/connectors/agent/__pycache__/clip_reward.cpython-311.pyc
ADDED
|
Binary file (3.06 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/ray/rllib/connectors/agent/__pycache__/env_sampling.cpython-311.pyc
ADDED
|
Binary file (2.11 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/ray/rllib/connectors/agent/__pycache__/pipeline.cpython-311.pyc
ADDED
|
Binary file (4.74 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/ray/rllib/connectors/agent/__pycache__/synced_filter.cpython-311.pyc
ADDED
|
Binary file (3.05 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/ray/rllib/connectors/agent/__pycache__/view_requirement.cpython-311.pyc
ADDED
|
Binary file (6.24 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/ray/rllib/connectors/common/__init__.py
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from ray.rllib.connectors.common.add_observations_from_episodes_to_batch import (
|
| 2 |
+
AddObservationsFromEpisodesToBatch,
|
| 3 |
+
)
|
| 4 |
+
from ray.rllib.connectors.common.add_states_from_episodes_to_batch import (
|
| 5 |
+
AddStatesFromEpisodesToBatch,
|
| 6 |
+
)
|
| 7 |
+
from ray.rllib.connectors.common.add_time_dim_to_batch_and_zero_pad import (
|
| 8 |
+
AddTimeDimToBatchAndZeroPad,
|
| 9 |
+
)
|
| 10 |
+
from ray.rllib.connectors.common.agent_to_module_mapping import AgentToModuleMapping
|
| 11 |
+
from ray.rllib.connectors.common.batch_individual_items import BatchIndividualItems
|
| 12 |
+
from ray.rllib.connectors.common.numpy_to_tensor import NumpyToTensor
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
__all__ = [
|
| 16 |
+
"AddObservationsFromEpisodesToBatch",
|
| 17 |
+
"AddStatesFromEpisodesToBatch",
|
| 18 |
+
"AddTimeDimToBatchAndZeroPad",
|
| 19 |
+
"AgentToModuleMapping",
|
| 20 |
+
"BatchIndividualItems",
|
| 21 |
+
"NumpyToTensor",
|
| 22 |
+
]
|
.venv/lib/python3.11/site-packages/ray/rllib/connectors/common/__pycache__/__init__.cpython-311.pyc
ADDED
|
Binary file (1.02 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/ray/rllib/connectors/common/__pycache__/add_observations_from_episodes_to_batch.cpython-311.pyc
ADDED
|
Binary file (7.36 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/ray/rllib/connectors/common/__pycache__/add_states_from_episodes_to_batch.cpython-311.pyc
ADDED
|
Binary file (13.7 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/ray/rllib/connectors/common/__pycache__/add_time_dim_to_batch_and_zero_pad.cpython-311.pyc
ADDED
|
Binary file (12 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/ray/rllib/connectors/common/__pycache__/agent_to_module_mapping.cpython-311.pyc
ADDED
|
Binary file (12.1 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/ray/rllib/connectors/common/__pycache__/batch_individual_items.cpython-311.pyc
ADDED
|
Binary file (8.01 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/ray/rllib/connectors/common/__pycache__/frame_stacking.cpython-311.pyc
ADDED
|
Binary file (7.19 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/ray/rllib/connectors/common/__pycache__/module_to_agent_unmapping.cpython-311.pyc
ADDED
|
Binary file (2.83 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/ray/rllib/connectors/common/__pycache__/numpy_to_tensor.cpython-311.pyc
ADDED
|
Binary file (6.02 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/ray/rllib/connectors/common/__pycache__/tensor_to_numpy.cpython-311.pyc
ADDED
|
Binary file (1.79 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/ray/rllib/connectors/common/add_observations_from_episodes_to_batch.py
ADDED
|
@@ -0,0 +1,180 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import Any, Dict, List, Optional
|
| 2 |
+
|
| 3 |
+
import gymnasium as gym
|
| 4 |
+
|
| 5 |
+
from ray.rllib.core.columns import Columns
|
| 6 |
+
from ray.rllib.connectors.connector_v2 import ConnectorV2
|
| 7 |
+
from ray.rllib.core.rl_module.rl_module import RLModule
|
| 8 |
+
from ray.rllib.utils.annotations import override
|
| 9 |
+
from ray.rllib.utils.typing import EpisodeType
|
| 10 |
+
from ray.util.annotations import PublicAPI
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
@PublicAPI(stability="alpha")
|
| 14 |
+
class AddObservationsFromEpisodesToBatch(ConnectorV2):
|
| 15 |
+
"""Gets the last observation from a running episode and adds it to the batch.
|
| 16 |
+
|
| 17 |
+
Note: This is one of the default env-to-module or Learner ConnectorV2 pieces that
|
| 18 |
+
are added automatically by RLlib into every env-to-module/Learner connector
|
| 19 |
+
pipeline, unless `config.add_default_connectors_to_env_to_module_pipeline` or
|
| 20 |
+
`config.add_default_connectors_to_learner_pipeline ` are set to
|
| 21 |
+
False.
|
| 22 |
+
|
| 23 |
+
The default env-to-module connector pipeline is:
|
| 24 |
+
[
|
| 25 |
+
[0 or more user defined ConnectorV2 pieces],
|
| 26 |
+
AddObservationsFromEpisodesToBatch,
|
| 27 |
+
AddTimeDimToBatchAndZeroPad,
|
| 28 |
+
AddStatesFromEpisodesToBatch,
|
| 29 |
+
AgentToModuleMapping, # only in multi-agent setups!
|
| 30 |
+
BatchIndividualItems,
|
| 31 |
+
NumpyToTensor,
|
| 32 |
+
]
|
| 33 |
+
The default Learner connector pipeline is:
|
| 34 |
+
[
|
| 35 |
+
[0 or more user defined ConnectorV2 pieces],
|
| 36 |
+
AddObservationsFromEpisodesToBatch,
|
| 37 |
+
AddColumnsFromEpisodesToTrainBatch,
|
| 38 |
+
AddTimeDimToBatchAndZeroPad,
|
| 39 |
+
AddStatesFromEpisodesToBatch,
|
| 40 |
+
AgentToModuleMapping, # only in multi-agent setups!
|
| 41 |
+
BatchIndividualItems,
|
| 42 |
+
NumpyToTensor,
|
| 43 |
+
]
|
| 44 |
+
|
| 45 |
+
This ConnectorV2:
|
| 46 |
+
- Operates on a list of Episode objects (single- or multi-agent).
|
| 47 |
+
- Gets the most recent observation(s) from all the given episodes and adds them
|
| 48 |
+
to the batch under construction (as a list of individual observations).
|
| 49 |
+
- Does NOT alter any observations (or other data) in the given episodes.
|
| 50 |
+
- Can be used in EnvToModule and Learner connector pipelines.
|
| 51 |
+
|
| 52 |
+
.. testcode::
|
| 53 |
+
|
| 54 |
+
import gymnasium as gym
|
| 55 |
+
import numpy as np
|
| 56 |
+
|
| 57 |
+
from ray.rllib.connectors.common import AddObservationsFromEpisodesToBatch
|
| 58 |
+
from ray.rllib.env.single_agent_episode import SingleAgentEpisode
|
| 59 |
+
from ray.rllib.utils.test_utils import check
|
| 60 |
+
|
| 61 |
+
# Create two dummy SingleAgentEpisodes, each containing 2 observations,
|
| 62 |
+
# 1 action and 1 reward (both are length=1).
|
| 63 |
+
obs_space = gym.spaces.Box(-1.0, 1.0, (2,), np.float32)
|
| 64 |
+
act_space = gym.spaces.Discrete(2)
|
| 65 |
+
|
| 66 |
+
episodes = [SingleAgentEpisode(
|
| 67 |
+
observations=[obs_space.sample(), obs_space.sample()],
|
| 68 |
+
actions=[act_space.sample()],
|
| 69 |
+
rewards=[1.0],
|
| 70 |
+
len_lookback_buffer=0,
|
| 71 |
+
) for _ in range(2)]
|
| 72 |
+
eps_1_last_obs = episodes[0].get_observations(-1)
|
| 73 |
+
eps_2_last_obs = episodes[1].get_observations(-1)
|
| 74 |
+
print(f"1st Episode's last obs is {eps_1_last_obs}")
|
| 75 |
+
print(f"2nd Episode's last obs is {eps_2_last_obs}")
|
| 76 |
+
|
| 77 |
+
# Create an instance of this class.
|
| 78 |
+
connector = AddObservationsFromEpisodesToBatch()
|
| 79 |
+
|
| 80 |
+
# Call the connector with the two created episodes.
|
| 81 |
+
# Note that this particular connector works without an RLModule, so we
|
| 82 |
+
# simplify here for the sake of this example.
|
| 83 |
+
output_batch = connector(
|
| 84 |
+
rl_module=None,
|
| 85 |
+
batch={},
|
| 86 |
+
episodes=episodes,
|
| 87 |
+
explore=True,
|
| 88 |
+
shared_data={},
|
| 89 |
+
)
|
| 90 |
+
# The output data should now contain the last observations of both episodes,
|
| 91 |
+
# in a "per-episode organized" fashion.
|
| 92 |
+
check(
|
| 93 |
+
output_batch,
|
| 94 |
+
{
|
| 95 |
+
"obs": {
|
| 96 |
+
(episodes[0].id_,): [eps_1_last_obs],
|
| 97 |
+
(episodes[1].id_,): [eps_2_last_obs],
|
| 98 |
+
},
|
| 99 |
+
},
|
| 100 |
+
)
|
| 101 |
+
"""
|
| 102 |
+
|
| 103 |
+
def __init__(
|
| 104 |
+
self,
|
| 105 |
+
input_observation_space: Optional[gym.Space] = None,
|
| 106 |
+
input_action_space: Optional[gym.Space] = None,
|
| 107 |
+
*,
|
| 108 |
+
as_learner_connector: bool = False,
|
| 109 |
+
**kwargs,
|
| 110 |
+
):
|
| 111 |
+
"""Initializes a AddObservationsFromEpisodesToBatch instance.
|
| 112 |
+
|
| 113 |
+
Args:
|
| 114 |
+
as_learner_connector: Whether this connector is part of a Learner connector
|
| 115 |
+
pipeline, as opposed to a env-to-module pipeline. As a Learner
|
| 116 |
+
connector, it will add an entire Episode's observations (each timestep)
|
| 117 |
+
to the batch.
|
| 118 |
+
"""
|
| 119 |
+
super().__init__(
|
| 120 |
+
input_observation_space=input_observation_space,
|
| 121 |
+
input_action_space=input_action_space,
|
| 122 |
+
**kwargs,
|
| 123 |
+
)
|
| 124 |
+
|
| 125 |
+
self._as_learner_connector = as_learner_connector
|
| 126 |
+
|
| 127 |
+
@override(ConnectorV2)
|
| 128 |
+
def __call__(
|
| 129 |
+
self,
|
| 130 |
+
*,
|
| 131 |
+
rl_module: RLModule,
|
| 132 |
+
batch: Dict[str, Any],
|
| 133 |
+
episodes: List[EpisodeType],
|
| 134 |
+
explore: Optional[bool] = None,
|
| 135 |
+
shared_data: Optional[dict] = None,
|
| 136 |
+
**kwargs,
|
| 137 |
+
) -> Any:
|
| 138 |
+
# If "obs" already in data, early out.
|
| 139 |
+
if Columns.OBS in batch:
|
| 140 |
+
return batch
|
| 141 |
+
for i, sa_episode in enumerate(
|
| 142 |
+
self.single_agent_episode_iterator(
|
| 143 |
+
episodes,
|
| 144 |
+
# If Learner connector, get all episodes (for train batch).
|
| 145 |
+
# If EnvToModule, get only those ongoing episodes that just had their
|
| 146 |
+
# agent step (b/c those are the ones we need to compute actions for
|
| 147 |
+
# next).
|
| 148 |
+
agents_that_stepped_only=not self._as_learner_connector,
|
| 149 |
+
)
|
| 150 |
+
):
|
| 151 |
+
if self._as_learner_connector:
|
| 152 |
+
# TODO (sven): Resolve this hack by adding a new connector piece that
|
| 153 |
+
# performs this very task.
|
| 154 |
+
if "_" not in sa_episode.id_:
|
| 155 |
+
sa_episode.id_ += "_" + str(i)
|
| 156 |
+
|
| 157 |
+
self.add_n_batch_items(
|
| 158 |
+
batch,
|
| 159 |
+
Columns.OBS,
|
| 160 |
+
# Add all observations, except the very last one.
|
| 161 |
+
# For a terminated episode, this is the terminal observation that
|
| 162 |
+
# has no value for training.
|
| 163 |
+
# For a truncated episode, algorithms either add an extra NEXT_OBS
|
| 164 |
+
# column to the batch (ex. DQN) or extend the episode length by one
|
| 165 |
+
# (using a separate connector piece and this truncated last obs),
|
| 166 |
+
# then bootstrap the value estimation for that extra timestep.
|
| 167 |
+
items_to_add=sa_episode.get_observations(slice(0, len(sa_episode))),
|
| 168 |
+
num_items=len(sa_episode),
|
| 169 |
+
single_agent_episode=sa_episode,
|
| 170 |
+
)
|
| 171 |
+
else:
|
| 172 |
+
assert not sa_episode.is_numpy
|
| 173 |
+
self.add_batch_item(
|
| 174 |
+
batch,
|
| 175 |
+
Columns.OBS,
|
| 176 |
+
item_to_add=sa_episode.get_observations(-1),
|
| 177 |
+
single_agent_episode=sa_episode,
|
| 178 |
+
)
|
| 179 |
+
|
| 180 |
+
return batch
|
.venv/lib/python3.11/site-packages/ray/rllib/connectors/common/add_states_from_episodes_to_batch.py
ADDED
|
@@ -0,0 +1,348 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import math
|
| 2 |
+
from typing import Any, Dict, List, Optional
|
| 3 |
+
|
| 4 |
+
import gymnasium as gym
|
| 5 |
+
import numpy as np
|
| 6 |
+
import tree # pip install dm_tree
|
| 7 |
+
|
| 8 |
+
from ray.rllib.connectors.connector_v2 import ConnectorV2
|
| 9 |
+
from ray.rllib.core import DEFAULT_MODULE_ID
|
| 10 |
+
from ray.rllib.core.columns import Columns
|
| 11 |
+
from ray.rllib.core.rl_module.multi_rl_module import MultiRLModule
|
| 12 |
+
from ray.rllib.core.rl_module.rl_module import RLModule
|
| 13 |
+
from ray.rllib.utils.annotations import override
|
| 14 |
+
from ray.rllib.utils.numpy import convert_to_numpy
|
| 15 |
+
from ray.rllib.utils.typing import EpisodeType
|
| 16 |
+
from ray.util.annotations import PublicAPI
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
@PublicAPI(stability="alpha")
|
| 20 |
+
class AddStatesFromEpisodesToBatch(ConnectorV2):
|
| 21 |
+
"""Gets last STATE_OUT from running episode and adds it as STATE_IN to the batch.
|
| 22 |
+
|
| 23 |
+
Note: This is one of the default env-to-module or Learner ConnectorV2 pieces that
|
| 24 |
+
are added automatically by RLlib into every env-to-module/Learner connector
|
| 25 |
+
pipeline, unless `config.add_default_connectors_to_env_to_module_pipeline` or
|
| 26 |
+
`config.add_default_connectors_to_learner_pipeline ` are set to
|
| 27 |
+
False.
|
| 28 |
+
|
| 29 |
+
The default env-to-module connector pipeline is:
|
| 30 |
+
[
|
| 31 |
+
[0 or more user defined ConnectorV2 pieces],
|
| 32 |
+
AddObservationsFromEpisodesToBatch,
|
| 33 |
+
AddTimeDimToBatchAndZeroPad,
|
| 34 |
+
AddStatesFromEpisodesToBatch,
|
| 35 |
+
AgentToModuleMapping, # only in multi-agent setups!
|
| 36 |
+
BatchIndividualItems,
|
| 37 |
+
NumpyToTensor,
|
| 38 |
+
]
|
| 39 |
+
The default Learner connector pipeline is:
|
| 40 |
+
[
|
| 41 |
+
[0 or more user defined ConnectorV2 pieces],
|
| 42 |
+
AddObservationsFromEpisodesToBatch,
|
| 43 |
+
AddColumnsFromEpisodesToTrainBatch,
|
| 44 |
+
AddTimeDimToBatchAndZeroPad,
|
| 45 |
+
AddStatesFromEpisodesToBatch,
|
| 46 |
+
AgentToModuleMapping, # only in multi-agent setups!
|
| 47 |
+
BatchIndividualItems,
|
| 48 |
+
NumpyToTensor,
|
| 49 |
+
]
|
| 50 |
+
|
| 51 |
+
If the RLModule is stateful, the episodes' STATE_OUTS will be extracted
|
| 52 |
+
and restructured under a new STATE_IN key.
|
| 53 |
+
As a Learner connector, the resulting STATE_IN batch has the shape (B', ...).
|
| 54 |
+
Here, B' is the sum of splits we have to do over the given episodes, such that each
|
| 55 |
+
chunk is at most `max_seq_len` long (T-axis).
|
| 56 |
+
As a EnvToModule connector, the resulting STATE_IN batch simply consists of n
|
| 57 |
+
states coming from n vectorized environments/episodes.
|
| 58 |
+
|
| 59 |
+
Also, all other data (observations, rewards, etc.. if applicable) will be properly
|
| 60 |
+
reshaped into (B, T=max_seq_len (learner) or 1 (env-to-module), ...) and will be
|
| 61 |
+
zero-padded, if necessary.
|
| 62 |
+
|
| 63 |
+
This ConnectorV2:
|
| 64 |
+
- Operates on a list of Episode objects.
|
| 65 |
+
- Gets the most recent STATE_OUT from all the given episodes and adds them under
|
| 66 |
+
the STATE_IN key to the batch under construction.
|
| 67 |
+
- Does NOT alter any data in the given episodes.
|
| 68 |
+
- Can be used in EnvToModule and Learner connector pipelines.
|
| 69 |
+
|
| 70 |
+
.. testcode::
|
| 71 |
+
|
| 72 |
+
from ray.rllib.connectors.common import AddStatesFromEpisodesToBatch
|
| 73 |
+
from ray.rllib.core.columns import Columns
|
| 74 |
+
from ray.rllib.env.single_agent_episode import SingleAgentEpisode
|
| 75 |
+
from ray.rllib.utils.test_utils import check
|
| 76 |
+
|
| 77 |
+
# Create a simple dummy class, pretending to be an RLModule with
|
| 78 |
+
# `get_initial_state`, `is_stateful` and `model_config` property defined:
|
| 79 |
+
class MyStateModule:
|
| 80 |
+
# dummy config
|
| 81 |
+
model_config = {"max_seq_len": 2}
|
| 82 |
+
|
| 83 |
+
def is_stateful(self):
|
| 84 |
+
return True
|
| 85 |
+
|
| 86 |
+
def get_initial_state(self):
|
| 87 |
+
return 0.0
|
| 88 |
+
|
| 89 |
+
|
| 90 |
+
# Create an empty episode. The connector should use the RLModule's initial state
|
| 91 |
+
# to populate STATE_IN for the next forward pass.
|
| 92 |
+
episode = SingleAgentEpisode()
|
| 93 |
+
|
| 94 |
+
rl_module = MyStateModule()
|
| 95 |
+
rl_module_init_state = rl_module.get_initial_state()
|
| 96 |
+
|
| 97 |
+
# Create an instance of this class (as a env-to-module connector).
|
| 98 |
+
connector = AddStatesFromEpisodesToBatch(as_learner_connector=False)
|
| 99 |
+
|
| 100 |
+
# Call the connector.
|
| 101 |
+
output_batch = connector(
|
| 102 |
+
rl_module=rl_module,
|
| 103 |
+
batch={},
|
| 104 |
+
episodes=[episode],
|
| 105 |
+
shared_data={},
|
| 106 |
+
)
|
| 107 |
+
# The output data's STATE_IN key should now contain the RLModule's initial state
|
| 108 |
+
# plus the one state out found in the episode in a "per-episode organized"
|
| 109 |
+
# fashion.
|
| 110 |
+
check(
|
| 111 |
+
output_batch[Columns.STATE_IN],
|
| 112 |
+
{
|
| 113 |
+
(episode.id_,): [rl_module_init_state],
|
| 114 |
+
},
|
| 115 |
+
)
|
| 116 |
+
|
| 117 |
+
# Create a SingleAgentEpisodes containing 5 observations,
|
| 118 |
+
# 4 actions and 4 rewards, and 4 STATE_OUTs.
|
| 119 |
+
# The same connector should now use the episode-stored last STATE_OUT as
|
| 120 |
+
# STATE_IN for the next forward pass.
|
| 121 |
+
episode = SingleAgentEpisode(
|
| 122 |
+
observations=[0, 1, 2, 3, 4],
|
| 123 |
+
actions=[1, 2, 3, 4],
|
| 124 |
+
rewards=[1.0, 2.0, 3.0, 4.0],
|
| 125 |
+
# STATE_OUT in episode will show up under STATE_IN in the batch.
|
| 126 |
+
extra_model_outputs={
|
| 127 |
+
Columns.STATE_OUT: [-4.0, -3.0, -2.0, -1.0],
|
| 128 |
+
},
|
| 129 |
+
len_lookback_buffer = 0,
|
| 130 |
+
)
|
| 131 |
+
|
| 132 |
+
# Call the connector.
|
| 133 |
+
output_batch = connector(
|
| 134 |
+
rl_module=rl_module,
|
| 135 |
+
batch={},
|
| 136 |
+
episodes=[episode],
|
| 137 |
+
shared_data={},
|
| 138 |
+
)
|
| 139 |
+
# The output data's STATE_IN key should now contain the episode's last
|
| 140 |
+
# STATE_OUT, NOT the RLModule's initial state in a "per-episode organized"
|
| 141 |
+
# fashion.
|
| 142 |
+
check(
|
| 143 |
+
output_batch[Columns.STATE_IN],
|
| 144 |
+
{
|
| 145 |
+
# Expect the episode's last STATE_OUT.
|
| 146 |
+
(episode.id_,): [-1.0],
|
| 147 |
+
},
|
| 148 |
+
)
|
| 149 |
+
|
| 150 |
+
# Create a new connector as a learner connector with a RNN seq len of 2 (for
|
| 151 |
+
# testing purposes only). Passing the same data through this learner connector,
|
| 152 |
+
# we expect the STATE_IN data to contain a) the initial module state and then
|
| 153 |
+
# every 2nd STATE_OUT stored in the episode.
|
| 154 |
+
connector = AddStatesFromEpisodesToBatch(as_learner_connector=True)
|
| 155 |
+
|
| 156 |
+
# Call the connector.
|
| 157 |
+
output_batch = connector(
|
| 158 |
+
rl_module=rl_module,
|
| 159 |
+
batch={},
|
| 160 |
+
episodes=[episode],
|
| 161 |
+
shared_data={},
|
| 162 |
+
)
|
| 163 |
+
check(
|
| 164 |
+
output_batch[Columns.STATE_IN],
|
| 165 |
+
{
|
| 166 |
+
# Expect initial module state + every 2nd STATE_OUT from episode, but
|
| 167 |
+
# not the very last one (just like the very last observation, this data
|
| 168 |
+
# is NOT passed through the forward_train, b/c there is nothing to learn
|
| 169 |
+
# at that timestep, unless we need to compute e.g. bootstrap value
|
| 170 |
+
# predictions).
|
| 171 |
+
# Also note that the different STATE_IN timesteps are already present
|
| 172 |
+
# as one batched item per episode in the list.
|
| 173 |
+
(episode.id_,): [rl_module_init_state, -3.0],
|
| 174 |
+
},
|
| 175 |
+
)
|
| 176 |
+
"""
|
| 177 |
+
|
| 178 |
+
def __init__(
|
| 179 |
+
self,
|
| 180 |
+
input_observation_space: Optional[gym.Space] = None,
|
| 181 |
+
input_action_space: Optional[gym.Space] = None,
|
| 182 |
+
*,
|
| 183 |
+
as_learner_connector: bool = False,
|
| 184 |
+
**kwargs,
|
| 185 |
+
):
|
| 186 |
+
"""Initializes a AddObservationsFromEpisodesToBatch instance.
|
| 187 |
+
|
| 188 |
+
Args:
|
| 189 |
+
as_learner_connector: Whether this connector is part of a Learner connector
|
| 190 |
+
pipeline, as opposed to a env-to-module pipeline. As a Learner
|
| 191 |
+
connector, it will add an entire Episode's observations (each timestep)
|
| 192 |
+
to the batch.
|
| 193 |
+
"""
|
| 194 |
+
super().__init__(
|
| 195 |
+
input_observation_space=input_observation_space,
|
| 196 |
+
input_action_space=input_action_space,
|
| 197 |
+
**kwargs,
|
| 198 |
+
)
|
| 199 |
+
|
| 200 |
+
self._as_learner_connector = as_learner_connector
|
| 201 |
+
|
| 202 |
+
@override(ConnectorV2)
|
| 203 |
+
def __call__(
|
| 204 |
+
self,
|
| 205 |
+
*,
|
| 206 |
+
rl_module: RLModule,
|
| 207 |
+
batch: Dict[str, Any],
|
| 208 |
+
episodes: List[EpisodeType],
|
| 209 |
+
explore: Optional[bool] = None,
|
| 210 |
+
shared_data: Optional[dict] = None,
|
| 211 |
+
**kwargs,
|
| 212 |
+
) -> Any:
|
| 213 |
+
# If not stateful OR STATE_IN already in data, early out.
|
| 214 |
+
if not rl_module.is_stateful() or Columns.STATE_IN in batch:
|
| 215 |
+
return batch
|
| 216 |
+
|
| 217 |
+
for sa_episode in self.single_agent_episode_iterator(
|
| 218 |
+
episodes,
|
| 219 |
+
# If Learner connector, get all episodes (for train batch).
|
| 220 |
+
# If EnvToModule, get only those ongoing episodes that just had their
|
| 221 |
+
# agent step (b/c those are the ones we need to compute actions for next).
|
| 222 |
+
agents_that_stepped_only=not self._as_learner_connector,
|
| 223 |
+
):
|
| 224 |
+
if self._as_learner_connector:
|
| 225 |
+
# Multi-agent case: Extract correct single agent RLModule (to get its
|
| 226 |
+
# individual state).
|
| 227 |
+
if sa_episode.module_id is not None:
|
| 228 |
+
sa_module = rl_module[sa_episode.module_id]
|
| 229 |
+
else:
|
| 230 |
+
sa_module = (
|
| 231 |
+
rl_module[DEFAULT_MODULE_ID]
|
| 232 |
+
if isinstance(rl_module, MultiRLModule)
|
| 233 |
+
else rl_module
|
| 234 |
+
)
|
| 235 |
+
# This single-agent RLModule is NOT stateful -> Skip.
|
| 236 |
+
if not sa_module.is_stateful():
|
| 237 |
+
continue
|
| 238 |
+
|
| 239 |
+
max_seq_len = sa_module.model_config["max_seq_len"]
|
| 240 |
+
|
| 241 |
+
# look_back_state.shape=([state-dim],)
|
| 242 |
+
look_back_state = (
|
| 243 |
+
# Episode has a (reset) beginning -> Prepend initial
|
| 244 |
+
# state.
|
| 245 |
+
convert_to_numpy(sa_module.get_initial_state())
|
| 246 |
+
if sa_episode.t_started == 0
|
| 247 |
+
or (Columns.STATE_OUT not in sa_episode.extra_model_outputs)
|
| 248 |
+
# Episode starts somewhere in the middle (is a cut
|
| 249 |
+
# continuation chunk) -> Use previous chunk's last
|
| 250 |
+
# STATE_OUT as initial state.
|
| 251 |
+
else sa_episode.get_extra_model_outputs(
|
| 252 |
+
key=Columns.STATE_OUT,
|
| 253 |
+
indices=-1,
|
| 254 |
+
neg_index_as_lookback=True,
|
| 255 |
+
)
|
| 256 |
+
)
|
| 257 |
+
# If we have `"state_out"`s (e.g. from rollouts) use them for the
|
| 258 |
+
# `"state_in"`s.
|
| 259 |
+
if Columns.STATE_OUT in sa_episode.extra_model_outputs:
|
| 260 |
+
# state_outs.shape=(T,[state-dim]) T=episode len
|
| 261 |
+
state_outs = sa_episode.get_extra_model_outputs(
|
| 262 |
+
key=Columns.STATE_OUT
|
| 263 |
+
)
|
| 264 |
+
# Otherwise, we have no `"state_out"` (e.g. because we are sampling
|
| 265 |
+
# from offline data and the expert policy was not stateful).
|
| 266 |
+
else:
|
| 267 |
+
# Then simply use the `look_back_state`, i.e. in this case the
|
| 268 |
+
# initial state as `"state_in` in training.
|
| 269 |
+
if sa_episode.is_numpy:
|
| 270 |
+
state_outs = tree.map_structure(
|
| 271 |
+
lambda a, _sae=sa_episode: np.repeat(
|
| 272 |
+
a[np.newaxis, ...], len(_sae), axis=0
|
| 273 |
+
),
|
| 274 |
+
look_back_state,
|
| 275 |
+
)
|
| 276 |
+
else:
|
| 277 |
+
state_outs = [look_back_state for _ in range(len(sa_episode))]
|
| 278 |
+
# Explanation:
|
| 279 |
+
# B=episode len // max_seq_len
|
| 280 |
+
# [::max_seq_len]: only keep every Tth state.
|
| 281 |
+
# [:-1]: Shift state outs by one; ignore very last
|
| 282 |
+
# STATE_OUT, but therefore add the lookback/init state at
|
| 283 |
+
# the beginning.
|
| 284 |
+
items_to_add = (
|
| 285 |
+
tree.map_structure(
|
| 286 |
+
lambda i, o, m=max_seq_len: np.concatenate([[i], o[:-1]])[::m],
|
| 287 |
+
look_back_state,
|
| 288 |
+
state_outs,
|
| 289 |
+
)
|
| 290 |
+
if sa_episode.is_numpy
|
| 291 |
+
else ([look_back_state] + state_outs[:-1])[::max_seq_len]
|
| 292 |
+
)
|
| 293 |
+
self.add_n_batch_items(
|
| 294 |
+
batch=batch,
|
| 295 |
+
column=Columns.STATE_IN,
|
| 296 |
+
items_to_add=items_to_add,
|
| 297 |
+
num_items=int(math.ceil(len(sa_episode) / max_seq_len)),
|
| 298 |
+
single_agent_episode=sa_episode,
|
| 299 |
+
)
|
| 300 |
+
if Columns.NEXT_OBS in batch:
|
| 301 |
+
items_to_add = (
|
| 302 |
+
tree.map_structure(
|
| 303 |
+
lambda i, m=max_seq_len: i[::m],
|
| 304 |
+
state_outs,
|
| 305 |
+
)
|
| 306 |
+
if sa_episode.is_numpy
|
| 307 |
+
else state_outs[::max_seq_len]
|
| 308 |
+
)
|
| 309 |
+
self.add_n_batch_items(
|
| 310 |
+
batch=batch,
|
| 311 |
+
column=Columns.NEXT_STATE_IN,
|
| 312 |
+
items_to_add=items_to_add,
|
| 313 |
+
num_items=int(math.ceil(len(sa_episode) / max_seq_len)),
|
| 314 |
+
single_agent_episode=sa_episode,
|
| 315 |
+
)
|
| 316 |
+
|
| 317 |
+
else:
|
| 318 |
+
assert not sa_episode.is_numpy
|
| 319 |
+
|
| 320 |
+
# Multi-agent case: Extract correct single agent RLModule (to get the
|
| 321 |
+
# state for individually).
|
| 322 |
+
sa_module = rl_module
|
| 323 |
+
if sa_episode.module_id is not None:
|
| 324 |
+
sa_module = rl_module[sa_episode.module_id]
|
| 325 |
+
# This single-agent RLModule is NOT stateful -> Skip.
|
| 326 |
+
if not sa_module.is_stateful():
|
| 327 |
+
continue
|
| 328 |
+
|
| 329 |
+
# Episode just started or has no `"state_out"` (e.g. in offline
|
| 330 |
+
# sampling) -> Get initial state from our RLModule.
|
| 331 |
+
if (sa_episode.t_started == 0 and len(sa_episode) == 0) or (
|
| 332 |
+
Columns.STATE_OUT not in sa_episode.extra_model_outputs
|
| 333 |
+
):
|
| 334 |
+
state = sa_module.get_initial_state()
|
| 335 |
+
# Episode is already ongoing -> Use most recent STATE_OUT.
|
| 336 |
+
else:
|
| 337 |
+
state = sa_episode.get_extra_model_outputs(
|
| 338 |
+
key=Columns.STATE_OUT,
|
| 339 |
+
indices=-1,
|
| 340 |
+
)
|
| 341 |
+
self.add_batch_item(
|
| 342 |
+
batch,
|
| 343 |
+
Columns.STATE_IN,
|
| 344 |
+
item_to_add=state,
|
| 345 |
+
single_agent_episode=sa_episode,
|
| 346 |
+
)
|
| 347 |
+
|
| 348 |
+
return batch
|
.venv/lib/python3.11/site-packages/ray/rllib/connectors/common/add_time_dim_to_batch_and_zero_pad.py
ADDED
|
@@ -0,0 +1,302 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import Any, Dict, List, Optional
|
| 2 |
+
|
| 3 |
+
import gymnasium as gym
|
| 4 |
+
import numpy as np
|
| 5 |
+
import tree # pip install dm_tree
|
| 6 |
+
|
| 7 |
+
from ray.rllib.connectors.connector_v2 import ConnectorV2
|
| 8 |
+
from ray.rllib.core import DEFAULT_MODULE_ID
|
| 9 |
+
from ray.rllib.core.columns import Columns
|
| 10 |
+
from ray.rllib.core.rl_module.multi_rl_module import MultiRLModule
|
| 11 |
+
from ray.rllib.core.rl_module.rl_module import RLModule
|
| 12 |
+
from ray.rllib.utils.annotations import override
|
| 13 |
+
from ray.rllib.utils.postprocessing.zero_padding import (
|
| 14 |
+
create_mask_and_seq_lens,
|
| 15 |
+
split_and_zero_pad,
|
| 16 |
+
)
|
| 17 |
+
from ray.rllib.utils.spaces.space_utils import BatchedNdArray
|
| 18 |
+
from ray.rllib.utils.typing import EpisodeType
|
| 19 |
+
from ray.util.annotations import PublicAPI
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
@PublicAPI(stability="alpha")
|
| 23 |
+
class AddTimeDimToBatchAndZeroPad(ConnectorV2):
|
| 24 |
+
"""Adds an extra time dim (axis=1) to all data currently in the batch.
|
| 25 |
+
|
| 26 |
+
Note: This is one of the default env-to-module or Learner ConnectorV2 pieces that
|
| 27 |
+
are added automatically by RLlib into every env-to-module/Learner connector
|
| 28 |
+
pipeline, unless `config.add_default_connectors_to_env_to_module_pipeline` or
|
| 29 |
+
`config.add_default_connectors_to_learner_pipeline ` are set to
|
| 30 |
+
False.
|
| 31 |
+
|
| 32 |
+
The default env-to-module connector pipeline is:
|
| 33 |
+
[
|
| 34 |
+
[0 or more user defined ConnectorV2 pieces],
|
| 35 |
+
AddObservationsFromEpisodesToBatch,
|
| 36 |
+
AddTimeDimToBatchAndZeroPad,
|
| 37 |
+
AddStatesFromEpisodesToBatch,
|
| 38 |
+
AgentToModuleMapping, # only in multi-agent setups!
|
| 39 |
+
BatchIndividualItems,
|
| 40 |
+
NumpyToTensor,
|
| 41 |
+
]
|
| 42 |
+
The default Learner connector pipeline is:
|
| 43 |
+
[
|
| 44 |
+
[0 or more user defined ConnectorV2 pieces],
|
| 45 |
+
AddObservationsFromEpisodesToBatch,
|
| 46 |
+
AddColumnsFromEpisodesToTrainBatch,
|
| 47 |
+
AddTimeDimToBatchAndZeroPad,
|
| 48 |
+
AddStatesFromEpisodesToBatch,
|
| 49 |
+
AgentToModuleMapping, # only in multi-agent setups!
|
| 50 |
+
BatchIndividualItems,
|
| 51 |
+
NumpyToTensor,
|
| 52 |
+
]
|
| 53 |
+
|
| 54 |
+
If the RLModule is stateful, an extra time dim at axis=1 is added to all data in the
|
| 55 |
+
batch.
|
| 56 |
+
|
| 57 |
+
Also, all data (observations, rewards, etc.. if applicable) will be properly
|
| 58 |
+
reshaped into (B, T=max_seq_len (learner) or 1 (env-to-module), ...) and will be
|
| 59 |
+
zero-padded, if necessary.
|
| 60 |
+
|
| 61 |
+
This ConnectorV2:
|
| 62 |
+
- Operates on a list of Episode objects.
|
| 63 |
+
- Adds a time dim at axis=1 to all columns already in the batch.
|
| 64 |
+
- In case of a learner connector pipeline, zero-pads the data according to the
|
| 65 |
+
module's `self.model_config["max_seq_len"]` setting and reshapes all data to
|
| 66 |
+
(B, T, ...). The connector also adds SEQ_LENS information and loss mask
|
| 67 |
+
information to the batch based on the added zero-padding.
|
| 68 |
+
- Does NOT alter any data in the given episodes.
|
| 69 |
+
- Can be used in EnvToModule and Learner connector pipelines.
|
| 70 |
+
|
| 71 |
+
.. testcode::
|
| 72 |
+
|
| 73 |
+
from ray.rllib.connectors.common import AddTimeDimToBatchAndZeroPad
|
| 74 |
+
from ray.rllib.core.columns import Columns
|
| 75 |
+
from ray.rllib.env.single_agent_episode import SingleAgentEpisode
|
| 76 |
+
from ray.rllib.utils.test_utils import check
|
| 77 |
+
|
| 78 |
+
|
| 79 |
+
# Create a simple dummy class, pretending to be an RLModule with
|
| 80 |
+
# `get_initial_state`, `is_stateful` and `model_config` property defined:
|
| 81 |
+
class MyStateModule:
|
| 82 |
+
# dummy config
|
| 83 |
+
model_config = {"max_seq_len": 3}
|
| 84 |
+
|
| 85 |
+
def is_stateful(self):
|
| 86 |
+
return True
|
| 87 |
+
|
| 88 |
+
def get_initial_state(self):
|
| 89 |
+
return 0.0
|
| 90 |
+
|
| 91 |
+
|
| 92 |
+
# Create an already reset episode. Expect the connector to add a time-dim to the
|
| 93 |
+
# reset observation.
|
| 94 |
+
episode = SingleAgentEpisode(observations=[0])
|
| 95 |
+
|
| 96 |
+
rl_module = MyStateModule()
|
| 97 |
+
|
| 98 |
+
# Create an instance of this class (as an env-to-module connector).
|
| 99 |
+
connector = AddTimeDimToBatchAndZeroPad(as_learner_connector=False)
|
| 100 |
+
|
| 101 |
+
# Call the connector.
|
| 102 |
+
output_batch = connector(
|
| 103 |
+
rl_module=rl_module,
|
| 104 |
+
batch={Columns.OBS: [0]},
|
| 105 |
+
episodes=[episode],
|
| 106 |
+
shared_data={},
|
| 107 |
+
)
|
| 108 |
+
# The output data's OBS key should now be reshaped to (B, T)
|
| 109 |
+
check(output_batch[Columns.OBS], [[0]])
|
| 110 |
+
|
| 111 |
+
# Create a SingleAgentEpisodes containing 5 observations,
|
| 112 |
+
# 4 actions and 4 rewards.
|
| 113 |
+
episode = SingleAgentEpisode(
|
| 114 |
+
observations=[0, 1, 2, 3, 4],
|
| 115 |
+
actions=[1, 2, 3, 4],
|
| 116 |
+
rewards=[1.0, 2.0, 3.0, 4.0],
|
| 117 |
+
len_lookback_buffer=0,
|
| 118 |
+
)
|
| 119 |
+
|
| 120 |
+
# Call the connector.
|
| 121 |
+
output_batch = connector(
|
| 122 |
+
rl_module=rl_module,
|
| 123 |
+
batch={Columns.OBS: [4]},
|
| 124 |
+
episodes=[episode],
|
| 125 |
+
shared_data={},
|
| 126 |
+
)
|
| 127 |
+
# The output data's OBS, ACTIONS, and REWARDS keys should now all have a time
|
| 128 |
+
# rank.
|
| 129 |
+
check(
|
| 130 |
+
# Expect the episode's last OBS.
|
| 131 |
+
output_batch[Columns.OBS], [[4]],
|
| 132 |
+
)
|
| 133 |
+
|
| 134 |
+
# Create a new connector as a learner connector with a RNN seq len of 4 (for
|
| 135 |
+
# testing purposes only). Passing the same data through this learner connector,
|
| 136 |
+
# we expect the data to also be zero-padded.
|
| 137 |
+
connector = AddTimeDimToBatchAndZeroPad(as_learner_connector=True)
|
| 138 |
+
|
| 139 |
+
# Call the connector.
|
| 140 |
+
output_batch = connector(
|
| 141 |
+
rl_module=rl_module,
|
| 142 |
+
batch={Columns.OBS: {(episode.id_,): [0, 1, 2, 3]}},
|
| 143 |
+
episodes=[episode],
|
| 144 |
+
shared_data={},
|
| 145 |
+
)
|
| 146 |
+
check(output_batch[Columns.OBS], {(episode.id_,): [[0, 1, 2], [3, 0, 0]]})
|
| 147 |
+
"""
|
| 148 |
+
|
| 149 |
+
def __init__(
|
| 150 |
+
self,
|
| 151 |
+
input_observation_space: Optional[gym.Space] = None,
|
| 152 |
+
input_action_space: Optional[gym.Space] = None,
|
| 153 |
+
*,
|
| 154 |
+
as_learner_connector: bool = False,
|
| 155 |
+
**kwargs,
|
| 156 |
+
):
|
| 157 |
+
"""Initializes a AddObservationsFromEpisodesToBatch instance.
|
| 158 |
+
|
| 159 |
+
Args:
|
| 160 |
+
as_learner_connector: Whether this connector is part of a Learner connector
|
| 161 |
+
pipeline, as opposed to a env-to-module pipeline. As a Learner
|
| 162 |
+
connector, it will add an entire Episode's observations (each timestep)
|
| 163 |
+
to the batch.
|
| 164 |
+
"""
|
| 165 |
+
super().__init__(
|
| 166 |
+
input_observation_space=input_observation_space,
|
| 167 |
+
input_action_space=input_action_space,
|
| 168 |
+
**kwargs,
|
| 169 |
+
)
|
| 170 |
+
|
| 171 |
+
self._as_learner_connector = as_learner_connector
|
| 172 |
+
|
| 173 |
+
@override(ConnectorV2)
|
| 174 |
+
def __call__(
|
| 175 |
+
self,
|
| 176 |
+
*,
|
| 177 |
+
rl_module: RLModule,
|
| 178 |
+
batch: Dict[str, Any],
|
| 179 |
+
episodes: List[EpisodeType],
|
| 180 |
+
explore: Optional[bool] = None,
|
| 181 |
+
shared_data: Optional[dict] = None,
|
| 182 |
+
**kwargs,
|
| 183 |
+
) -> Any:
|
| 184 |
+
|
| 185 |
+
# If not stateful OR STATE_IN already in data, early out.
|
| 186 |
+
if not rl_module.is_stateful() or Columns.STATE_IN in batch:
|
| 187 |
+
return batch
|
| 188 |
+
|
| 189 |
+
# Make all inputs (other than STATE_IN) have an additional T-axis.
|
| 190 |
+
# Since data has not been batched yet (we are still operating on lists in the
|
| 191 |
+
# batch), we add this time axis as 0 (not 1). When we batch, the batch axis will
|
| 192 |
+
# be 0 and the time axis will be 1.
|
| 193 |
+
# Also, let module-to-env pipeline know that we had added a single timestep
|
| 194 |
+
# time rank to the data (to remove it again).
|
| 195 |
+
if not self._as_learner_connector:
|
| 196 |
+
for column in batch.keys():
|
| 197 |
+
self.foreach_batch_item_change_in_place(
|
| 198 |
+
batch=batch,
|
| 199 |
+
column=column,
|
| 200 |
+
func=lambda item, eps_id, aid, mid: (
|
| 201 |
+
item
|
| 202 |
+
if mid is not None and not rl_module[mid].is_stateful()
|
| 203 |
+
# Expand on axis 0 (the to-be-time-dim) if item has not been
|
| 204 |
+
# batched yet, otherwise axis=1 (the time-dim).
|
| 205 |
+
else tree.map_structure(
|
| 206 |
+
lambda s: np.expand_dims(
|
| 207 |
+
s, axis=(1 if isinstance(s, BatchedNdArray) else 0)
|
| 208 |
+
),
|
| 209 |
+
item,
|
| 210 |
+
)
|
| 211 |
+
),
|
| 212 |
+
)
|
| 213 |
+
shared_data["_added_single_ts_time_rank"] = True
|
| 214 |
+
else:
|
| 215 |
+
# Before adding STATE_IN to the `data`, zero-pad existing data and batch
|
| 216 |
+
# into max_seq_len chunks.
|
| 217 |
+
for column, column_data in batch.copy().items():
|
| 218 |
+
# Do not zero-pad INFOS column.
|
| 219 |
+
if column == Columns.INFOS:
|
| 220 |
+
continue
|
| 221 |
+
for key, item_list in column_data.items():
|
| 222 |
+
# Multi-agent case AND RLModule is not stateful -> Do not zero-pad
|
| 223 |
+
# for this model.
|
| 224 |
+
assert isinstance(key, tuple)
|
| 225 |
+
mid = None
|
| 226 |
+
if len(key) == 3:
|
| 227 |
+
eps_id, aid, mid = key
|
| 228 |
+
if not rl_module[mid].is_stateful():
|
| 229 |
+
continue
|
| 230 |
+
column_data[key] = split_and_zero_pad(
|
| 231 |
+
item_list,
|
| 232 |
+
max_seq_len=self._get_max_seq_len(rl_module, module_id=mid),
|
| 233 |
+
)
|
| 234 |
+
# TODO (sven): Remove this hint/hack once we are not relying on
|
| 235 |
+
# SampleBatch anymore (which has to set its property
|
| 236 |
+
# zero_padded=True when shuffling).
|
| 237 |
+
shared_data[
|
| 238 |
+
(
|
| 239 |
+
"_zero_padded_for_mid="
|
| 240 |
+
f"{mid if mid is not None else DEFAULT_MODULE_ID}"
|
| 241 |
+
)
|
| 242 |
+
] = True
|
| 243 |
+
|
| 244 |
+
for sa_episode in self.single_agent_episode_iterator(
|
| 245 |
+
# If Learner connector, get all episodes (for train batch).
|
| 246 |
+
# If EnvToModule, get only those ongoing episodes that just had their
|
| 247 |
+
# agent step (b/c those are the ones we need to compute actions for next).
|
| 248 |
+
episodes,
|
| 249 |
+
agents_that_stepped_only=False,
|
| 250 |
+
):
|
| 251 |
+
# Multi-agent case: Extract correct single agent RLModule (to get its
|
| 252 |
+
# individual state).
|
| 253 |
+
if sa_episode.module_id is not None:
|
| 254 |
+
sa_module = rl_module[sa_episode.module_id]
|
| 255 |
+
else:
|
| 256 |
+
sa_module = (
|
| 257 |
+
rl_module[DEFAULT_MODULE_ID]
|
| 258 |
+
if isinstance(rl_module, MultiRLModule)
|
| 259 |
+
else rl_module
|
| 260 |
+
)
|
| 261 |
+
# This single-agent RLModule is NOT stateful -> Skip.
|
| 262 |
+
if not sa_module.is_stateful():
|
| 263 |
+
continue
|
| 264 |
+
|
| 265 |
+
max_seq_len = sa_module.model_config["max_seq_len"]
|
| 266 |
+
|
| 267 |
+
# Also, create the loss mask (b/c of our now possibly zero-padded data)
|
| 268 |
+
# as well as the seq_lens array and add these to `data` as well.
|
| 269 |
+
mask, seq_lens = create_mask_and_seq_lens(len(sa_episode), max_seq_len)
|
| 270 |
+
self.add_n_batch_items(
|
| 271 |
+
batch=batch,
|
| 272 |
+
column=Columns.SEQ_LENS,
|
| 273 |
+
items_to_add=seq_lens,
|
| 274 |
+
num_items=len(seq_lens),
|
| 275 |
+
single_agent_episode=sa_episode,
|
| 276 |
+
)
|
| 277 |
+
if not shared_data.get("_added_loss_mask_for_valid_episode_ts"):
|
| 278 |
+
self.add_n_batch_items(
|
| 279 |
+
batch=batch,
|
| 280 |
+
column=Columns.LOSS_MASK,
|
| 281 |
+
items_to_add=mask,
|
| 282 |
+
num_items=len(mask),
|
| 283 |
+
single_agent_episode=sa_episode,
|
| 284 |
+
)
|
| 285 |
+
|
| 286 |
+
return batch
|
| 287 |
+
|
| 288 |
+
def _get_max_seq_len(self, rl_module, module_id=None):
|
| 289 |
+
if not isinstance(rl_module, MultiRLModule):
|
| 290 |
+
mod = rl_module
|
| 291 |
+
elif module_id:
|
| 292 |
+
mod = rl_module[module_id]
|
| 293 |
+
else:
|
| 294 |
+
mod = next(iter(rl_module.values()))
|
| 295 |
+
if "max_seq_len" not in mod.model_config:
|
| 296 |
+
raise ValueError(
|
| 297 |
+
"You are using a stateful RLModule and are not providing a "
|
| 298 |
+
"'max_seq_len' key inside your `model_config`. You can set this "
|
| 299 |
+
"dict and/or override keys in it via `config.rl_module("
|
| 300 |
+
"model_config={'max_seq_len': [some int]})`."
|
| 301 |
+
)
|
| 302 |
+
return mod.model_config["max_seq_len"]
|
.venv/lib/python3.11/site-packages/ray/rllib/connectors/common/agent_to_module_mapping.py
ADDED
|
@@ -0,0 +1,291 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from collections import defaultdict
|
| 2 |
+
from typing import Any, Dict, List, Optional
|
| 3 |
+
|
| 4 |
+
import gymnasium as gym
|
| 5 |
+
|
| 6 |
+
from ray.rllib.connectors.connector_v2 import ConnectorV2
|
| 7 |
+
from ray.rllib.core.rl_module.rl_module import RLModule, RLModuleSpec
|
| 8 |
+
from ray.rllib.env.multi_agent_episode import MultiAgentEpisode
|
| 9 |
+
from ray.rllib.utils.annotations import override
|
| 10 |
+
from ray.rllib.utils.typing import EpisodeType, ModuleID
|
| 11 |
+
from ray.util.annotations import PublicAPI
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
@PublicAPI(stability="alpha")
|
| 15 |
+
class AgentToModuleMapping(ConnectorV2):
|
| 16 |
+
"""ConnectorV2 that performs mapping of data from AgentID based to ModuleID based.
|
| 17 |
+
|
| 18 |
+
Note: This is one of the default env-to-module or Learner ConnectorV2 pieces that
|
| 19 |
+
are added automatically by RLlib into every env-to-module/Learner connector
|
| 20 |
+
pipeline, unless `config.add_default_connectors_to_env_to_module_pipeline` or
|
| 21 |
+
`config.add_default_connectors_to_learner_pipeline ` are set to
|
| 22 |
+
False.
|
| 23 |
+
|
| 24 |
+
The default env-to-module connector pipeline is:
|
| 25 |
+
[
|
| 26 |
+
[0 or more user defined ConnectorV2 pieces],
|
| 27 |
+
AddObservationsFromEpisodesToBatch,
|
| 28 |
+
AddTimeDimToBatchAndZeroPad,
|
| 29 |
+
AddStatesFromEpisodesToBatch,
|
| 30 |
+
AgentToModuleMapping, # only in multi-agent setups!
|
| 31 |
+
BatchIndividualItems,
|
| 32 |
+
NumpyToTensor,
|
| 33 |
+
]
|
| 34 |
+
The default Learner connector pipeline is:
|
| 35 |
+
[
|
| 36 |
+
[0 or more user defined ConnectorV2 pieces],
|
| 37 |
+
AddObservationsFromEpisodesToBatch,
|
| 38 |
+
AddColumnsFromEpisodesToTrainBatch,
|
| 39 |
+
AddTimeDimToBatchAndZeroPad,
|
| 40 |
+
AddStatesFromEpisodesToBatch,
|
| 41 |
+
AgentToModuleMapping, # only in multi-agent setups!
|
| 42 |
+
BatchIndividualItems,
|
| 43 |
+
NumpyToTensor,
|
| 44 |
+
]
|
| 45 |
+
|
| 46 |
+
This connector piece is only used by RLlib (as a default connector piece) in a
|
| 47 |
+
multi-agent setup.
|
| 48 |
+
|
| 49 |
+
Note that before the mapping, `data` is expected to have the following
|
| 50 |
+
structure:
|
| 51 |
+
[col0]:
|
| 52 |
+
(eps_id0, ag0, mod0): [list of individual batch items]
|
| 53 |
+
(eps_id0, ag1, mod2): [list of individual batch items]
|
| 54 |
+
(eps_id1, ag0, mod1): [list of individual batch items]
|
| 55 |
+
[col1]:
|
| 56 |
+
etc..
|
| 57 |
+
|
| 58 |
+
The target structure of the above `data` would then be:
|
| 59 |
+
[mod0]:
|
| 60 |
+
[col0]: [batched data -> batch_size_B will be the number of all items in the
|
| 61 |
+
input data under col0 that have mod0 as their ModuleID]
|
| 62 |
+
[col1]: [batched data]
|
| 63 |
+
[mod1]:
|
| 64 |
+
[col0]: etc.
|
| 65 |
+
|
| 66 |
+
Mapping happens in the following stages:
|
| 67 |
+
|
| 68 |
+
1) Under each column name, sort keys first by EpisodeID, then AgentID.
|
| 69 |
+
2) Add ModuleID keys under each column name (no cost/extra memory) and map these
|
| 70 |
+
new keys to empty lists.
|
| 71 |
+
[col0] -> [mod0] -> []: Then push items that belong to mod0 into these lists.
|
| 72 |
+
3) Perform batching on the per-module lists under each column:
|
| 73 |
+
[col0] -> [mod0]: [...] <- now batched data (numpy array or struct of numpy
|
| 74 |
+
arrays).
|
| 75 |
+
4) Flip column names with ModuleIDs (no cost/extra memory):
|
| 76 |
+
[mod0]:
|
| 77 |
+
[col0]: [batched data]
|
| 78 |
+
etc..
|
| 79 |
+
|
| 80 |
+
Note that in order to unmap the resulting batch back into an AgentID based one,
|
| 81 |
+
we have to store the env vector index AND AgentID of each module's batch item
|
| 82 |
+
in an additionally returned `memorized_map_structure`.
|
| 83 |
+
|
| 84 |
+
.. testcode::
|
| 85 |
+
|
| 86 |
+
from ray.rllib.connectors.env_to_module import AgentToModuleMapping
|
| 87 |
+
from ray.rllib.utils.test_utils import check
|
| 88 |
+
|
| 89 |
+
batch = {
|
| 90 |
+
"obs": {
|
| 91 |
+
("MA-EPS0", "agent0", "module0"): [0, 1, 2],
|
| 92 |
+
("MA-EPS0", "agent1", "module1"): [3, 4, 5],
|
| 93 |
+
},
|
| 94 |
+
"actions": {
|
| 95 |
+
("MA-EPS1", "agent2", "module0"): [8],
|
| 96 |
+
("MA-EPS0", "agent1", "module1"): [9],
|
| 97 |
+
},
|
| 98 |
+
}
|
| 99 |
+
|
| 100 |
+
# Create our connector piece.
|
| 101 |
+
connector = AgentToModuleMapping(
|
| 102 |
+
rl_module_specs={"module0", "module1"},
|
| 103 |
+
agent_to_module_mapping_fn=(
|
| 104 |
+
lambda agent_id, eps: "module1" if agent_id == "agent1" else "module0"
|
| 105 |
+
),
|
| 106 |
+
)
|
| 107 |
+
|
| 108 |
+
# Call the connector (and thereby flip from AgentID based to ModuleID based
|
| 109 |
+
# structure..
|
| 110 |
+
output_batch = connector(
|
| 111 |
+
rl_module=None, # This particular connector works without an RLModule.
|
| 112 |
+
batch=batch,
|
| 113 |
+
episodes=[], # This particular connector works without a list of episodes.
|
| 114 |
+
explore=True,
|
| 115 |
+
shared_data={},
|
| 116 |
+
)
|
| 117 |
+
|
| 118 |
+
# `data` should now be mapped from ModuleIDs to module data.
|
| 119 |
+
check(
|
| 120 |
+
output_batch,
|
| 121 |
+
{
|
| 122 |
+
"module0": {
|
| 123 |
+
"obs": [0, 1, 2],
|
| 124 |
+
"actions": [8],
|
| 125 |
+
},
|
| 126 |
+
"module1": {
|
| 127 |
+
"obs": [3, 4, 5],
|
| 128 |
+
"actions": [9],
|
| 129 |
+
},
|
| 130 |
+
},
|
| 131 |
+
)
|
| 132 |
+
"""
|
| 133 |
+
|
| 134 |
+
@override(ConnectorV2)
|
| 135 |
+
def recompute_output_observation_space(
|
| 136 |
+
self,
|
| 137 |
+
input_observation_space: gym.Space,
|
| 138 |
+
input_action_space: gym.Space,
|
| 139 |
+
) -> gym.Space:
|
| 140 |
+
return self._map_space_if_necessary(input_observation_space, "obs")
|
| 141 |
+
|
| 142 |
+
@override(ConnectorV2)
|
| 143 |
+
def recompute_output_action_space(
|
| 144 |
+
self,
|
| 145 |
+
input_observation_space: gym.Space,
|
| 146 |
+
input_action_space: gym.Space,
|
| 147 |
+
) -> gym.Space:
|
| 148 |
+
return self._map_space_if_necessary(input_action_space, "act")
|
| 149 |
+
|
| 150 |
+
def __init__(
|
| 151 |
+
self,
|
| 152 |
+
input_observation_space: Optional[gym.Space] = None,
|
| 153 |
+
input_action_space: Optional[gym.Space] = None,
|
| 154 |
+
*,
|
| 155 |
+
rl_module_specs: Dict[ModuleID, RLModuleSpec],
|
| 156 |
+
agent_to_module_mapping_fn,
|
| 157 |
+
):
|
| 158 |
+
super().__init__(input_observation_space, input_action_space)
|
| 159 |
+
|
| 160 |
+
self._rl_module_specs = rl_module_specs
|
| 161 |
+
self._agent_to_module_mapping_fn = agent_to_module_mapping_fn
|
| 162 |
+
|
| 163 |
+
@override(ConnectorV2)
|
| 164 |
+
def __call__(
|
| 165 |
+
self,
|
| 166 |
+
*,
|
| 167 |
+
rl_module: RLModule,
|
| 168 |
+
batch: Dict[str, Any],
|
| 169 |
+
episodes: List[EpisodeType],
|
| 170 |
+
explore: Optional[bool] = None,
|
| 171 |
+
shared_data: Optional[dict] = None,
|
| 172 |
+
**kwargs,
|
| 173 |
+
) -> Any:
|
| 174 |
+
# Current agent to module mapping function.
|
| 175 |
+
# agent_to_module_mapping_fn = shared_data.get("agent_to_module_mapping_fn")
|
| 176 |
+
# Store in shared data, which module IDs map to which episode/agent, such
|
| 177 |
+
# that the module-to-env pipeline can map the data back to agents.
|
| 178 |
+
memorized_map_structure = defaultdict(list)
|
| 179 |
+
for column, agent_data in batch.items():
|
| 180 |
+
if rl_module is not None and column in rl_module:
|
| 181 |
+
continue
|
| 182 |
+
for eps_id, agent_id, module_id in agent_data.keys():
|
| 183 |
+
memorized_map_structure[module_id].append((eps_id, agent_id))
|
| 184 |
+
# TODO (sven): We should check that all columns have the same struct.
|
| 185 |
+
break
|
| 186 |
+
|
| 187 |
+
shared_data["memorized_map_structure"] = dict(memorized_map_structure)
|
| 188 |
+
|
| 189 |
+
# Mapping from ModuleID to column data.
|
| 190 |
+
data_by_module = {}
|
| 191 |
+
|
| 192 |
+
# Iterating over each column in the original data:
|
| 193 |
+
for column, agent_data in batch.items():
|
| 194 |
+
if rl_module is not None and column in rl_module:
|
| 195 |
+
if column in data_by_module:
|
| 196 |
+
data_by_module[column].update(agent_data)
|
| 197 |
+
else:
|
| 198 |
+
data_by_module[column] = agent_data
|
| 199 |
+
continue
|
| 200 |
+
for (
|
| 201 |
+
eps_id,
|
| 202 |
+
agent_id,
|
| 203 |
+
module_id,
|
| 204 |
+
), values_batch_or_list in agent_data.items():
|
| 205 |
+
assert isinstance(values_batch_or_list, list)
|
| 206 |
+
for value in values_batch_or_list:
|
| 207 |
+
if module_id not in data_by_module:
|
| 208 |
+
data_by_module[module_id] = {column: []}
|
| 209 |
+
elif column not in data_by_module[module_id]:
|
| 210 |
+
data_by_module[module_id][column] = []
|
| 211 |
+
|
| 212 |
+
# Append the data.
|
| 213 |
+
data_by_module[module_id][column].append(value)
|
| 214 |
+
|
| 215 |
+
return data_by_module
|
| 216 |
+
|
| 217 |
+
def _map_space_if_necessary(self, space: gym.Space, which: str = "obs"):
|
| 218 |
+
# Analyze input observation space to check, whether the user has already taken
|
| 219 |
+
# care of the agent to module mapping.
|
| 220 |
+
if set(self._rl_module_specs) == set(space.spaces.keys()):
|
| 221 |
+
return space
|
| 222 |
+
|
| 223 |
+
# We need to take care of agent to module mapping. Figure out the resulting
|
| 224 |
+
# observation space here.
|
| 225 |
+
dummy_eps = MultiAgentEpisode()
|
| 226 |
+
|
| 227 |
+
ret_space = {}
|
| 228 |
+
for module_id in self._rl_module_specs:
|
| 229 |
+
# Easy way out, user has provided space in the RLModule spec dict.
|
| 230 |
+
if (
|
| 231 |
+
isinstance(self._rl_module_specs, dict)
|
| 232 |
+
and module_id in self._rl_module_specs
|
| 233 |
+
):
|
| 234 |
+
if (
|
| 235 |
+
which == "obs"
|
| 236 |
+
and self._rl_module_specs[module_id].observation_space
|
| 237 |
+
):
|
| 238 |
+
ret_space[module_id] = self._rl_module_specs[
|
| 239 |
+
module_id
|
| 240 |
+
].observation_space
|
| 241 |
+
continue
|
| 242 |
+
elif which == "act" and self._rl_module_specs[module_id].action_space:
|
| 243 |
+
ret_space[module_id] = self._rl_module_specs[module_id].action_space
|
| 244 |
+
continue
|
| 245 |
+
|
| 246 |
+
# Need to reverse map spaces (for the different agents) to certain
|
| 247 |
+
# module IDs (using a dummy MultiAgentEpisode).
|
| 248 |
+
one_space = next(iter(space.spaces.values()))
|
| 249 |
+
# If all obs spaces are the same anyway, just use the first
|
| 250 |
+
# single-agent space.
|
| 251 |
+
if all(s == one_space for s in space.spaces.values()):
|
| 252 |
+
ret_space[module_id] = one_space
|
| 253 |
+
# Otherwise, we have to compare the ModuleID with all possible
|
| 254 |
+
# AgentIDs and find the agent ID that matches.
|
| 255 |
+
else:
|
| 256 |
+
match_aid = None
|
| 257 |
+
one_agent_for_module_found = False
|
| 258 |
+
for aid in space.spaces.keys():
|
| 259 |
+
# Match: Assign spaces for this agentID to the PolicyID.
|
| 260 |
+
if self._agent_to_module_mapping_fn(aid, dummy_eps) == module_id:
|
| 261 |
+
# Make sure, different agents that map to the same
|
| 262 |
+
# policy don't have different spaces.
|
| 263 |
+
if (
|
| 264 |
+
module_id in ret_space
|
| 265 |
+
and space[aid] != ret_space[module_id]
|
| 266 |
+
):
|
| 267 |
+
raise ValueError(
|
| 268 |
+
f"Two agents ({aid} and {match_aid}) in your "
|
| 269 |
+
"environment map to the same ModuleID (as per your "
|
| 270 |
+
"`agent_to_module_mapping_fn`), however, these agents "
|
| 271 |
+
"also have different observation spaces as per the env!"
|
| 272 |
+
)
|
| 273 |
+
ret_space[module_id] = space[aid]
|
| 274 |
+
match_aid = aid
|
| 275 |
+
one_agent_for_module_found = True
|
| 276 |
+
# Still no space found for this module ID -> Error out.
|
| 277 |
+
if not one_agent_for_module_found:
|
| 278 |
+
raise ValueError(
|
| 279 |
+
f"Could not find or derive any {which}-space for RLModule "
|
| 280 |
+
f"{module_id}! This can happen if your `config.rl_module(rl_"
|
| 281 |
+
f"module_spec=...)` does NOT contain space information for this"
|
| 282 |
+
" particular single-agent module AND your agent-to-module-"
|
| 283 |
+
"mapping function is stochastic (such that for some agent A, "
|
| 284 |
+
"more than one ModuleID might be returned somewhat randomly). "
|
| 285 |
+
f"Fix this error by providing {which}-space information using "
|
| 286 |
+
"`config.rl_module(rl_module_spec=MultiRLModuleSpec("
|
| 287 |
+
f"rl_module_specs={{'{module_id}': RLModuleSpec("
|
| 288 |
+
"observation_space=..., action_space=...)}}))"
|
| 289 |
+
)
|
| 290 |
+
|
| 291 |
+
return gym.spaces.Dict(ret_space)
|
.venv/lib/python3.11/site-packages/ray/rllib/connectors/common/batch_individual_items.py
ADDED
|
@@ -0,0 +1,200 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import Any, Dict, List, Optional
|
| 2 |
+
|
| 3 |
+
import gymnasium as gym
|
| 4 |
+
|
| 5 |
+
from ray.rllib.connectors.connector_v2 import ConnectorV2
|
| 6 |
+
from ray.rllib.core import DEFAULT_MODULE_ID
|
| 7 |
+
from ray.rllib.core.columns import Columns
|
| 8 |
+
from ray.rllib.core.rl_module.multi_rl_module import MultiRLModule
|
| 9 |
+
from ray.rllib.core.rl_module.rl_module import RLModule
|
| 10 |
+
from ray.rllib.utils.annotations import override
|
| 11 |
+
from ray.rllib.utils.spaces.space_utils import batch as batch_fn
|
| 12 |
+
from ray.rllib.utils.typing import EpisodeType
|
| 13 |
+
from ray.util.annotations import PublicAPI
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
@PublicAPI(stability="alpha")
|
| 17 |
+
class BatchIndividualItems(ConnectorV2):
|
| 18 |
+
"""Batches individual data-items (in lists) into tensors (with batch dimension).
|
| 19 |
+
|
| 20 |
+
Note: This is one of the default env-to-module or Learner ConnectorV2 pieces that
|
| 21 |
+
are added automatically by RLlib into every env-to-module/Learner connector
|
| 22 |
+
pipeline, unless `config.add_default_connectors_to_env_to_module_pipeline` or
|
| 23 |
+
`config.add_default_connectors_to_learner_pipeline ` are set to
|
| 24 |
+
False.
|
| 25 |
+
|
| 26 |
+
The default env-to-module connector pipeline is:
|
| 27 |
+
[
|
| 28 |
+
[0 or more user defined ConnectorV2 pieces],
|
| 29 |
+
AddObservationsFromEpisodesToBatch,
|
| 30 |
+
AddTimeDimToBatchAndZeroPad,
|
| 31 |
+
AddStatesFromEpisodesToBatch,
|
| 32 |
+
AgentToModuleMapping, # only in multi-agent setups!
|
| 33 |
+
BatchIndividualItems,
|
| 34 |
+
NumpyToTensor,
|
| 35 |
+
]
|
| 36 |
+
The default Learner connector pipeline is:
|
| 37 |
+
[
|
| 38 |
+
[0 or more user defined ConnectorV2 pieces],
|
| 39 |
+
AddObservationsFromEpisodesToBatch,
|
| 40 |
+
AddColumnsFromEpisodesToTrainBatch,
|
| 41 |
+
AddTimeDimToBatchAndZeroPad,
|
| 42 |
+
AddStatesFromEpisodesToBatch,
|
| 43 |
+
AgentToModuleMapping, # only in multi-agent setups!
|
| 44 |
+
BatchIndividualItems,
|
| 45 |
+
NumpyToTensor,
|
| 46 |
+
]
|
| 47 |
+
|
| 48 |
+
This ConnectorV2:
|
| 49 |
+
- Operates only on the input `data`, NOT the incoming list of episode objects
|
| 50 |
+
(ignored).
|
| 51 |
+
- In the single-agent case, `data` must already be a dict, structured as follows by
|
| 52 |
+
prior connector pieces of the same pipeline:
|
| 53 |
+
[col0] -> {[(eps_id,)]: [list of individual batch items]}
|
| 54 |
+
- In the multi-agent case, `data` must already be a dict, structured as follows by
|
| 55 |
+
prior connector pieces of the same pipeline (in particular the
|
| 56 |
+
`AgentToModuleMapping` piece):
|
| 57 |
+
[module_id] -> [col0] -> [list of individual batch items]
|
| 58 |
+
- Translates the above data under the different columns (e.g. "obs") into final
|
| 59 |
+
(batched) structures. For the single-agent case, the output `data` looks like this:
|
| 60 |
+
[col0] -> [possibly complex struct of batches (at the leafs)].
|
| 61 |
+
For the multi-agent case, the output `data` looks like this:
|
| 62 |
+
[module_id] -> [col0] -> [possibly complex struct of batches (at the leafs)].
|
| 63 |
+
|
| 64 |
+
.. testcode::
|
| 65 |
+
|
| 66 |
+
from ray.rllib.connectors.common import BatchIndividualItems
|
| 67 |
+
from ray.rllib.utils.test_utils import check
|
| 68 |
+
|
| 69 |
+
single_agent_batch = {
|
| 70 |
+
"obs": {
|
| 71 |
+
# Note that at this stage, next-obs is not part of the data anymore ..
|
| 72 |
+
("MA-EPS0",): [0, 1],
|
| 73 |
+
("MA-EPS1",): [2, 3],
|
| 74 |
+
},
|
| 75 |
+
"actions": {
|
| 76 |
+
# .. so we have as many actions per episode as we have observations.
|
| 77 |
+
("MA-EPS0",): [4, 5],
|
| 78 |
+
("MA-EPS1",): [6, 7],
|
| 79 |
+
},
|
| 80 |
+
}
|
| 81 |
+
|
| 82 |
+
# Create our (single-agent) connector piece.
|
| 83 |
+
connector = BatchIndividualItems()
|
| 84 |
+
|
| 85 |
+
# Call the connector (and thereby batch the individual items).
|
| 86 |
+
output_batch = connector(
|
| 87 |
+
rl_module=None, # This particular connector works without an RLModule.
|
| 88 |
+
batch=single_agent_batch,
|
| 89 |
+
episodes=[], # This particular connector works without a list of episodes.
|
| 90 |
+
explore=True,
|
| 91 |
+
shared_data={},
|
| 92 |
+
)
|
| 93 |
+
|
| 94 |
+
# `output_batch` should now be batched (episode IDs should have been removed
|
| 95 |
+
# from the struct).
|
| 96 |
+
check(
|
| 97 |
+
output_batch,
|
| 98 |
+
{"obs": [0, 1, 2, 3], "actions": [4, 5, 6, 7]},
|
| 99 |
+
)
|
| 100 |
+
"""
|
| 101 |
+
|
| 102 |
+
def __init__(
|
| 103 |
+
self,
|
| 104 |
+
input_observation_space: Optional[gym.Space] = None,
|
| 105 |
+
input_action_space: Optional[gym.Space] = None,
|
| 106 |
+
*,
|
| 107 |
+
multi_agent: bool = False,
|
| 108 |
+
**kwargs,
|
| 109 |
+
):
|
| 110 |
+
"""Initializes a BatchIndividualItems instance.
|
| 111 |
+
|
| 112 |
+
Args:
|
| 113 |
+
multi_agent: Whether this is a connector operating on a multi-agent
|
| 114 |
+
observation space mapping AgentIDs to individual agents' observations.
|
| 115 |
+
"""
|
| 116 |
+
super().__init__(
|
| 117 |
+
input_observation_space=input_observation_space,
|
| 118 |
+
input_action_space=input_action_space,
|
| 119 |
+
**kwargs,
|
| 120 |
+
)
|
| 121 |
+
self._multi_agent = multi_agent
|
| 122 |
+
|
| 123 |
+
@override(ConnectorV2)
|
| 124 |
+
def __call__(
|
| 125 |
+
self,
|
| 126 |
+
*,
|
| 127 |
+
rl_module: RLModule,
|
| 128 |
+
batch: Dict[str, Any],
|
| 129 |
+
episodes: List[EpisodeType],
|
| 130 |
+
explore: Optional[bool] = None,
|
| 131 |
+
shared_data: Optional[dict] = None,
|
| 132 |
+
**kwargs,
|
| 133 |
+
) -> Any:
|
| 134 |
+
is_multi_rl_module = isinstance(rl_module, MultiRLModule)
|
| 135 |
+
|
| 136 |
+
# Convert lists of individual items into properly batched data.
|
| 137 |
+
for column, column_data in batch.copy().items():
|
| 138 |
+
# Multi-agent case: This connector piece should only be used after(!)
|
| 139 |
+
# the AgentToModuleMapping connector has already been applied, leading
|
| 140 |
+
# to a batch structure of:
|
| 141 |
+
# [module_id] -> [col0] -> [list of individual batch items]
|
| 142 |
+
if is_multi_rl_module and column in rl_module:
|
| 143 |
+
# Case, in which a column has already been properly batched before this
|
| 144 |
+
# connector piece is called.
|
| 145 |
+
if not self._multi_agent:
|
| 146 |
+
continue
|
| 147 |
+
# If MA Off-Policy and independent sampling we need to overcome this
|
| 148 |
+
# check.
|
| 149 |
+
module_data = column_data
|
| 150 |
+
for col, col_data in module_data.copy().items():
|
| 151 |
+
if isinstance(col_data, list) and col != Columns.INFOS:
|
| 152 |
+
module_data[col] = batch_fn(
|
| 153 |
+
col_data,
|
| 154 |
+
individual_items_already_have_batch_dim="auto",
|
| 155 |
+
)
|
| 156 |
+
|
| 157 |
+
# Simple case: There is a list directly under `column`:
|
| 158 |
+
# Batch the list.
|
| 159 |
+
elif isinstance(column_data, list):
|
| 160 |
+
batch[column] = batch_fn(
|
| 161 |
+
column_data,
|
| 162 |
+
individual_items_already_have_batch_dim="auto",
|
| 163 |
+
)
|
| 164 |
+
|
| 165 |
+
# Single-agent case: There is a dict under `column` mapping
|
| 166 |
+
# `eps_id` to lists of items:
|
| 167 |
+
# Concat all these lists, then batch.
|
| 168 |
+
elif not self._multi_agent:
|
| 169 |
+
# TODO: only really need this in non-Learner connector pipeline
|
| 170 |
+
memorized_map_structure = []
|
| 171 |
+
list_to_be_batched = []
|
| 172 |
+
for (eps_id,) in column_data.keys():
|
| 173 |
+
for item in column_data[(eps_id,)]:
|
| 174 |
+
# Only record structure for OBS column.
|
| 175 |
+
if column == Columns.OBS:
|
| 176 |
+
memorized_map_structure.append(eps_id)
|
| 177 |
+
list_to_be_batched.append(item)
|
| 178 |
+
# INFOS should not be batched (remain a list).
|
| 179 |
+
batch[column] = (
|
| 180 |
+
list_to_be_batched
|
| 181 |
+
if column == Columns.INFOS
|
| 182 |
+
else batch_fn(
|
| 183 |
+
list_to_be_batched,
|
| 184 |
+
individual_items_already_have_batch_dim="auto",
|
| 185 |
+
)
|
| 186 |
+
)
|
| 187 |
+
if is_multi_rl_module:
|
| 188 |
+
if DEFAULT_MODULE_ID not in batch:
|
| 189 |
+
batch[DEFAULT_MODULE_ID] = {}
|
| 190 |
+
batch[DEFAULT_MODULE_ID][column] = batch.pop(column)
|
| 191 |
+
|
| 192 |
+
# Only record structure for OBS column.
|
| 193 |
+
if column == Columns.OBS:
|
| 194 |
+
shared_data["memorized_map_structure"] = memorized_map_structure
|
| 195 |
+
# Multi-agent case: But Module ID not found in our RLModule -> Ignore this
|
| 196 |
+
# `module_id` entirely.
|
| 197 |
+
# else:
|
| 198 |
+
# pass
|
| 199 |
+
|
| 200 |
+
return batch
|
.venv/lib/python3.11/site-packages/ray/rllib/connectors/common/frame_stacking.py
ADDED
|
@@ -0,0 +1,147 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
from typing import Any, Dict, List, Optional
|
| 3 |
+
|
| 4 |
+
import gymnasium as gym
|
| 5 |
+
import tree # pip install dm_tree
|
| 6 |
+
|
| 7 |
+
from ray.rllib.connectors.connector_v2 import ConnectorV2
|
| 8 |
+
from ray.rllib.core.columns import Columns
|
| 9 |
+
from ray.rllib.core.rl_module.rl_module import RLModule
|
| 10 |
+
from ray.rllib.utils.annotations import override
|
| 11 |
+
from ray.rllib.utils.typing import EpisodeType
|
| 12 |
+
from ray.util.annotations import PublicAPI
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
@PublicAPI(stability="alpha")
|
| 16 |
+
class _FrameStacking(ConnectorV2):
|
| 17 |
+
"""A connector piece that stacks the previous n observations into one."""
|
| 18 |
+
|
| 19 |
+
@override(ConnectorV2)
|
| 20 |
+
def recompute_output_observation_space(
|
| 21 |
+
self,
|
| 22 |
+
input_observation_space: gym.Space,
|
| 23 |
+
input_action_space: gym.Space,
|
| 24 |
+
) -> gym.Space:
|
| 25 |
+
# Change our observation space according to the given stacking settings.
|
| 26 |
+
if self._multi_agent:
|
| 27 |
+
ret = {}
|
| 28 |
+
for agent_id, obs_space in input_observation_space.spaces.items():
|
| 29 |
+
ret[agent_id] = self._convert_individual_space(obs_space)
|
| 30 |
+
return gym.spaces.Dict(ret)
|
| 31 |
+
else:
|
| 32 |
+
return self._convert_individual_space(input_observation_space)
|
| 33 |
+
|
| 34 |
+
def __init__(
|
| 35 |
+
self,
|
| 36 |
+
input_observation_space: Optional[gym.Space] = None,
|
| 37 |
+
input_action_space: Optional[gym.Space] = None,
|
| 38 |
+
*,
|
| 39 |
+
num_frames: int = 1,
|
| 40 |
+
multi_agent: bool = False,
|
| 41 |
+
as_learner_connector: bool = False,
|
| 42 |
+
**kwargs,
|
| 43 |
+
):
|
| 44 |
+
"""Initializes a _FrameStackingConnector instance.
|
| 45 |
+
|
| 46 |
+
Args:
|
| 47 |
+
num_frames: The number of observation frames to stack up (into a single
|
| 48 |
+
observation) for the RLModule's forward pass.
|
| 49 |
+
multi_agent: Whether this is a connector operating on a multi-agent
|
| 50 |
+
observation space mapping AgentIDs to individual agents' observations.
|
| 51 |
+
as_learner_connector: Whether this connector is part of a Learner connector
|
| 52 |
+
pipeline, as opposed to an env-to-module pipeline.
|
| 53 |
+
"""
|
| 54 |
+
super().__init__(
|
| 55 |
+
input_observation_space=input_observation_space,
|
| 56 |
+
input_action_space=input_action_space,
|
| 57 |
+
**kwargs,
|
| 58 |
+
)
|
| 59 |
+
|
| 60 |
+
self._multi_agent = multi_agent
|
| 61 |
+
self.num_frames = num_frames
|
| 62 |
+
self._as_learner_connector = as_learner_connector
|
| 63 |
+
|
| 64 |
+
@override(ConnectorV2)
|
| 65 |
+
def __call__(
|
| 66 |
+
self,
|
| 67 |
+
*,
|
| 68 |
+
rl_module: RLModule,
|
| 69 |
+
batch: Dict[str, Any],
|
| 70 |
+
episodes: List[EpisodeType],
|
| 71 |
+
explore: Optional[bool] = None,
|
| 72 |
+
shared_data: Optional[dict] = None,
|
| 73 |
+
**kwargs,
|
| 74 |
+
) -> Any:
|
| 75 |
+
# Learner connector pipeline. Episodes have been numpy'ized.
|
| 76 |
+
if self._as_learner_connector:
|
| 77 |
+
for sa_episode in self.single_agent_episode_iterator(
|
| 78 |
+
episodes, agents_that_stepped_only=False
|
| 79 |
+
):
|
| 80 |
+
|
| 81 |
+
def _map_fn(s, _sa_episode=sa_episode):
|
| 82 |
+
# Squeeze out last dim.
|
| 83 |
+
s = np.squeeze(s, axis=-1)
|
| 84 |
+
# Calculate new shape and strides
|
| 85 |
+
new_shape = (len(_sa_episode), self.num_frames) + s.shape[1:]
|
| 86 |
+
new_strides = (s.strides[0],) + s.strides
|
| 87 |
+
# Create a strided view of the array.
|
| 88 |
+
return np.transpose(
|
| 89 |
+
np.lib.stride_tricks.as_strided(
|
| 90 |
+
s, shape=new_shape, strides=new_strides
|
| 91 |
+
),
|
| 92 |
+
axes=[0, 2, 3, 1],
|
| 93 |
+
)
|
| 94 |
+
|
| 95 |
+
# Get all observations from the episode in one np array (except for
|
| 96 |
+
# the very last one, which is the final observation not needed for
|
| 97 |
+
# learning).
|
| 98 |
+
self.add_n_batch_items(
|
| 99 |
+
batch=batch,
|
| 100 |
+
column=Columns.OBS,
|
| 101 |
+
items_to_add=tree.map_structure(
|
| 102 |
+
_map_fn,
|
| 103 |
+
sa_episode.get_observations(
|
| 104 |
+
indices=slice(-self.num_frames + 1, len(sa_episode)),
|
| 105 |
+
neg_index_as_lookback=True,
|
| 106 |
+
fill=0.0,
|
| 107 |
+
),
|
| 108 |
+
),
|
| 109 |
+
num_items=len(sa_episode),
|
| 110 |
+
single_agent_episode=sa_episode,
|
| 111 |
+
)
|
| 112 |
+
|
| 113 |
+
# Env-to-module pipeline. Episodes still operate on lists.
|
| 114 |
+
else:
|
| 115 |
+
for sa_episode in self.single_agent_episode_iterator(episodes):
|
| 116 |
+
assert not sa_episode.is_numpy
|
| 117 |
+
# Get the list of observations to stack.
|
| 118 |
+
obs_stack = sa_episode.get_observations(
|
| 119 |
+
indices=slice(-self.num_frames, None),
|
| 120 |
+
fill=0.0,
|
| 121 |
+
)
|
| 122 |
+
# Observation components are (w, h, 1)
|
| 123 |
+
# -> concatenate along axis=-1 to (w, h, [num_frames]).
|
| 124 |
+
stacked_obs = tree.map_structure(
|
| 125 |
+
lambda *s: np.concatenate(s, axis=2),
|
| 126 |
+
*obs_stack,
|
| 127 |
+
)
|
| 128 |
+
self.add_batch_item(
|
| 129 |
+
batch=batch,
|
| 130 |
+
column=Columns.OBS,
|
| 131 |
+
item_to_add=stacked_obs,
|
| 132 |
+
single_agent_episode=sa_episode,
|
| 133 |
+
)
|
| 134 |
+
|
| 135 |
+
return batch
|
| 136 |
+
|
| 137 |
+
def _convert_individual_space(self, obs_space):
|
| 138 |
+
# Some assumptions: Space is box AND last dim (the stacking one) is 1.
|
| 139 |
+
assert isinstance(obs_space, gym.spaces.Box), obs_space
|
| 140 |
+
assert obs_space.shape[-1] == 1, obs_space
|
| 141 |
+
|
| 142 |
+
return gym.spaces.Box(
|
| 143 |
+
low=np.repeat(obs_space.low, repeats=self.num_frames, axis=-1),
|
| 144 |
+
high=np.repeat(obs_space.high, repeats=self.num_frames, axis=-1),
|
| 145 |
+
shape=list(obs_space.shape)[:-1] + [self.num_frames],
|
| 146 |
+
dtype=obs_space.dtype,
|
| 147 |
+
)
|
.venv/lib/python3.11/site-packages/ray/rllib/connectors/common/module_to_agent_unmapping.py
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from collections import defaultdict
|
| 2 |
+
from typing import Any, Dict, List, Optional
|
| 3 |
+
|
| 4 |
+
from ray.rllib.connectors.connector_v2 import ConnectorV2
|
| 5 |
+
from ray.rllib.core.rl_module.rl_module import RLModule
|
| 6 |
+
from ray.rllib.env.multi_agent_episode import MultiAgentEpisode
|
| 7 |
+
from ray.rllib.utils.annotations import override
|
| 8 |
+
from ray.rllib.utils.typing import EpisodeType
|
| 9 |
+
from ray.util.annotations import PublicAPI
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
@PublicAPI(stability="alpha")
|
| 13 |
+
class ModuleToAgentUnmapping(ConnectorV2):
|
| 14 |
+
"""Performs flipping of `data` from ModuleID- to AgentID based mapping.
|
| 15 |
+
|
| 16 |
+
Before mapping:
|
| 17 |
+
data[module1] -> [col, e.g. ACTIONS]
|
| 18 |
+
-> [dict mapping episode-identifying tuples to lists of data]
|
| 19 |
+
data[module2] -> ...
|
| 20 |
+
|
| 21 |
+
After mapping:
|
| 22 |
+
data[ACTIONS]: [dict mapping episode-identifying tuples to lists of data]
|
| 23 |
+
|
| 24 |
+
Note that episode-identifying tuples have the form of: (episode_id,) in the
|
| 25 |
+
single-agent case and (ma_episode_id, agent_id, module_id) in the multi-agent
|
| 26 |
+
case.
|
| 27 |
+
"""
|
| 28 |
+
|
| 29 |
+
@override(ConnectorV2)
|
| 30 |
+
def __call__(
|
| 31 |
+
self,
|
| 32 |
+
*,
|
| 33 |
+
rl_module: RLModule,
|
| 34 |
+
batch: Dict[str, Any],
|
| 35 |
+
episodes: List[EpisodeType],
|
| 36 |
+
explore: Optional[bool] = None,
|
| 37 |
+
shared_data: Optional[dict] = None,
|
| 38 |
+
**kwargs,
|
| 39 |
+
) -> Any:
|
| 40 |
+
# This Connector should only be used in a multi-agent setting.
|
| 41 |
+
assert isinstance(episodes[0], MultiAgentEpisode)
|
| 42 |
+
|
| 43 |
+
agent_data = defaultdict(dict)
|
| 44 |
+
for module_id, module_data in batch.items():
|
| 45 |
+
for column, values_dict in module_data.items():
|
| 46 |
+
agent_data[column].update(values_dict)
|
| 47 |
+
|
| 48 |
+
return dict(agent_data)
|
.venv/lib/python3.11/site-packages/ray/rllib/connectors/common/numpy_to_tensor.py
ADDED
|
@@ -0,0 +1,125 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import Any, Dict, List, Optional
|
| 2 |
+
|
| 3 |
+
import gymnasium as gym
|
| 4 |
+
|
| 5 |
+
from ray.rllib.connectors.connector_v2 import ConnectorV2
|
| 6 |
+
from ray.rllib.core import DEFAULT_MODULE_ID
|
| 7 |
+
from ray.rllib.core.columns import Columns
|
| 8 |
+
from ray.rllib.core.rl_module.multi_rl_module import MultiRLModule
|
| 9 |
+
from ray.rllib.core.rl_module.rl_module import RLModule
|
| 10 |
+
from ray.rllib.utils.annotations import override
|
| 11 |
+
from ray.rllib.utils.torch_utils import convert_to_torch_tensor
|
| 12 |
+
from ray.rllib.utils.typing import EpisodeType
|
| 13 |
+
from ray.util.annotations import PublicAPI
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
@PublicAPI(stability="alpha")
|
| 17 |
+
class NumpyToTensor(ConnectorV2):
|
| 18 |
+
"""Converts numpy arrays across the entire input data into (framework) tensors.
|
| 19 |
+
|
| 20 |
+
The framework information is received via the provided `rl_module` arg in the
|
| 21 |
+
`__call__()` method.
|
| 22 |
+
|
| 23 |
+
Note: This is one of the default env-to-module or Learner ConnectorV2 pieces that
|
| 24 |
+
are added automatically by RLlib into every env-to-module/Learner connector
|
| 25 |
+
pipeline, unless `config.add_default_connectors_to_env_to_module_pipeline` or
|
| 26 |
+
`config.add_default_connectors_to_learner_pipeline ` are set to
|
| 27 |
+
False.
|
| 28 |
+
|
| 29 |
+
The default env-to-module connector pipeline is:
|
| 30 |
+
[
|
| 31 |
+
[0 or more user defined ConnectorV2 pieces],
|
| 32 |
+
AddObservationsFromEpisodesToBatch,
|
| 33 |
+
AddTimeDimToBatchAndZeroPad,
|
| 34 |
+
AddStatesFromEpisodesToBatch,
|
| 35 |
+
AgentToModuleMapping, # only in multi-agent setups!
|
| 36 |
+
BatchIndividualItems,
|
| 37 |
+
NumpyToTensor,
|
| 38 |
+
]
|
| 39 |
+
The default Learner connector pipeline is:
|
| 40 |
+
[
|
| 41 |
+
[0 or more user defined ConnectorV2 pieces],
|
| 42 |
+
AddObservationsFromEpisodesToBatch,
|
| 43 |
+
AddColumnsFromEpisodesToTrainBatch,
|
| 44 |
+
AddTimeDimToBatchAndZeroPad,
|
| 45 |
+
AddStatesFromEpisodesToBatch,
|
| 46 |
+
AgentToModuleMapping, # only in multi-agent setups!
|
| 47 |
+
BatchIndividualItems,
|
| 48 |
+
NumpyToTensor,
|
| 49 |
+
]
|
| 50 |
+
|
| 51 |
+
This ConnectorV2:
|
| 52 |
+
- Loops through the input `data` and converts all found numpy arrays into
|
| 53 |
+
framework-specific tensors (possibly on a GPU).
|
| 54 |
+
"""
|
| 55 |
+
|
| 56 |
+
def __init__(
|
| 57 |
+
self,
|
| 58 |
+
input_observation_space: Optional[gym.Space] = None,
|
| 59 |
+
input_action_space: Optional[gym.Space] = None,
|
| 60 |
+
*,
|
| 61 |
+
as_learner_connector: bool = False,
|
| 62 |
+
pin_mempory: Optional[bool] = None,
|
| 63 |
+
device: Optional[str] = None,
|
| 64 |
+
**kwargs,
|
| 65 |
+
):
|
| 66 |
+
"""Initializes a NumpyToTensor instance.
|
| 67 |
+
|
| 68 |
+
Args:
|
| 69 |
+
as_learner_connector: Whether this ConnectorV2 piece is used inside a
|
| 70 |
+
LearnerConnectorPipeline or not.
|
| 71 |
+
pin_mempory: Whether to pin memory when creating (torch) tensors.
|
| 72 |
+
If None (default), pins memory if `as_learner_connector` is True,
|
| 73 |
+
otherwise doesn't pin memory.
|
| 74 |
+
device: An optional device to move the resulting tensors to. If not
|
| 75 |
+
provided, all data will be left on the CPU.
|
| 76 |
+
**kwargs:
|
| 77 |
+
"""
|
| 78 |
+
super().__init__(
|
| 79 |
+
input_observation_space=input_observation_space,
|
| 80 |
+
input_action_space=input_action_space,
|
| 81 |
+
**kwargs,
|
| 82 |
+
)
|
| 83 |
+
self._as_learner_connector = as_learner_connector
|
| 84 |
+
self._pin_memory = (
|
| 85 |
+
pin_mempory if pin_mempory is not None else self._as_learner_connector
|
| 86 |
+
)
|
| 87 |
+
self._device = device
|
| 88 |
+
|
| 89 |
+
@override(ConnectorV2)
|
| 90 |
+
def __call__(
|
| 91 |
+
self,
|
| 92 |
+
*,
|
| 93 |
+
rl_module: RLModule,
|
| 94 |
+
batch: Dict[str, Any],
|
| 95 |
+
episodes: List[EpisodeType],
|
| 96 |
+
explore: Optional[bool] = None,
|
| 97 |
+
shared_data: Optional[dict] = None,
|
| 98 |
+
**kwargs,
|
| 99 |
+
) -> Any:
|
| 100 |
+
is_single_agent = False
|
| 101 |
+
is_multi_rl_module = isinstance(rl_module, MultiRLModule)
|
| 102 |
+
# `data` already a ModuleID to batch mapping format.
|
| 103 |
+
if not (is_multi_rl_module and all(c in rl_module._rl_modules for c in batch)):
|
| 104 |
+
is_single_agent = True
|
| 105 |
+
batch = {DEFAULT_MODULE_ID: batch}
|
| 106 |
+
|
| 107 |
+
for module_id, module_data in batch.copy().items():
|
| 108 |
+
infos = module_data.pop(Columns.INFOS, None)
|
| 109 |
+
if rl_module.framework == "torch":
|
| 110 |
+
module_data = convert_to_torch_tensor(
|
| 111 |
+
module_data, pin_memory=self._pin_memory, device=self._device
|
| 112 |
+
)
|
| 113 |
+
else:
|
| 114 |
+
raise ValueError(
|
| 115 |
+
"`NumpyToTensor`does NOT support frameworks other than torch!"
|
| 116 |
+
)
|
| 117 |
+
if infos is not None:
|
| 118 |
+
module_data[Columns.INFOS] = infos
|
| 119 |
+
# Early out with data under(!) `DEFAULT_MODULE_ID`, b/c we are in plain
|
| 120 |
+
# single-agent mode.
|
| 121 |
+
if is_single_agent:
|
| 122 |
+
return module_data
|
| 123 |
+
batch[module_id] = module_data
|
| 124 |
+
|
| 125 |
+
return batch
|
.venv/lib/python3.11/site-packages/ray/rllib/connectors/common/tensor_to_numpy.py
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import Any, Dict, List, Optional
|
| 2 |
+
|
| 3 |
+
from ray.rllib.connectors.connector_v2 import ConnectorV2
|
| 4 |
+
from ray.rllib.core.rl_module.rl_module import RLModule
|
| 5 |
+
from ray.rllib.utils.annotations import override
|
| 6 |
+
from ray.rllib.utils.numpy import convert_to_numpy
|
| 7 |
+
from ray.rllib.utils.typing import EpisodeType
|
| 8 |
+
from ray.util.annotations import PublicAPI
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
@PublicAPI(stability="alpha")
|
| 12 |
+
class TensorToNumpy(ConnectorV2):
|
| 13 |
+
"""Converts (framework) tensors across the entire input data into numpy arrays."""
|
| 14 |
+
|
| 15 |
+
@override(ConnectorV2)
|
| 16 |
+
def __call__(
|
| 17 |
+
self,
|
| 18 |
+
*,
|
| 19 |
+
rl_module: RLModule,
|
| 20 |
+
batch: Dict[str, Any],
|
| 21 |
+
episodes: List[EpisodeType],
|
| 22 |
+
explore: Optional[bool] = None,
|
| 23 |
+
shared_data: Optional[dict] = None,
|
| 24 |
+
**kwargs,
|
| 25 |
+
) -> Any:
|
| 26 |
+
return convert_to_numpy(batch)
|
.venv/lib/python3.11/site-packages/ray/rllib/connectors/env_to_module/__pycache__/__init__.cpython-311.pyc
ADDED
|
Binary file (1.7 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/ray/rllib/connectors/env_to_module/__pycache__/env_to_module_pipeline.cpython-311.pyc
ADDED
|
Binary file (2.61 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/ray/rllib/connectors/env_to_module/__pycache__/flatten_observations.cpython-311.pyc
ADDED
|
Binary file (9.32 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/ray/rllib/connectors/env_to_module/__pycache__/frame_stacking.cpython-311.pyc
ADDED
|
Binary file (465 Bytes). View file
|
|
|
.venv/lib/python3.11/site-packages/ray/rllib/connectors/env_to_module/__pycache__/observation_preprocessor.cpython-311.pyc
ADDED
|
Binary file (3.77 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/ray/rllib/connectors/env_to_module/__pycache__/write_observations_to_episodes.cpython-311.pyc
ADDED
|
Binary file (5.87 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/ray/rllib/connectors/learner/__pycache__/add_one_ts_to_episodes_and_truncate.cpython-311.pyc
ADDED
|
Binary file (6.34 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/ray/rllib/connectors/learner/__pycache__/frame_stacking.cpython-311.pyc
ADDED
|
Binary file (455 Bytes). View file
|
|
|
.venv/lib/python3.11/site-packages/ray/rllib/connectors/learner/__pycache__/learner_connector_pipeline.cpython-311.pyc
ADDED
|
Binary file (2.75 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/ray/rllib/connectors/learner/add_columns_from_episodes_to_train_batch.py
ADDED
|
@@ -0,0 +1,166 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import Any, Dict, List, Optional
|
| 2 |
+
|
| 3 |
+
from ray.rllib.connectors.connector_v2 import ConnectorV2
|
| 4 |
+
from ray.rllib.core.columns import Columns
|
| 5 |
+
from ray.rllib.core.rl_module.rl_module import RLModule
|
| 6 |
+
from ray.rllib.utils.annotations import override
|
| 7 |
+
from ray.rllib.utils.typing import EpisodeType
|
| 8 |
+
from ray.util.annotations import PublicAPI
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
@PublicAPI(stability="alpha")
|
| 12 |
+
class AddColumnsFromEpisodesToTrainBatch(ConnectorV2):
|
| 13 |
+
"""Adds infos/actions/rewards/terminateds/... to train batch.
|
| 14 |
+
|
| 15 |
+
Note: This is one of the default Learner ConnectorV2 pieces that are added
|
| 16 |
+
automatically by RLlib into every Learner connector pipeline, unless
|
| 17 |
+
`config.add_default_connectors_to_learner_pipeline` is set to False.
|
| 18 |
+
|
| 19 |
+
The default Learner connector pipeline is:
|
| 20 |
+
[
|
| 21 |
+
[0 or more user defined ConnectorV2 pieces],
|
| 22 |
+
AddObservationsFromEpisodesToBatch,
|
| 23 |
+
AddColumnsFromEpisodesToTrainBatch,
|
| 24 |
+
AddTimeDimToBatchAndZeroPad,
|
| 25 |
+
AddStatesFromEpisodesToBatch,
|
| 26 |
+
AgentToModuleMapping, # only in multi-agent setups!
|
| 27 |
+
BatchIndividualItems,
|
| 28 |
+
NumpyToTensor,
|
| 29 |
+
]
|
| 30 |
+
|
| 31 |
+
Does NOT add observations to train batch (these should have already been added
|
| 32 |
+
by another ConnectorV2 piece: `AddObservationsToTrainBatch` in the same pipeline).
|
| 33 |
+
|
| 34 |
+
If provided with `episodes` data, this connector piece makes sure that the final
|
| 35 |
+
train batch going into the RLModule for updating (`forward_train()` call) contains
|
| 36 |
+
at the minimum:
|
| 37 |
+
- Observations: From all episodes under the Columns.OBS key.
|
| 38 |
+
- Actions, rewards, terminal/truncation flags: From all episodes under the
|
| 39 |
+
respective keys.
|
| 40 |
+
- All data inside the episodes' `extra_model_outs` property, e.g. action logp and
|
| 41 |
+
action probs under the respective keys.
|
| 42 |
+
- Internal states: These will NOT be added to the batch by this connector piece
|
| 43 |
+
as this functionality is handled by a different default connector piece:
|
| 44 |
+
`AddStatesFromEpisodesToBatch`.
|
| 45 |
+
|
| 46 |
+
If the user wants to customize their own data under the given keys (e.g. obs,
|
| 47 |
+
actions, ...), they can extract from the episodes or recompute from `data`
|
| 48 |
+
their own data and store it in `data` under those keys. In this case, the default
|
| 49 |
+
connector will not change the data under these keys and simply act as a
|
| 50 |
+
pass-through.
|
| 51 |
+
"""
|
| 52 |
+
|
| 53 |
+
@override(ConnectorV2)
|
| 54 |
+
def __call__(
|
| 55 |
+
self,
|
| 56 |
+
*,
|
| 57 |
+
rl_module: RLModule,
|
| 58 |
+
batch: Optional[Dict[str, Any]],
|
| 59 |
+
episodes: List[EpisodeType],
|
| 60 |
+
explore: Optional[bool] = None,
|
| 61 |
+
shared_data: Optional[dict] = None,
|
| 62 |
+
**kwargs,
|
| 63 |
+
) -> Any:
|
| 64 |
+
# Infos.
|
| 65 |
+
if Columns.INFOS not in batch:
|
| 66 |
+
for sa_episode in self.single_agent_episode_iterator(
|
| 67 |
+
episodes,
|
| 68 |
+
agents_that_stepped_only=False,
|
| 69 |
+
):
|
| 70 |
+
self.add_n_batch_items(
|
| 71 |
+
batch,
|
| 72 |
+
Columns.INFOS,
|
| 73 |
+
items_to_add=sa_episode.get_infos(slice(0, len(sa_episode))),
|
| 74 |
+
num_items=len(sa_episode),
|
| 75 |
+
single_agent_episode=sa_episode,
|
| 76 |
+
)
|
| 77 |
+
|
| 78 |
+
# Actions.
|
| 79 |
+
if Columns.ACTIONS not in batch:
|
| 80 |
+
for sa_episode in self.single_agent_episode_iterator(
|
| 81 |
+
episodes,
|
| 82 |
+
agents_that_stepped_only=False,
|
| 83 |
+
):
|
| 84 |
+
self.add_n_batch_items(
|
| 85 |
+
batch,
|
| 86 |
+
Columns.ACTIONS,
|
| 87 |
+
items_to_add=[
|
| 88 |
+
sa_episode.get_actions(indices=ts)
|
| 89 |
+
for ts in range(len(sa_episode))
|
| 90 |
+
],
|
| 91 |
+
num_items=len(sa_episode),
|
| 92 |
+
single_agent_episode=sa_episode,
|
| 93 |
+
)
|
| 94 |
+
# Rewards.
|
| 95 |
+
if Columns.REWARDS not in batch:
|
| 96 |
+
for sa_episode in self.single_agent_episode_iterator(
|
| 97 |
+
episodes,
|
| 98 |
+
agents_that_stepped_only=False,
|
| 99 |
+
):
|
| 100 |
+
self.add_n_batch_items(
|
| 101 |
+
batch,
|
| 102 |
+
Columns.REWARDS,
|
| 103 |
+
items_to_add=[
|
| 104 |
+
sa_episode.get_rewards(indices=ts)
|
| 105 |
+
for ts in range(len(sa_episode))
|
| 106 |
+
],
|
| 107 |
+
num_items=len(sa_episode),
|
| 108 |
+
single_agent_episode=sa_episode,
|
| 109 |
+
)
|
| 110 |
+
# Terminateds.
|
| 111 |
+
if Columns.TERMINATEDS not in batch:
|
| 112 |
+
for sa_episode in self.single_agent_episode_iterator(
|
| 113 |
+
episodes,
|
| 114 |
+
agents_that_stepped_only=False,
|
| 115 |
+
):
|
| 116 |
+
self.add_n_batch_items(
|
| 117 |
+
batch,
|
| 118 |
+
Columns.TERMINATEDS,
|
| 119 |
+
items_to_add=(
|
| 120 |
+
[False] * (len(sa_episode) - 1) + [sa_episode.is_terminated]
|
| 121 |
+
if len(sa_episode) > 0
|
| 122 |
+
else []
|
| 123 |
+
),
|
| 124 |
+
num_items=len(sa_episode),
|
| 125 |
+
single_agent_episode=sa_episode,
|
| 126 |
+
)
|
| 127 |
+
# Truncateds.
|
| 128 |
+
if Columns.TRUNCATEDS not in batch:
|
| 129 |
+
for sa_episode in self.single_agent_episode_iterator(
|
| 130 |
+
episodes,
|
| 131 |
+
agents_that_stepped_only=False,
|
| 132 |
+
):
|
| 133 |
+
self.add_n_batch_items(
|
| 134 |
+
batch,
|
| 135 |
+
Columns.TRUNCATEDS,
|
| 136 |
+
items_to_add=(
|
| 137 |
+
[False] * (len(sa_episode) - 1) + [sa_episode.is_truncated]
|
| 138 |
+
if len(sa_episode) > 0
|
| 139 |
+
else []
|
| 140 |
+
),
|
| 141 |
+
num_items=len(sa_episode),
|
| 142 |
+
single_agent_episode=sa_episode,
|
| 143 |
+
)
|
| 144 |
+
# Extra model outputs (except for STATE_OUT, which will be handled by another
|
| 145 |
+
# default connector piece). Also, like with all the fields above, skip
|
| 146 |
+
# those that the user already seemed to have populated via custom connector
|
| 147 |
+
# pieces.
|
| 148 |
+
skip_columns = set(batch.keys()) | {Columns.STATE_IN, Columns.STATE_OUT}
|
| 149 |
+
for sa_episode in self.single_agent_episode_iterator(
|
| 150 |
+
episodes,
|
| 151 |
+
agents_that_stepped_only=False,
|
| 152 |
+
):
|
| 153 |
+
for column in sa_episode.extra_model_outputs.keys():
|
| 154 |
+
if column not in skip_columns:
|
| 155 |
+
self.add_n_batch_items(
|
| 156 |
+
batch,
|
| 157 |
+
column,
|
| 158 |
+
items_to_add=[
|
| 159 |
+
sa_episode.get_extra_model_outputs(key=column, indices=ts)
|
| 160 |
+
for ts in range(len(sa_episode))
|
| 161 |
+
],
|
| 162 |
+
num_items=len(sa_episode),
|
| 163 |
+
single_agent_episode=sa_episode,
|
| 164 |
+
)
|
| 165 |
+
|
| 166 |
+
return batch
|
.venv/lib/python3.11/site-packages/ray/rllib/connectors/learner/add_next_observations_from_episodes_to_train_batch.py
ADDED
|
@@ -0,0 +1,103 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import Any, Dict, List, Optional
|
| 2 |
+
|
| 3 |
+
from ray.rllib.core.columns import Columns
|
| 4 |
+
from ray.rllib.connectors.connector_v2 import ConnectorV2
|
| 5 |
+
from ray.rllib.core.rl_module.rl_module import RLModule
|
| 6 |
+
from ray.rllib.utils.annotations import override
|
| 7 |
+
from ray.rllib.utils.typing import EpisodeType
|
| 8 |
+
from ray.util.annotations import PublicAPI
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
@PublicAPI(stability="alpha")
|
| 12 |
+
class AddNextObservationsFromEpisodesToTrainBatch(ConnectorV2):
|
| 13 |
+
"""Adds the NEXT_OBS column with the correct episode observations to train batch.
|
| 14 |
+
|
| 15 |
+
- Operates on a list of Episode objects.
|
| 16 |
+
- Gets all observation(s) from all the given episodes (except the very first ones)
|
| 17 |
+
and adds them to the batch under construction in the NEXT_OBS column (as a list of
|
| 18 |
+
individual observations).
|
| 19 |
+
- Does NOT alter any observations (or other data) in the given episodes.
|
| 20 |
+
- Can be used in Learner connector pipelines.
|
| 21 |
+
|
| 22 |
+
.. testcode::
|
| 23 |
+
|
| 24 |
+
import gymnasium as gym
|
| 25 |
+
import numpy as np
|
| 26 |
+
|
| 27 |
+
from ray.rllib.connectors.learner import (
|
| 28 |
+
AddNextObservationsFromEpisodesToTrainBatch
|
| 29 |
+
)
|
| 30 |
+
from ray.rllib.core.columns import Columns
|
| 31 |
+
from ray.rllib.env.single_agent_episode import SingleAgentEpisode
|
| 32 |
+
from ray.rllib.utils.test_utils import check
|
| 33 |
+
|
| 34 |
+
# Create two dummy SingleAgentEpisodes, each containing 3 observations,
|
| 35 |
+
# 2 actions and 2 rewards (both episodes are length=2).
|
| 36 |
+
obs_space = gym.spaces.Box(-1.0, 1.0, (2,), np.float32)
|
| 37 |
+
act_space = gym.spaces.Discrete(2)
|
| 38 |
+
|
| 39 |
+
episodes = [SingleAgentEpisode(
|
| 40 |
+
observations=[obs_space.sample(), obs_space.sample(), obs_space.sample()],
|
| 41 |
+
actions=[act_space.sample(), act_space.sample()],
|
| 42 |
+
rewards=[1.0, 2.0],
|
| 43 |
+
len_lookback_buffer=0,
|
| 44 |
+
) for _ in range(2)]
|
| 45 |
+
eps_1_next_obses = episodes[0].get_observations([1, 2])
|
| 46 |
+
eps_2_next_obses = episodes[1].get_observations([1, 2])
|
| 47 |
+
print(f"1st Episode's next obses are {eps_1_next_obses}")
|
| 48 |
+
print(f"2nd Episode's next obses are {eps_2_next_obses}")
|
| 49 |
+
|
| 50 |
+
# Create an instance of this class.
|
| 51 |
+
connector = AddNextObservationsFromEpisodesToTrainBatch()
|
| 52 |
+
|
| 53 |
+
# Call the connector with the two created episodes.
|
| 54 |
+
# Note that this particular connector works without an RLModule, so we
|
| 55 |
+
# simplify here for the sake of this example.
|
| 56 |
+
output_data = connector(
|
| 57 |
+
rl_module=None,
|
| 58 |
+
batch={},
|
| 59 |
+
episodes=episodes,
|
| 60 |
+
explore=True,
|
| 61 |
+
shared_data={},
|
| 62 |
+
)
|
| 63 |
+
# The output data should now contain the last observations of both episodes,
|
| 64 |
+
# in a "per-episode organized" fashion.
|
| 65 |
+
check(
|
| 66 |
+
output_data,
|
| 67 |
+
{
|
| 68 |
+
Columns.NEXT_OBS: {
|
| 69 |
+
(episodes[0].id_,): eps_1_next_obses,
|
| 70 |
+
(episodes[1].id_,): eps_2_next_obses,
|
| 71 |
+
},
|
| 72 |
+
},
|
| 73 |
+
)
|
| 74 |
+
"""
|
| 75 |
+
|
| 76 |
+
@override(ConnectorV2)
|
| 77 |
+
def __call__(
|
| 78 |
+
self,
|
| 79 |
+
*,
|
| 80 |
+
rl_module: RLModule,
|
| 81 |
+
batch: Dict[str, Any],
|
| 82 |
+
episodes: List[EpisodeType],
|
| 83 |
+
explore: Optional[bool] = None,
|
| 84 |
+
shared_data: Optional[dict] = None,
|
| 85 |
+
**kwargs,
|
| 86 |
+
) -> Any:
|
| 87 |
+
# If "obs" already in `batch`, early out.
|
| 88 |
+
if Columns.NEXT_OBS in batch:
|
| 89 |
+
return batch
|
| 90 |
+
|
| 91 |
+
for sa_episode in self.single_agent_episode_iterator(
|
| 92 |
+
# This is a Learner-only connector -> Get all episodes (for train batch).
|
| 93 |
+
episodes,
|
| 94 |
+
agents_that_stepped_only=False,
|
| 95 |
+
):
|
| 96 |
+
self.add_n_batch_items(
|
| 97 |
+
batch,
|
| 98 |
+
Columns.NEXT_OBS,
|
| 99 |
+
items_to_add=sa_episode.get_observations(slice(1, len(sa_episode) + 1)),
|
| 100 |
+
num_items=len(sa_episode),
|
| 101 |
+
single_agent_episode=sa_episode,
|
| 102 |
+
)
|
| 103 |
+
return batch
|