koichi12 commited on
Commit
54753b9
·
verified ·
1 Parent(s): 1f700b9

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .venv/lib/python3.11/site-packages/ray/rllib/__pycache__/__init__.cpython-311.pyc +0 -0
  2. .venv/lib/python3.11/site-packages/ray/rllib/connectors/action/__init__.py +0 -0
  3. .venv/lib/python3.11/site-packages/ray/rllib/connectors/action/__pycache__/__init__.cpython-311.pyc +0 -0
  4. .venv/lib/python3.11/site-packages/ray/rllib/connectors/action/__pycache__/clip.cpython-311.pyc +0 -0
  5. .venv/lib/python3.11/site-packages/ray/rllib/connectors/action/__pycache__/immutable.cpython-311.pyc +0 -0
  6. .venv/lib/python3.11/site-packages/ray/rllib/connectors/action/__pycache__/lambdas.cpython-311.pyc +0 -0
  7. .venv/lib/python3.11/site-packages/ray/rllib/connectors/action/__pycache__/normalize.cpython-311.pyc +0 -0
  8. .venv/lib/python3.11/site-packages/ray/rllib/connectors/action/__pycache__/pipeline.cpython-311.pyc +0 -0
  9. .venv/lib/python3.11/site-packages/ray/rllib/connectors/action/clip.py +41 -0
  10. .venv/lib/python3.11/site-packages/ray/rllib/connectors/action/immutable.py +40 -0
  11. .venv/lib/python3.11/site-packages/ray/rllib/connectors/action/lambdas.py +76 -0
  12. .venv/lib/python3.11/site-packages/ray/rllib/connectors/action/normalize.py +44 -0
  13. .venv/lib/python3.11/site-packages/ray/rllib/connectors/action/pipeline.py +61 -0
  14. .venv/lib/python3.11/site-packages/ray/rllib/connectors/agent/__pycache__/__init__.cpython-311.pyc +0 -0
  15. .venv/lib/python3.11/site-packages/ray/rllib/connectors/agent/__pycache__/clip_reward.cpython-311.pyc +0 -0
  16. .venv/lib/python3.11/site-packages/ray/rllib/connectors/agent/__pycache__/env_sampling.cpython-311.pyc +0 -0
  17. .venv/lib/python3.11/site-packages/ray/rllib/connectors/agent/__pycache__/pipeline.cpython-311.pyc +0 -0
  18. .venv/lib/python3.11/site-packages/ray/rllib/connectors/agent/__pycache__/synced_filter.cpython-311.pyc +0 -0
  19. .venv/lib/python3.11/site-packages/ray/rllib/connectors/agent/__pycache__/view_requirement.cpython-311.pyc +0 -0
  20. .venv/lib/python3.11/site-packages/ray/rllib/connectors/common/__init__.py +22 -0
  21. .venv/lib/python3.11/site-packages/ray/rllib/connectors/common/__pycache__/__init__.cpython-311.pyc +0 -0
  22. .venv/lib/python3.11/site-packages/ray/rllib/connectors/common/__pycache__/add_observations_from_episodes_to_batch.cpython-311.pyc +0 -0
  23. .venv/lib/python3.11/site-packages/ray/rllib/connectors/common/__pycache__/add_states_from_episodes_to_batch.cpython-311.pyc +0 -0
  24. .venv/lib/python3.11/site-packages/ray/rllib/connectors/common/__pycache__/add_time_dim_to_batch_and_zero_pad.cpython-311.pyc +0 -0
  25. .venv/lib/python3.11/site-packages/ray/rllib/connectors/common/__pycache__/agent_to_module_mapping.cpython-311.pyc +0 -0
  26. .venv/lib/python3.11/site-packages/ray/rllib/connectors/common/__pycache__/batch_individual_items.cpython-311.pyc +0 -0
  27. .venv/lib/python3.11/site-packages/ray/rllib/connectors/common/__pycache__/frame_stacking.cpython-311.pyc +0 -0
  28. .venv/lib/python3.11/site-packages/ray/rllib/connectors/common/__pycache__/module_to_agent_unmapping.cpython-311.pyc +0 -0
  29. .venv/lib/python3.11/site-packages/ray/rllib/connectors/common/__pycache__/numpy_to_tensor.cpython-311.pyc +0 -0
  30. .venv/lib/python3.11/site-packages/ray/rllib/connectors/common/__pycache__/tensor_to_numpy.cpython-311.pyc +0 -0
  31. .venv/lib/python3.11/site-packages/ray/rllib/connectors/common/add_observations_from_episodes_to_batch.py +180 -0
  32. .venv/lib/python3.11/site-packages/ray/rllib/connectors/common/add_states_from_episodes_to_batch.py +348 -0
  33. .venv/lib/python3.11/site-packages/ray/rllib/connectors/common/add_time_dim_to_batch_and_zero_pad.py +302 -0
  34. .venv/lib/python3.11/site-packages/ray/rllib/connectors/common/agent_to_module_mapping.py +291 -0
  35. .venv/lib/python3.11/site-packages/ray/rllib/connectors/common/batch_individual_items.py +200 -0
  36. .venv/lib/python3.11/site-packages/ray/rllib/connectors/common/frame_stacking.py +147 -0
  37. .venv/lib/python3.11/site-packages/ray/rllib/connectors/common/module_to_agent_unmapping.py +48 -0
  38. .venv/lib/python3.11/site-packages/ray/rllib/connectors/common/numpy_to_tensor.py +125 -0
  39. .venv/lib/python3.11/site-packages/ray/rllib/connectors/common/tensor_to_numpy.py +26 -0
  40. .venv/lib/python3.11/site-packages/ray/rllib/connectors/env_to_module/__pycache__/__init__.cpython-311.pyc +0 -0
  41. .venv/lib/python3.11/site-packages/ray/rllib/connectors/env_to_module/__pycache__/env_to_module_pipeline.cpython-311.pyc +0 -0
  42. .venv/lib/python3.11/site-packages/ray/rllib/connectors/env_to_module/__pycache__/flatten_observations.cpython-311.pyc +0 -0
  43. .venv/lib/python3.11/site-packages/ray/rllib/connectors/env_to_module/__pycache__/frame_stacking.cpython-311.pyc +0 -0
  44. .venv/lib/python3.11/site-packages/ray/rllib/connectors/env_to_module/__pycache__/observation_preprocessor.cpython-311.pyc +0 -0
  45. .venv/lib/python3.11/site-packages/ray/rllib/connectors/env_to_module/__pycache__/write_observations_to_episodes.cpython-311.pyc +0 -0
  46. .venv/lib/python3.11/site-packages/ray/rllib/connectors/learner/__pycache__/add_one_ts_to_episodes_and_truncate.cpython-311.pyc +0 -0
  47. .venv/lib/python3.11/site-packages/ray/rllib/connectors/learner/__pycache__/frame_stacking.cpython-311.pyc +0 -0
  48. .venv/lib/python3.11/site-packages/ray/rllib/connectors/learner/__pycache__/learner_connector_pipeline.cpython-311.pyc +0 -0
  49. .venv/lib/python3.11/site-packages/ray/rllib/connectors/learner/add_columns_from_episodes_to_train_batch.py +166 -0
  50. .venv/lib/python3.11/site-packages/ray/rllib/connectors/learner/add_next_observations_from_episodes_to_train_batch.py +103 -0
.venv/lib/python3.11/site-packages/ray/rllib/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (2.5 kB). View file
 
.venv/lib/python3.11/site-packages/ray/rllib/connectors/action/__init__.py ADDED
File without changes
.venv/lib/python3.11/site-packages/ray/rllib/connectors/action/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (200 Bytes). View file
 
.venv/lib/python3.11/site-packages/ray/rllib/connectors/action/__pycache__/clip.cpython-311.pyc ADDED
Binary file (2.7 kB). View file
 
.venv/lib/python3.11/site-packages/ray/rllib/connectors/action/__pycache__/immutable.cpython-311.pyc ADDED
Binary file (2.3 kB). View file
 
.venv/lib/python3.11/site-packages/ray/rllib/connectors/action/__pycache__/lambdas.cpython-311.pyc ADDED
Binary file (3.73 kB). View file
 
.venv/lib/python3.11/site-packages/ray/rllib/connectors/action/__pycache__/normalize.cpython-311.pyc ADDED
Binary file (2.75 kB). View file
 
.venv/lib/python3.11/site-packages/ray/rllib/connectors/action/__pycache__/pipeline.cpython-311.pyc ADDED
Binary file (4.02 kB). View file
 
.venv/lib/python3.11/site-packages/ray/rllib/connectors/action/clip.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
from typing import Any

from ray.rllib.connectors.connector import (
    ActionConnector,
    ConnectorContext,
)
from ray.rllib.connectors.registry import register_connector
from ray.rllib.utils.spaces.space_utils import clip_action, get_base_struct_from_space
from ray.rllib.utils.typing import ActionConnectorDataType
from ray.rllib.utils.annotations import OldAPIStack


@OldAPIStack
class ClipActionsConnector(ActionConnector):
    """Action connector that clips computed actions to the action space's bounds.

    Stateless apart from the cached base struct of the context's action space.
    """

    def __init__(self, ctx: ConnectorContext):
        super().__init__(ctx)
        # Cache the flattened (base) structure of the action space once, so
        # `clip_action` does not have to re-derive it on every call.
        self._action_space_struct = get_base_struct_from_space(ctx.action_space)

    def transform(self, ac_data: ActionConnectorDataType) -> ActionConnectorDataType:
        """Returns a copy of `ac_data` whose actions are clipped to the space."""
        assert isinstance(
            ac_data.output, tuple
        ), "Action connector requires PolicyOutputType data."

        actions, states, fetches = ac_data.output
        clipped = clip_action(actions, self._action_space_struct)
        return ActionConnectorDataType(
            ac_data.env_id,
            ac_data.agent_id,
            ac_data.input_dict,
            (clipped, states, fetches),
        )

    def to_state(self):
        # This connector carries no parameters -> serialize as (name, None).
        return ClipActionsConnector.__name__, None

    @staticmethod
    def from_state(ctx: ConnectorContext, params: Any):
        return ClipActionsConnector(ctx)


register_connector(ClipActionsConnector.__name__, ClipActionsConnector)
.venv/lib/python3.11/site-packages/ray/rllib/connectors/action/immutable.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
from typing import Any

import tree  # pip install dm_tree

from ray.rllib.connectors.connector import (
    ActionConnector,
    ConnectorContext,
)
from ray.rllib.connectors.registry import register_connector
from ray.rllib.utils.numpy import make_action_immutable
from ray.rllib.utils.typing import ActionConnectorDataType
from ray.rllib.utils.annotations import OldAPIStack


@OldAPIStack
class ImmutableActionsConnector(ActionConnector):
    """Action connector that marks computed actions as immutable.

    The actions struct itself is passed through unchanged; only
    `make_action_immutable` is applied to each leaf as a side effect.
    """

    def transform(self, ac_data: ActionConnectorDataType) -> ActionConnectorDataType:
        """Returns `ac_data` with its actions flagged immutable in place."""
        assert isinstance(
            ac_data.output, tuple
        ), "Action connector requires PolicyOutputType data."

        actions, states, fetches = ac_data.output
        # Traverse bottom-up; `make_action_immutable` works via side effect on
        # each visited leaf, so the traversal's return value is not needed.
        tree.traverse(make_action_immutable, actions, top_down=False)

        output = (actions, states, fetches)
        return ActionConnectorDataType(
            ac_data.env_id,
            ac_data.agent_id,
            ac_data.input_dict,
            output,
        )

    def to_state(self):
        # No parameters to serialize.
        return ImmutableActionsConnector.__name__, None

    @staticmethod
    def from_state(ctx: ConnectorContext, params: Any):
        return ImmutableActionsConnector(ctx)


register_connector(ImmutableActionsConnector.__name__, ImmutableActionsConnector)
.venv/lib/python3.11/site-packages/ray/rllib/connectors/action/lambdas.py ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
from typing import Any, Callable, Dict, Type

from ray.rllib.connectors.connector import (
    ActionConnector,
    ConnectorContext,
)
from ray.rllib.connectors.registry import register_connector
from ray.rllib.utils.numpy import convert_to_numpy
from ray.rllib.utils.typing import (
    ActionConnectorDataType,
    PolicyOutputType,
    StateBatches,
    TensorStructType,
)
from ray.rllib.utils.annotations import OldAPIStack


@OldAPIStack
def register_lambda_action_connector(
    name: str, fn: Callable[[TensorStructType, StateBatches, Dict], PolicyOutputType]
) -> Type[ActionConnector]:
    """A util to register any function transforming PolicyOutputType as an ActionConnector.

    The only requirement is that fn should take actions, states, and fetches as input,
    and return transformed actions, states, and fetches.

    Args:
        name: Name of the resulting actor connector.
        fn: The function that transforms PolicyOutputType.

    Returns:
        A new ActionConnector class that transforms PolicyOutputType using fn.
    """

    class LambdaActionConnector(ActionConnector):
        def transform(
            self, ac_data: ActionConnectorDataType
        ) -> ActionConnectorDataType:
            assert isinstance(
                ac_data.output, tuple
            ), "Action connector requires PolicyOutputType data."

            actions, states, fetches = ac_data.output
            # Delegate the actual transformation to the registered `fn`.
            transformed = fn(actions, states, fetches)
            return ActionConnectorDataType(
                ac_data.env_id,
                ac_data.agent_id,
                ac_data.input_dict,
                transformed,
            )

        def to_state(self):
            # The lambda itself is not serializable -> only the name is stored;
            # `from_state` relies on the connector being re-registered.
            return name, None

        @staticmethod
        def from_state(ctx: ConnectorContext, params: Any):
            return LambdaActionConnector(ctx)

    # Give the generated class the user-provided name for registry lookups
    # and readable reprs.
    LambdaActionConnector.__name__ = name
    LambdaActionConnector.__qualname__ = name

    register_connector(name, LambdaActionConnector)

    return LambdaActionConnector


# Convert actions and states into numpy arrays if necessary.
ConvertToNumpyConnector = OldAPIStack(
    register_lambda_action_connector(
        "ConvertToNumpyConnector",
        lambda actions, states, fetches: (
            convert_to_numpy(actions),
            convert_to_numpy(states),
            fetches,
        ),
    ),
)
.venv/lib/python3.11/site-packages/ray/rllib/connectors/action/normalize.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
from typing import Any

from ray.rllib.connectors.connector import (
    ActionConnector,
    ConnectorContext,
)
from ray.rllib.connectors.registry import register_connector
from ray.rllib.utils.spaces.space_utils import (
    get_base_struct_from_space,
    unsquash_action,
)
from ray.rllib.utils.typing import ActionConnectorDataType
from ray.rllib.utils.annotations import OldAPIStack


@OldAPIStack
class NormalizeActionsConnector(ActionConnector):
    """Action connector that unsquashes normalized actions into the env's space.

    The inverse of action normalization: maps actions produced in a squashed
    (e.g. [-1, 1]) range back to the bounds of the actual action space.
    """

    def __init__(self, ctx: ConnectorContext):
        super().__init__(ctx)
        # Cache the flattened (base) structure of the action space once.
        self._action_space_struct = get_base_struct_from_space(ctx.action_space)

    def transform(self, ac_data: ActionConnectorDataType) -> ActionConnectorDataType:
        """Returns a copy of `ac_data` with actions unsquashed into the space."""
        assert isinstance(
            ac_data.output, tuple
        ), "Action connector requires PolicyOutputType data."

        actions, states, fetches = ac_data.output
        unsquashed = unsquash_action(actions, self._action_space_struct)
        return ActionConnectorDataType(
            ac_data.env_id,
            ac_data.agent_id,
            ac_data.input_dict,
            (unsquashed, states, fetches),
        )

    def to_state(self):
        # No parameters to serialize.
        return NormalizeActionsConnector.__name__, None

    @staticmethod
    def from_state(ctx: ConnectorContext, params: Any):
        return NormalizeActionsConnector(ctx)


register_connector(NormalizeActionsConnector.__name__, NormalizeActionsConnector)
.venv/lib/python3.11/site-packages/ray/rllib/connectors/action/pipeline.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import logging
from typing import Any, List
from collections import defaultdict

from ray.rllib.connectors.connector import (
    ActionConnector,
    Connector,
    ConnectorContext,
    ConnectorPipeline,
)
from ray.rllib.connectors.registry import get_connector, register_connector
from ray.rllib.utils.annotations import OldAPIStack
from ray.rllib.utils.typing import ActionConnectorDataType
from ray.util.timer import _Timer


logger = logging.getLogger(__name__)


@OldAPIStack
class ActionConnectorPipeline(ConnectorPipeline, ActionConnector):
    """A pipeline of ActionConnectors, applied in order and individually timed.

    Each child connector's wall-clock time is accumulated in `self.timers`,
    keyed by the connector's string representation.
    """

    def __init__(self, ctx: ConnectorContext, connectors: List[Connector]):
        super().__init__(ctx, connectors)
        # Per-connector timers; created lazily on first use.
        self.timers = defaultdict(_Timer)

    def __call__(self, ac_data: ActionConnectorDataType) -> ActionConnectorDataType:
        """Runs `ac_data` through all child connectors, in order."""
        for c in self.connectors:
            timer = self.timers[str(c)]
            with timer:
                ac_data = c(ac_data)
        return ac_data

    def to_state(self):
        """Serializes this pipeline as (name, list of child (name, params))."""
        children = []
        for c in self.connectors:
            state = c.to_state()
            # Fix: original message ran "{state}" and "for connector" together
            # ("...we got <state>for connector..."); add the missing space.
            assert isinstance(state, tuple) and len(state) == 2, (
                "Serialized connector state must be in the format of "
                f"Tuple[name: str, params: Any]. Instead we got {state} "
                f"for connector {c.__name__}."
            )
            children.append(state)
        return ActionConnectorPipeline.__name__, children

    @staticmethod
    def from_state(ctx: ConnectorContext, params: Any):
        """Restores a pipeline from a list of serialized child states.

        Raises:
            Exception: Re-raises (after logging) any error hit while
                de-serializing an individual child connector state.
        """
        assert (
            type(params) is list
        ), "ActionConnectorPipeline takes a list of connector params."
        connectors = []
        for state in params:
            try:
                name, subparams = state
                connectors.append(get_connector(name, ctx, subparams))
            except Exception as e:
                logger.error(f"Failed to de-serialize connector state: {state}")
                raise e
        return ActionConnectorPipeline(ctx, connectors)


register_connector(ActionConnectorPipeline.__name__, ActionConnectorPipeline)
.venv/lib/python3.11/site-packages/ray/rllib/connectors/agent/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (199 Bytes). View file
 
.venv/lib/python3.11/site-packages/ray/rllib/connectors/agent/__pycache__/clip_reward.cpython-311.pyc ADDED
Binary file (3.06 kB). View file
 
.venv/lib/python3.11/site-packages/ray/rllib/connectors/agent/__pycache__/env_sampling.cpython-311.pyc ADDED
Binary file (2.11 kB). View file
 
.venv/lib/python3.11/site-packages/ray/rllib/connectors/agent/__pycache__/pipeline.cpython-311.pyc ADDED
Binary file (4.74 kB). View file
 
.venv/lib/python3.11/site-packages/ray/rllib/connectors/agent/__pycache__/synced_filter.cpython-311.pyc ADDED
Binary file (3.05 kB). View file
 
.venv/lib/python3.11/site-packages/ray/rllib/connectors/agent/__pycache__/view_requirement.cpython-311.pyc ADDED
Binary file (6.24 kB). View file
 
.venv/lib/python3.11/site-packages/ray/rllib/connectors/common/__init__.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Public re-exports of the common ConnectorV2 pieces in this package."""

from ray.rllib.connectors.common.add_observations_from_episodes_to_batch import (
    AddObservationsFromEpisodesToBatch,
)
from ray.rllib.connectors.common.add_states_from_episodes_to_batch import (
    AddStatesFromEpisodesToBatch,
)
from ray.rllib.connectors.common.add_time_dim_to_batch_and_zero_pad import (
    AddTimeDimToBatchAndZeroPad,
)
from ray.rllib.connectors.common.agent_to_module_mapping import AgentToModuleMapping
from ray.rllib.connectors.common.batch_individual_items import BatchIndividualItems
from ray.rllib.connectors.common.numpy_to_tensor import NumpyToTensor


# Explicit public API of this module.
__all__ = [
    "AddObservationsFromEpisodesToBatch",
    "AddStatesFromEpisodesToBatch",
    "AddTimeDimToBatchAndZeroPad",
    "AgentToModuleMapping",
    "BatchIndividualItems",
    "NumpyToTensor",
]
.venv/lib/python3.11/site-packages/ray/rllib/connectors/common/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (1.02 kB). View file
 
.venv/lib/python3.11/site-packages/ray/rllib/connectors/common/__pycache__/add_observations_from_episodes_to_batch.cpython-311.pyc ADDED
Binary file (7.36 kB). View file
 
.venv/lib/python3.11/site-packages/ray/rllib/connectors/common/__pycache__/add_states_from_episodes_to_batch.cpython-311.pyc ADDED
Binary file (13.7 kB). View file
 
.venv/lib/python3.11/site-packages/ray/rllib/connectors/common/__pycache__/add_time_dim_to_batch_and_zero_pad.cpython-311.pyc ADDED
Binary file (12 kB). View file
 
.venv/lib/python3.11/site-packages/ray/rllib/connectors/common/__pycache__/agent_to_module_mapping.cpython-311.pyc ADDED
Binary file (12.1 kB). View file
 
.venv/lib/python3.11/site-packages/ray/rllib/connectors/common/__pycache__/batch_individual_items.cpython-311.pyc ADDED
Binary file (8.01 kB). View file
 
.venv/lib/python3.11/site-packages/ray/rllib/connectors/common/__pycache__/frame_stacking.cpython-311.pyc ADDED
Binary file (7.19 kB). View file
 
.venv/lib/python3.11/site-packages/ray/rllib/connectors/common/__pycache__/module_to_agent_unmapping.cpython-311.pyc ADDED
Binary file (2.83 kB). View file
 
.venv/lib/python3.11/site-packages/ray/rllib/connectors/common/__pycache__/numpy_to_tensor.cpython-311.pyc ADDED
Binary file (6.02 kB). View file
 
.venv/lib/python3.11/site-packages/ray/rllib/connectors/common/__pycache__/tensor_to_numpy.cpython-311.pyc ADDED
Binary file (1.79 kB). View file
 
.venv/lib/python3.11/site-packages/ray/rllib/connectors/common/add_observations_from_episodes_to_batch.py ADDED
@@ -0,0 +1,180 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
from typing import Any, Dict, List, Optional

import gymnasium as gym

from ray.rllib.core.columns import Columns
from ray.rllib.connectors.connector_v2 import ConnectorV2
from ray.rllib.core.rl_module.rl_module import RLModule
from ray.rllib.utils.annotations import override
from ray.rllib.utils.typing import EpisodeType
from ray.util.annotations import PublicAPI


@PublicAPI(stability="alpha")
class AddObservationsFromEpisodesToBatch(ConnectorV2):
    """Gets the last observation from a running episode and adds it to the batch.

    Note: This is one of the default env-to-module or Learner ConnectorV2 pieces that
    are added automatically by RLlib into every env-to-module/Learner connector
    pipeline, unless `config.add_default_connectors_to_env_to_module_pipeline` or
    `config.add_default_connectors_to_learner_pipeline` are set to
    False.

    The default env-to-module connector pipeline is:
    [
        [0 or more user defined ConnectorV2 pieces],
        AddObservationsFromEpisodesToBatch,
        AddTimeDimToBatchAndZeroPad,
        AddStatesFromEpisodesToBatch,
        AgentToModuleMapping,  # only in multi-agent setups!
        BatchIndividualItems,
        NumpyToTensor,
    ]
    The default Learner connector pipeline is:
    [
        [0 or more user defined ConnectorV2 pieces],
        AddObservationsFromEpisodesToBatch,
        AddColumnsFromEpisodesToTrainBatch,
        AddTimeDimToBatchAndZeroPad,
        AddStatesFromEpisodesToBatch,
        AgentToModuleMapping,  # only in multi-agent setups!
        BatchIndividualItems,
        NumpyToTensor,
    ]

    This ConnectorV2:
    - Operates on a list of Episode objects (single- or multi-agent).
    - Gets the most recent observation(s) from all the given episodes and adds them
    to the batch under construction (as a list of individual observations).
    - Does NOT alter any observations (or other data) in the given episodes.
    - Can be used in EnvToModule and Learner connector pipelines.

    .. testcode::

        import gymnasium as gym
        import numpy as np

        from ray.rllib.connectors.common import AddObservationsFromEpisodesToBatch
        from ray.rllib.env.single_agent_episode import SingleAgentEpisode
        from ray.rllib.utils.test_utils import check

        # Create two dummy SingleAgentEpisodes, each containing 2 observations,
        # 1 action and 1 reward (both are length=1).
        obs_space = gym.spaces.Box(-1.0, 1.0, (2,), np.float32)
        act_space = gym.spaces.Discrete(2)

        episodes = [SingleAgentEpisode(
            observations=[obs_space.sample(), obs_space.sample()],
            actions=[act_space.sample()],
            rewards=[1.0],
            len_lookback_buffer=0,
        ) for _ in range(2)]
        eps_1_last_obs = episodes[0].get_observations(-1)
        eps_2_last_obs = episodes[1].get_observations(-1)
        print(f"1st Episode's last obs is {eps_1_last_obs}")
        print(f"2nd Episode's last obs is {eps_2_last_obs}")

        # Create an instance of this class.
        connector = AddObservationsFromEpisodesToBatch()

        # Call the connector with the two created episodes.
        # Note that this particular connector works without an RLModule, so we
        # simplify here for the sake of this example.
        output_batch = connector(
            rl_module=None,
            batch={},
            episodes=episodes,
            explore=True,
            shared_data={},
        )
        # The output data should now contain the last observations of both episodes,
        # in a "per-episode organized" fashion.
        check(
            output_batch,
            {
                "obs": {
                    (episodes[0].id_,): [eps_1_last_obs],
                    (episodes[1].id_,): [eps_2_last_obs],
                },
            },
        )
    """

    def __init__(
        self,
        input_observation_space: Optional[gym.Space] = None,
        input_action_space: Optional[gym.Space] = None,
        *,
        as_learner_connector: bool = False,
        **kwargs,
    ):
        """Initializes a AddObservationsFromEpisodesToBatch instance.

        Args:
            input_observation_space: The (optional) input observation space.
            input_action_space: The (optional) input action space.
            as_learner_connector: Whether this connector is part of a Learner connector
                pipeline, as opposed to a env-to-module pipeline. As a Learner
                connector, it will add an entire Episode's observations (each timestep)
                to the batch.
        """
        super().__init__(
            input_observation_space=input_observation_space,
            input_action_space=input_action_space,
            **kwargs,
        )

        self._as_learner_connector = as_learner_connector

    @override(ConnectorV2)
    def __call__(
        self,
        *,
        rl_module: RLModule,
        batch: Dict[str, Any],
        episodes: List[EpisodeType],
        explore: Optional[bool] = None,
        shared_data: Optional[dict] = None,
        **kwargs,
    ) -> Any:
        """Adds episode observations under Columns.OBS to `batch` and returns it."""
        # If "obs" already in data, early out.
        if Columns.OBS in batch:
            return batch
        for i, sa_episode in enumerate(
            self.single_agent_episode_iterator(
                episodes,
                # If Learner connector, get all episodes (for train batch).
                # If EnvToModule, get only those ongoing episodes that just had their
                # agent step (b/c those are the ones we need to compute actions for
                # next).
                agents_that_stepped_only=not self._as_learner_connector,
            )
        ):
            if self._as_learner_connector:
                # TODO (sven): Resolve this hack by adding a new connector piece that
                #  performs this very task.
                # NOTE(review): mutates the episode's ID by appending "_<i>";
                # presumably to disambiguate episode chunks in the train batch.
                if "_" not in sa_episode.id_:
                    sa_episode.id_ += "_" + str(i)

                self.add_n_batch_items(
                    batch,
                    Columns.OBS,
                    # Add all observations, except the very last one.
                    # For a terminated episode, this is the terminal observation that
                    # has no value for training.
                    # For a truncated episode, algorithms either add an extra NEXT_OBS
                    # column to the batch (ex. DQN) or extend the episode length by one
                    # (using a separate connector piece and this truncated last obs),
                    # then bootstrap the value estimation for that extra timestep.
                    items_to_add=sa_episode.get_observations(slice(0, len(sa_episode))),
                    num_items=len(sa_episode),
                    single_agent_episode=sa_episode,
                )
            else:
                # Env-to-module: only the most recent observation is needed.
                assert not sa_episode.is_numpy
                self.add_batch_item(
                    batch,
                    Columns.OBS,
                    item_to_add=sa_episode.get_observations(-1),
                    single_agent_episode=sa_episode,
                )

        return batch
.venv/lib/python3.11/site-packages/ray/rllib/connectors/common/add_states_from_episodes_to_batch.py ADDED
@@ -0,0 +1,348 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import math
2
+ from typing import Any, Dict, List, Optional
3
+
4
+ import gymnasium as gym
5
+ import numpy as np
6
+ import tree # pip install dm_tree
7
+
8
+ from ray.rllib.connectors.connector_v2 import ConnectorV2
9
+ from ray.rllib.core import DEFAULT_MODULE_ID
10
+ from ray.rllib.core.columns import Columns
11
+ from ray.rllib.core.rl_module.multi_rl_module import MultiRLModule
12
+ from ray.rllib.core.rl_module.rl_module import RLModule
13
+ from ray.rllib.utils.annotations import override
14
+ from ray.rllib.utils.numpy import convert_to_numpy
15
+ from ray.rllib.utils.typing import EpisodeType
16
+ from ray.util.annotations import PublicAPI
17
+
18
+
19
+ @PublicAPI(stability="alpha")
20
+ class AddStatesFromEpisodesToBatch(ConnectorV2):
21
+ """Gets last STATE_OUT from running episode and adds it as STATE_IN to the batch.
22
+
23
+ Note: This is one of the default env-to-module or Learner ConnectorV2 pieces that
24
+ are added automatically by RLlib into every env-to-module/Learner connector
25
+ pipeline, unless `config.add_default_connectors_to_env_to_module_pipeline` or
26
+ `config.add_default_connectors_to_learner_pipeline ` are set to
27
+ False.
28
+
29
+ The default env-to-module connector pipeline is:
30
+ [
31
+ [0 or more user defined ConnectorV2 pieces],
32
+ AddObservationsFromEpisodesToBatch,
33
+ AddTimeDimToBatchAndZeroPad,
34
+ AddStatesFromEpisodesToBatch,
35
+ AgentToModuleMapping, # only in multi-agent setups!
36
+ BatchIndividualItems,
37
+ NumpyToTensor,
38
+ ]
39
+ The default Learner connector pipeline is:
40
+ [
41
+ [0 or more user defined ConnectorV2 pieces],
42
+ AddObservationsFromEpisodesToBatch,
43
+ AddColumnsFromEpisodesToTrainBatch,
44
+ AddTimeDimToBatchAndZeroPad,
45
+ AddStatesFromEpisodesToBatch,
46
+ AgentToModuleMapping, # only in multi-agent setups!
47
+ BatchIndividualItems,
48
+ NumpyToTensor,
49
+ ]
50
+
51
+ If the RLModule is stateful, the episodes' STATE_OUTS will be extracted
52
+ and restructured under a new STATE_IN key.
53
+ As a Learner connector, the resulting STATE_IN batch has the shape (B', ...).
54
+ Here, B' is the sum of splits we have to do over the given episodes, such that each
55
+ chunk is at most `max_seq_len` long (T-axis).
56
+ As a EnvToModule connector, the resulting STATE_IN batch simply consists of n
57
+ states coming from n vectorized environments/episodes.
58
+
59
+ Also, all other data (observations, rewards, etc.. if applicable) will be properly
60
+ reshaped into (B, T=max_seq_len (learner) or 1 (env-to-module), ...) and will be
61
+ zero-padded, if necessary.
62
+
63
+ This ConnectorV2:
64
+ - Operates on a list of Episode objects.
65
+ - Gets the most recent STATE_OUT from all the given episodes and adds them under
66
+ the STATE_IN key to the batch under construction.
67
+ - Does NOT alter any data in the given episodes.
68
+ - Can be used in EnvToModule and Learner connector pipelines.
69
+
70
+ .. testcode::
71
+
72
+ from ray.rllib.connectors.common import AddStatesFromEpisodesToBatch
73
+ from ray.rllib.core.columns import Columns
74
+ from ray.rllib.env.single_agent_episode import SingleAgentEpisode
75
+ from ray.rllib.utils.test_utils import check
76
+
77
+ # Create a simple dummy class, pretending to be an RLModule with
78
+ # `get_initial_state`, `is_stateful` and `model_config` property defined:
79
+ class MyStateModule:
80
+ # dummy config
81
+ model_config = {"max_seq_len": 2}
82
+
83
+ def is_stateful(self):
84
+ return True
85
+
86
+ def get_initial_state(self):
87
+ return 0.0
88
+
89
+
90
+ # Create an empty episode. The connector should use the RLModule's initial state
91
+ # to populate STATE_IN for the next forward pass.
92
+ episode = SingleAgentEpisode()
93
+
94
+ rl_module = MyStateModule()
95
+ rl_module_init_state = rl_module.get_initial_state()
96
+
97
+ # Create an instance of this class (as a env-to-module connector).
98
+ connector = AddStatesFromEpisodesToBatch(as_learner_connector=False)
99
+
100
+ # Call the connector.
101
+ output_batch = connector(
102
+ rl_module=rl_module,
103
+ batch={},
104
+ episodes=[episode],
105
+ shared_data={},
106
+ )
107
+ # The output data's STATE_IN key should now contain the RLModule's initial state
108
+ # plus the one state out found in the episode in a "per-episode organized"
109
+ # fashion.
110
+ check(
111
+ output_batch[Columns.STATE_IN],
112
+ {
113
+ (episode.id_,): [rl_module_init_state],
114
+ },
115
+ )
116
+
117
+ # Create a SingleAgentEpisodes containing 5 observations,
118
+ # 4 actions and 4 rewards, and 4 STATE_OUTs.
119
+ # The same connector should now use the episode-stored last STATE_OUT as
120
+ # STATE_IN for the next forward pass.
121
+ episode = SingleAgentEpisode(
122
+ observations=[0, 1, 2, 3, 4],
123
+ actions=[1, 2, 3, 4],
124
+ rewards=[1.0, 2.0, 3.0, 4.0],
125
+ # STATE_OUT in episode will show up under STATE_IN in the batch.
126
+ extra_model_outputs={
127
+ Columns.STATE_OUT: [-4.0, -3.0, -2.0, -1.0],
128
+ },
129
+ len_lookback_buffer = 0,
130
+ )
131
+
132
+ # Call the connector.
133
+ output_batch = connector(
134
+ rl_module=rl_module,
135
+ batch={},
136
+ episodes=[episode],
137
+ shared_data={},
138
+ )
139
+ # The output data's STATE_IN key should now contain the episode's last
140
+ # STATE_OUT, NOT the RLModule's initial state in a "per-episode organized"
141
+ # fashion.
142
+ check(
143
+ output_batch[Columns.STATE_IN],
144
+ {
145
+ # Expect the episode's last STATE_OUT.
146
+ (episode.id_,): [-1.0],
147
+ },
148
+ )
149
+
150
+ # Create a new connector as a learner connector with a RNN seq len of 2 (for
151
+ # testing purposes only). Passing the same data through this learner connector,
152
+ # we expect the STATE_IN data to contain a) the initial module state and then
153
+ # every 2nd STATE_OUT stored in the episode.
154
+ connector = AddStatesFromEpisodesToBatch(as_learner_connector=True)
155
+
156
+ # Call the connector.
157
+ output_batch = connector(
158
+ rl_module=rl_module,
159
+ batch={},
160
+ episodes=[episode],
161
+ shared_data={},
162
+ )
163
+ check(
164
+ output_batch[Columns.STATE_IN],
165
+ {
166
+ # Expect initial module state + every 2nd STATE_OUT from episode, but
167
+ # not the very last one (just like the very last observation, this data
168
+ # is NOT passed through the forward_train, b/c there is nothing to learn
169
+ # at that timestep, unless we need to compute e.g. bootstrap value
170
+ # predictions).
171
+ # Also note that the different STATE_IN timesteps are already present
172
+ # as one batched item per episode in the list.
173
+ (episode.id_,): [rl_module_init_state, -3.0],
174
+ },
175
+ )
176
+ """
177
+
178
+ def __init__(
179
+ self,
180
+ input_observation_space: Optional[gym.Space] = None,
181
+ input_action_space: Optional[gym.Space] = None,
182
+ *,
183
+ as_learner_connector: bool = False,
184
+ **kwargs,
185
+ ):
186
+ """Initializes a AddObservationsFromEpisodesToBatch instance.
187
+
188
+ Args:
189
+ as_learner_connector: Whether this connector is part of a Learner connector
190
+ pipeline, as opposed to a env-to-module pipeline. As a Learner
191
+ connector, it will add an entire Episode's observations (each timestep)
192
+ to the batch.
193
+ """
194
+ super().__init__(
195
+ input_observation_space=input_observation_space,
196
+ input_action_space=input_action_space,
197
+ **kwargs,
198
+ )
199
+
200
+ self._as_learner_connector = as_learner_connector
201
+
202
+ @override(ConnectorV2)
203
+ def __call__(
204
+ self,
205
+ *,
206
+ rl_module: RLModule,
207
+ batch: Dict[str, Any],
208
+ episodes: List[EpisodeType],
209
+ explore: Optional[bool] = None,
210
+ shared_data: Optional[dict] = None,
211
+ **kwargs,
212
+ ) -> Any:
213
+ # If not stateful OR STATE_IN already in data, early out.
214
+ if not rl_module.is_stateful() or Columns.STATE_IN in batch:
215
+ return batch
216
+
217
+ for sa_episode in self.single_agent_episode_iterator(
218
+ episodes,
219
+ # If Learner connector, get all episodes (for train batch).
220
+ # If EnvToModule, get only those ongoing episodes that just had their
221
+ # agent step (b/c those are the ones we need to compute actions for next).
222
+ agents_that_stepped_only=not self._as_learner_connector,
223
+ ):
224
+ if self._as_learner_connector:
225
+ # Multi-agent case: Extract correct single agent RLModule (to get its
226
+ # individual state).
227
+ if sa_episode.module_id is not None:
228
+ sa_module = rl_module[sa_episode.module_id]
229
+ else:
230
+ sa_module = (
231
+ rl_module[DEFAULT_MODULE_ID]
232
+ if isinstance(rl_module, MultiRLModule)
233
+ else rl_module
234
+ )
235
+ # This single-agent RLModule is NOT stateful -> Skip.
236
+ if not sa_module.is_stateful():
237
+ continue
238
+
239
+ max_seq_len = sa_module.model_config["max_seq_len"]
240
+
241
+ # look_back_state.shape=([state-dim],)
242
+ look_back_state = (
243
+ # Episode has a (reset) beginning -> Prepend initial
244
+ # state.
245
+ convert_to_numpy(sa_module.get_initial_state())
246
+ if sa_episode.t_started == 0
247
+ or (Columns.STATE_OUT not in sa_episode.extra_model_outputs)
248
+ # Episode starts somewhere in the middle (is a cut
249
+ # continuation chunk) -> Use previous chunk's last
250
+ # STATE_OUT as initial state.
251
+ else sa_episode.get_extra_model_outputs(
252
+ key=Columns.STATE_OUT,
253
+ indices=-1,
254
+ neg_index_as_lookback=True,
255
+ )
256
+ )
257
+ # If we have `"state_out"`s (e.g. from rollouts) use them for the
258
+ # `"state_in"`s.
259
+ if Columns.STATE_OUT in sa_episode.extra_model_outputs:
260
+ # state_outs.shape=(T,[state-dim]) T=episode len
261
+ state_outs = sa_episode.get_extra_model_outputs(
262
+ key=Columns.STATE_OUT
263
+ )
264
+ # Otherwise, we have no `"state_out"` (e.g. because we are sampling
265
+ # from offline data and the expert policy was not stateful).
266
+ else:
267
+ # Then simply use the `look_back_state`, i.e. in this case the
268
+ # initial state as `"state_in` in training.
269
+ if sa_episode.is_numpy:
270
+ state_outs = tree.map_structure(
271
+ lambda a, _sae=sa_episode: np.repeat(
272
+ a[np.newaxis, ...], len(_sae), axis=0
273
+ ),
274
+ look_back_state,
275
+ )
276
+ else:
277
+ state_outs = [look_back_state for _ in range(len(sa_episode))]
278
+ # Explanation:
279
+ # B=episode len // max_seq_len
280
+ # [::max_seq_len]: only keep every Tth state.
281
+ # [:-1]: Shift state outs by one; ignore very last
282
+ # STATE_OUT, but therefore add the lookback/init state at
283
+ # the beginning.
284
+ items_to_add = (
285
+ tree.map_structure(
286
+ lambda i, o, m=max_seq_len: np.concatenate([[i], o[:-1]])[::m],
287
+ look_back_state,
288
+ state_outs,
289
+ )
290
+ if sa_episode.is_numpy
291
+ else ([look_back_state] + state_outs[:-1])[::max_seq_len]
292
+ )
293
+ self.add_n_batch_items(
294
+ batch=batch,
295
+ column=Columns.STATE_IN,
296
+ items_to_add=items_to_add,
297
+ num_items=int(math.ceil(len(sa_episode) / max_seq_len)),
298
+ single_agent_episode=sa_episode,
299
+ )
300
+ if Columns.NEXT_OBS in batch:
301
+ items_to_add = (
302
+ tree.map_structure(
303
+ lambda i, m=max_seq_len: i[::m],
304
+ state_outs,
305
+ )
306
+ if sa_episode.is_numpy
307
+ else state_outs[::max_seq_len]
308
+ )
309
+ self.add_n_batch_items(
310
+ batch=batch,
311
+ column=Columns.NEXT_STATE_IN,
312
+ items_to_add=items_to_add,
313
+ num_items=int(math.ceil(len(sa_episode) / max_seq_len)),
314
+ single_agent_episode=sa_episode,
315
+ )
316
+
317
+ else:
318
+ assert not sa_episode.is_numpy
319
+
320
+ # Multi-agent case: Extract correct single agent RLModule (to get the
321
+ # state for individually).
322
+ sa_module = rl_module
323
+ if sa_episode.module_id is not None:
324
+ sa_module = rl_module[sa_episode.module_id]
325
+ # This single-agent RLModule is NOT stateful -> Skip.
326
+ if not sa_module.is_stateful():
327
+ continue
328
+
329
+ # Episode just started or has no `"state_out"` (e.g. in offline
330
+ # sampling) -> Get initial state from our RLModule.
331
+ if (sa_episode.t_started == 0 and len(sa_episode) == 0) or (
332
+ Columns.STATE_OUT not in sa_episode.extra_model_outputs
333
+ ):
334
+ state = sa_module.get_initial_state()
335
+ # Episode is already ongoing -> Use most recent STATE_OUT.
336
+ else:
337
+ state = sa_episode.get_extra_model_outputs(
338
+ key=Columns.STATE_OUT,
339
+ indices=-1,
340
+ )
341
+ self.add_batch_item(
342
+ batch,
343
+ Columns.STATE_IN,
344
+ item_to_add=state,
345
+ single_agent_episode=sa_episode,
346
+ )
347
+
348
+ return batch
.venv/lib/python3.11/site-packages/ray/rllib/connectors/common/add_time_dim_to_batch_and_zero_pad.py ADDED
@@ -0,0 +1,302 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Any, Dict, List, Optional
2
+
3
+ import gymnasium as gym
4
+ import numpy as np
5
+ import tree # pip install dm_tree
6
+
7
+ from ray.rllib.connectors.connector_v2 import ConnectorV2
8
+ from ray.rllib.core import DEFAULT_MODULE_ID
9
+ from ray.rllib.core.columns import Columns
10
+ from ray.rllib.core.rl_module.multi_rl_module import MultiRLModule
11
+ from ray.rllib.core.rl_module.rl_module import RLModule
12
+ from ray.rllib.utils.annotations import override
13
+ from ray.rllib.utils.postprocessing.zero_padding import (
14
+ create_mask_and_seq_lens,
15
+ split_and_zero_pad,
16
+ )
17
+ from ray.rllib.utils.spaces.space_utils import BatchedNdArray
18
+ from ray.rllib.utils.typing import EpisodeType
19
+ from ray.util.annotations import PublicAPI
20
+
21
+
22
+ @PublicAPI(stability="alpha")
23
+ class AddTimeDimToBatchAndZeroPad(ConnectorV2):
24
+ """Adds an extra time dim (axis=1) to all data currently in the batch.
25
+
26
+ Note: This is one of the default env-to-module or Learner ConnectorV2 pieces that
27
+ are added automatically by RLlib into every env-to-module/Learner connector
28
+ pipeline, unless `config.add_default_connectors_to_env_to_module_pipeline` or
29
+ `config.add_default_connectors_to_learner_pipeline ` are set to
30
+ False.
31
+
32
+ The default env-to-module connector pipeline is:
33
+ [
34
+ [0 or more user defined ConnectorV2 pieces],
35
+ AddObservationsFromEpisodesToBatch,
36
+ AddTimeDimToBatchAndZeroPad,
37
+ AddStatesFromEpisodesToBatch,
38
+ AgentToModuleMapping, # only in multi-agent setups!
39
+ BatchIndividualItems,
40
+ NumpyToTensor,
41
+ ]
42
+ The default Learner connector pipeline is:
43
+ [
44
+ [0 or more user defined ConnectorV2 pieces],
45
+ AddObservationsFromEpisodesToBatch,
46
+ AddColumnsFromEpisodesToTrainBatch,
47
+ AddTimeDimToBatchAndZeroPad,
48
+ AddStatesFromEpisodesToBatch,
49
+ AgentToModuleMapping, # only in multi-agent setups!
50
+ BatchIndividualItems,
51
+ NumpyToTensor,
52
+ ]
53
+
54
+ If the RLModule is stateful, an extra time dim at axis=1 is added to all data in the
55
+ batch.
56
+
57
+ Also, all data (observations, rewards, etc.. if applicable) will be properly
58
+ reshaped into (B, T=max_seq_len (learner) or 1 (env-to-module), ...) and will be
59
+ zero-padded, if necessary.
60
+
61
+ This ConnectorV2:
62
+ - Operates on a list of Episode objects.
63
+ - Adds a time dim at axis=1 to all columns already in the batch.
64
+ - In case of a learner connector pipeline, zero-pads the data according to the
65
+ module's `self.model_config["max_seq_len"]` setting and reshapes all data to
66
+ (B, T, ...). The connector also adds SEQ_LENS information and loss mask
67
+ information to the batch based on the added zero-padding.
68
+ - Does NOT alter any data in the given episodes.
69
+ - Can be used in EnvToModule and Learner connector pipelines.
70
+
71
+ .. testcode::
72
+
73
+ from ray.rllib.connectors.common import AddTimeDimToBatchAndZeroPad
74
+ from ray.rllib.core.columns import Columns
75
+ from ray.rllib.env.single_agent_episode import SingleAgentEpisode
76
+ from ray.rllib.utils.test_utils import check
77
+
78
+
79
+ # Create a simple dummy class, pretending to be an RLModule with
80
+ # `get_initial_state`, `is_stateful` and `model_config` property defined:
81
+ class MyStateModule:
82
+ # dummy config
83
+ model_config = {"max_seq_len": 3}
84
+
85
+ def is_stateful(self):
86
+ return True
87
+
88
+ def get_initial_state(self):
89
+ return 0.0
90
+
91
+
92
+ # Create an already reset episode. Expect the connector to add a time-dim to the
93
+ # reset observation.
94
+ episode = SingleAgentEpisode(observations=[0])
95
+
96
+ rl_module = MyStateModule()
97
+
98
+ # Create an instance of this class (as an env-to-module connector).
99
+ connector = AddTimeDimToBatchAndZeroPad(as_learner_connector=False)
100
+
101
+ # Call the connector.
102
+ output_batch = connector(
103
+ rl_module=rl_module,
104
+ batch={Columns.OBS: [0]},
105
+ episodes=[episode],
106
+ shared_data={},
107
+ )
108
+ # The output data's OBS key should now be reshaped to (B, T)
109
+ check(output_batch[Columns.OBS], [[0]])
110
+
111
+ # Create a SingleAgentEpisodes containing 5 observations,
112
+ # 4 actions and 4 rewards.
113
+ episode = SingleAgentEpisode(
114
+ observations=[0, 1, 2, 3, 4],
115
+ actions=[1, 2, 3, 4],
116
+ rewards=[1.0, 2.0, 3.0, 4.0],
117
+ len_lookback_buffer=0,
118
+ )
119
+
120
+ # Call the connector.
121
+ output_batch = connector(
122
+ rl_module=rl_module,
123
+ batch={Columns.OBS: [4]},
124
+ episodes=[episode],
125
+ shared_data={},
126
+ )
127
+ # The output data's OBS, ACTIONS, and REWARDS keys should now all have a time
128
+ # rank.
129
+ check(
130
+ # Expect the episode's last OBS.
131
+ output_batch[Columns.OBS], [[4]],
132
+ )
133
+
134
+ # Create a new connector as a learner connector with a RNN seq len of 4 (for
135
+ # testing purposes only). Passing the same data through this learner connector,
136
+ # we expect the data to also be zero-padded.
137
+ connector = AddTimeDimToBatchAndZeroPad(as_learner_connector=True)
138
+
139
+ # Call the connector.
140
+ output_batch = connector(
141
+ rl_module=rl_module,
142
+ batch={Columns.OBS: {(episode.id_,): [0, 1, 2, 3]}},
143
+ episodes=[episode],
144
+ shared_data={},
145
+ )
146
+ check(output_batch[Columns.OBS], {(episode.id_,): [[0, 1, 2], [3, 0, 0]]})
147
+ """
148
+
149
+ def __init__(
150
+ self,
151
+ input_observation_space: Optional[gym.Space] = None,
152
+ input_action_space: Optional[gym.Space] = None,
153
+ *,
154
+ as_learner_connector: bool = False,
155
+ **kwargs,
156
+ ):
157
+ """Initializes a AddObservationsFromEpisodesToBatch instance.
158
+
159
+ Args:
160
+ as_learner_connector: Whether this connector is part of a Learner connector
161
+ pipeline, as opposed to a env-to-module pipeline. As a Learner
162
+ connector, it will add an entire Episode's observations (each timestep)
163
+ to the batch.
164
+ """
165
+ super().__init__(
166
+ input_observation_space=input_observation_space,
167
+ input_action_space=input_action_space,
168
+ **kwargs,
169
+ )
170
+
171
+ self._as_learner_connector = as_learner_connector
172
+
173
+ @override(ConnectorV2)
174
+ def __call__(
175
+ self,
176
+ *,
177
+ rl_module: RLModule,
178
+ batch: Dict[str, Any],
179
+ episodes: List[EpisodeType],
180
+ explore: Optional[bool] = None,
181
+ shared_data: Optional[dict] = None,
182
+ **kwargs,
183
+ ) -> Any:
184
+
185
+ # If not stateful OR STATE_IN already in data, early out.
186
+ if not rl_module.is_stateful() or Columns.STATE_IN in batch:
187
+ return batch
188
+
189
+ # Make all inputs (other than STATE_IN) have an additional T-axis.
190
+ # Since data has not been batched yet (we are still operating on lists in the
191
+ # batch), we add this time axis as 0 (not 1). When we batch, the batch axis will
192
+ # be 0 and the time axis will be 1.
193
+ # Also, let module-to-env pipeline know that we had added a single timestep
194
+ # time rank to the data (to remove it again).
195
+ if not self._as_learner_connector:
196
+ for column in batch.keys():
197
+ self.foreach_batch_item_change_in_place(
198
+ batch=batch,
199
+ column=column,
200
+ func=lambda item, eps_id, aid, mid: (
201
+ item
202
+ if mid is not None and not rl_module[mid].is_stateful()
203
+ # Expand on axis 0 (the to-be-time-dim) if item has not been
204
+ # batched yet, otherwise axis=1 (the time-dim).
205
+ else tree.map_structure(
206
+ lambda s: np.expand_dims(
207
+ s, axis=(1 if isinstance(s, BatchedNdArray) else 0)
208
+ ),
209
+ item,
210
+ )
211
+ ),
212
+ )
213
+ shared_data["_added_single_ts_time_rank"] = True
214
+ else:
215
+ # Before adding STATE_IN to the `data`, zero-pad existing data and batch
216
+ # into max_seq_len chunks.
217
+ for column, column_data in batch.copy().items():
218
+ # Do not zero-pad INFOS column.
219
+ if column == Columns.INFOS:
220
+ continue
221
+ for key, item_list in column_data.items():
222
+ # Multi-agent case AND RLModule is not stateful -> Do not zero-pad
223
+ # for this model.
224
+ assert isinstance(key, tuple)
225
+ mid = None
226
+ if len(key) == 3:
227
+ eps_id, aid, mid = key
228
+ if not rl_module[mid].is_stateful():
229
+ continue
230
+ column_data[key] = split_and_zero_pad(
231
+ item_list,
232
+ max_seq_len=self._get_max_seq_len(rl_module, module_id=mid),
233
+ )
234
+ # TODO (sven): Remove this hint/hack once we are not relying on
235
+ # SampleBatch anymore (which has to set its property
236
+ # zero_padded=True when shuffling).
237
+ shared_data[
238
+ (
239
+ "_zero_padded_for_mid="
240
+ f"{mid if mid is not None else DEFAULT_MODULE_ID}"
241
+ )
242
+ ] = True
243
+
244
+ for sa_episode in self.single_agent_episode_iterator(
245
+ # If Learner connector, get all episodes (for train batch).
246
+ # If EnvToModule, get only those ongoing episodes that just had their
247
+ # agent step (b/c those are the ones we need to compute actions for next).
248
+ episodes,
249
+ agents_that_stepped_only=False,
250
+ ):
251
+ # Multi-agent case: Extract correct single agent RLModule (to get its
252
+ # individual state).
253
+ if sa_episode.module_id is not None:
254
+ sa_module = rl_module[sa_episode.module_id]
255
+ else:
256
+ sa_module = (
257
+ rl_module[DEFAULT_MODULE_ID]
258
+ if isinstance(rl_module, MultiRLModule)
259
+ else rl_module
260
+ )
261
+ # This single-agent RLModule is NOT stateful -> Skip.
262
+ if not sa_module.is_stateful():
263
+ continue
264
+
265
+ max_seq_len = sa_module.model_config["max_seq_len"]
266
+
267
+ # Also, create the loss mask (b/c of our now possibly zero-padded data)
268
+ # as well as the seq_lens array and add these to `data` as well.
269
+ mask, seq_lens = create_mask_and_seq_lens(len(sa_episode), max_seq_len)
270
+ self.add_n_batch_items(
271
+ batch=batch,
272
+ column=Columns.SEQ_LENS,
273
+ items_to_add=seq_lens,
274
+ num_items=len(seq_lens),
275
+ single_agent_episode=sa_episode,
276
+ )
277
+ if not shared_data.get("_added_loss_mask_for_valid_episode_ts"):
278
+ self.add_n_batch_items(
279
+ batch=batch,
280
+ column=Columns.LOSS_MASK,
281
+ items_to_add=mask,
282
+ num_items=len(mask),
283
+ single_agent_episode=sa_episode,
284
+ )
285
+
286
+ return batch
287
+
288
+ def _get_max_seq_len(self, rl_module, module_id=None):
289
+ if not isinstance(rl_module, MultiRLModule):
290
+ mod = rl_module
291
+ elif module_id:
292
+ mod = rl_module[module_id]
293
+ else:
294
+ mod = next(iter(rl_module.values()))
295
+ if "max_seq_len" not in mod.model_config:
296
+ raise ValueError(
297
+ "You are using a stateful RLModule and are not providing a "
298
+ "'max_seq_len' key inside your `model_config`. You can set this "
299
+ "dict and/or override keys in it via `config.rl_module("
300
+ "model_config={'max_seq_len': [some int]})`."
301
+ )
302
+ return mod.model_config["max_seq_len"]
.venv/lib/python3.11/site-packages/ray/rllib/connectors/common/agent_to_module_mapping.py ADDED
@@ -0,0 +1,291 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from collections import defaultdict
2
+ from typing import Any, Dict, List, Optional
3
+
4
+ import gymnasium as gym
5
+
6
+ from ray.rllib.connectors.connector_v2 import ConnectorV2
7
+ from ray.rllib.core.rl_module.rl_module import RLModule, RLModuleSpec
8
+ from ray.rllib.env.multi_agent_episode import MultiAgentEpisode
9
+ from ray.rllib.utils.annotations import override
10
+ from ray.rllib.utils.typing import EpisodeType, ModuleID
11
+ from ray.util.annotations import PublicAPI
12
+
13
+
14
+ @PublicAPI(stability="alpha")
15
+ class AgentToModuleMapping(ConnectorV2):
16
+ """ConnectorV2 that performs mapping of data from AgentID based to ModuleID based.
17
+
18
+ Note: This is one of the default env-to-module or Learner ConnectorV2 pieces that
19
+ are added automatically by RLlib into every env-to-module/Learner connector
20
+ pipeline, unless `config.add_default_connectors_to_env_to_module_pipeline` or
21
+ `config.add_default_connectors_to_learner_pipeline ` are set to
22
+ False.
23
+
24
+ The default env-to-module connector pipeline is:
25
+ [
26
+ [0 or more user defined ConnectorV2 pieces],
27
+ AddObservationsFromEpisodesToBatch,
28
+ AddTimeDimToBatchAndZeroPad,
29
+ AddStatesFromEpisodesToBatch,
30
+ AgentToModuleMapping, # only in multi-agent setups!
31
+ BatchIndividualItems,
32
+ NumpyToTensor,
33
+ ]
34
+ The default Learner connector pipeline is:
35
+ [
36
+ [0 or more user defined ConnectorV2 pieces],
37
+ AddObservationsFromEpisodesToBatch,
38
+ AddColumnsFromEpisodesToTrainBatch,
39
+ AddTimeDimToBatchAndZeroPad,
40
+ AddStatesFromEpisodesToBatch,
41
+ AgentToModuleMapping, # only in multi-agent setups!
42
+ BatchIndividualItems,
43
+ NumpyToTensor,
44
+ ]
45
+
46
+ This connector piece is only used by RLlib (as a default connector piece) in a
47
+ multi-agent setup.
48
+
49
+ Note that before the mapping, `data` is expected to have the following
50
+ structure:
51
+ [col0]:
52
+ (eps_id0, ag0, mod0): [list of individual batch items]
53
+ (eps_id0, ag1, mod2): [list of individual batch items]
54
+ (eps_id1, ag0, mod1): [list of individual batch items]
55
+ [col1]:
56
+ etc..
57
+
58
+ The target structure of the above `data` would then be:
59
+ [mod0]:
60
+ [col0]: [batched data -> batch_size_B will be the number of all items in the
61
+ input data under col0 that have mod0 as their ModuleID]
62
+ [col1]: [batched data]
63
+ [mod1]:
64
+ [col0]: etc.
65
+
66
+ Mapping happens in the following stages:
67
+
68
+ 1) Under each column name, sort keys first by EpisodeID, then AgentID.
69
+ 2) Add ModuleID keys under each column name (no cost/extra memory) and map these
70
+ new keys to empty lists.
71
+ [col0] -> [mod0] -> []: Then push items that belong to mod0 into these lists.
72
+ 3) Perform batching on the per-module lists under each column:
73
+ [col0] -> [mod0]: [...] <- now batched data (numpy array or struct of numpy
74
+ arrays).
75
+ 4) Flip column names with ModuleIDs (no cost/extra memory):
76
+ [mod0]:
77
+ [col0]: [batched data]
78
+ etc..
79
+
80
+ Note that in order to unmap the resulting batch back into an AgentID based one,
81
+ we have to store the env vector index AND AgentID of each module's batch item
82
+ in an additionally returned `memorized_map_structure`.
83
+
84
+ .. testcode::
85
+
86
+ from ray.rllib.connectors.env_to_module import AgentToModuleMapping
87
+ from ray.rllib.utils.test_utils import check
88
+
89
+ batch = {
90
+ "obs": {
91
+ ("MA-EPS0", "agent0", "module0"): [0, 1, 2],
92
+ ("MA-EPS0", "agent1", "module1"): [3, 4, 5],
93
+ },
94
+ "actions": {
95
+ ("MA-EPS1", "agent2", "module0"): [8],
96
+ ("MA-EPS0", "agent1", "module1"): [9],
97
+ },
98
+ }
99
+
100
+ # Create our connector piece.
101
+ connector = AgentToModuleMapping(
102
+ rl_module_specs={"module0", "module1"},
103
+ agent_to_module_mapping_fn=(
104
+ lambda agent_id, eps: "module1" if agent_id == "agent1" else "module0"
105
+ ),
106
+ )
107
+
108
+ # Call the connector (and thereby flip from AgentID based to ModuleID based
109
+ # structure..
110
+ output_batch = connector(
111
+ rl_module=None, # This particular connector works without an RLModule.
112
+ batch=batch,
113
+ episodes=[], # This particular connector works without a list of episodes.
114
+ explore=True,
115
+ shared_data={},
116
+ )
117
+
118
+ # `data` should now be mapped from ModuleIDs to module data.
119
+ check(
120
+ output_batch,
121
+ {
122
+ "module0": {
123
+ "obs": [0, 1, 2],
124
+ "actions": [8],
125
+ },
126
+ "module1": {
127
+ "obs": [3, 4, 5],
128
+ "actions": [9],
129
+ },
130
+ },
131
+ )
132
+ """
133
+
134
+ @override(ConnectorV2)
135
+ def recompute_output_observation_space(
136
+ self,
137
+ input_observation_space: gym.Space,
138
+ input_action_space: gym.Space,
139
+ ) -> gym.Space:
140
+ return self._map_space_if_necessary(input_observation_space, "obs")
141
+
142
+ @override(ConnectorV2)
143
+ def recompute_output_action_space(
144
+ self,
145
+ input_observation_space: gym.Space,
146
+ input_action_space: gym.Space,
147
+ ) -> gym.Space:
148
+ return self._map_space_if_necessary(input_action_space, "act")
149
+
150
+ def __init__(
151
+ self,
152
+ input_observation_space: Optional[gym.Space] = None,
153
+ input_action_space: Optional[gym.Space] = None,
154
+ *,
155
+ rl_module_specs: Dict[ModuleID, RLModuleSpec],
156
+ agent_to_module_mapping_fn,
157
+ ):
158
+ super().__init__(input_observation_space, input_action_space)
159
+
160
+ self._rl_module_specs = rl_module_specs
161
+ self._agent_to_module_mapping_fn = agent_to_module_mapping_fn
162
+
163
+ @override(ConnectorV2)
164
+ def __call__(
165
+ self,
166
+ *,
167
+ rl_module: RLModule,
168
+ batch: Dict[str, Any],
169
+ episodes: List[EpisodeType],
170
+ explore: Optional[bool] = None,
171
+ shared_data: Optional[dict] = None,
172
+ **kwargs,
173
+ ) -> Any:
174
+ # Current agent to module mapping function.
175
+ # agent_to_module_mapping_fn = shared_data.get("agent_to_module_mapping_fn")
176
+ # Store in shared data, which module IDs map to which episode/agent, such
177
+ # that the module-to-env pipeline can map the data back to agents.
178
+ memorized_map_structure = defaultdict(list)
179
+ for column, agent_data in batch.items():
180
+ if rl_module is not None and column in rl_module:
181
+ continue
182
+ for eps_id, agent_id, module_id in agent_data.keys():
183
+ memorized_map_structure[module_id].append((eps_id, agent_id))
184
+ # TODO (sven): We should check that all columns have the same struct.
185
+ break
186
+
187
+ shared_data["memorized_map_structure"] = dict(memorized_map_structure)
188
+
189
+ # Mapping from ModuleID to column data.
190
+ data_by_module = {}
191
+
192
+ # Iterating over each column in the original data:
193
+ for column, agent_data in batch.items():
194
+ if rl_module is not None and column in rl_module:
195
+ if column in data_by_module:
196
+ data_by_module[column].update(agent_data)
197
+ else:
198
+ data_by_module[column] = agent_data
199
+ continue
200
+ for (
201
+ eps_id,
202
+ agent_id,
203
+ module_id,
204
+ ), values_batch_or_list in agent_data.items():
205
+ assert isinstance(values_batch_or_list, list)
206
+ for value in values_batch_or_list:
207
+ if module_id not in data_by_module:
208
+ data_by_module[module_id] = {column: []}
209
+ elif column not in data_by_module[module_id]:
210
+ data_by_module[module_id][column] = []
211
+
212
+ # Append the data.
213
+ data_by_module[module_id][column].append(value)
214
+
215
+ return data_by_module
216
+
217
+ def _map_space_if_necessary(self, space: gym.Space, which: str = "obs"):
218
+ # Analyze input observation space to check, whether the user has already taken
219
+ # care of the agent to module mapping.
220
+ if set(self._rl_module_specs) == set(space.spaces.keys()):
221
+ return space
222
+
223
+ # We need to take care of agent to module mapping. Figure out the resulting
224
+ # observation space here.
225
+ dummy_eps = MultiAgentEpisode()
226
+
227
+ ret_space = {}
228
+ for module_id in self._rl_module_specs:
229
+ # Easy way out, user has provided space in the RLModule spec dict.
230
+ if (
231
+ isinstance(self._rl_module_specs, dict)
232
+ and module_id in self._rl_module_specs
233
+ ):
234
+ if (
235
+ which == "obs"
236
+ and self._rl_module_specs[module_id].observation_space
237
+ ):
238
+ ret_space[module_id] = self._rl_module_specs[
239
+ module_id
240
+ ].observation_space
241
+ continue
242
+ elif which == "act" and self._rl_module_specs[module_id].action_space:
243
+ ret_space[module_id] = self._rl_module_specs[module_id].action_space
244
+ continue
245
+
246
+ # Need to reverse map spaces (for the different agents) to certain
247
+ # module IDs (using a dummy MultiAgentEpisode).
248
+ one_space = next(iter(space.spaces.values()))
249
+ # If all obs spaces are the same anyway, just use the first
250
+ # single-agent space.
251
+ if all(s == one_space for s in space.spaces.values()):
252
+ ret_space[module_id] = one_space
253
+ # Otherwise, we have to compare the ModuleID with all possible
254
+ # AgentIDs and find the agent ID that matches.
255
+ else:
256
+ match_aid = None
257
+ one_agent_for_module_found = False
258
+ for aid in space.spaces.keys():
259
+ # Match: Assign spaces for this agentID to the PolicyID.
260
+ if self._agent_to_module_mapping_fn(aid, dummy_eps) == module_id:
261
+ # Make sure, different agents that map to the same
262
+ # policy don't have different spaces.
263
+ if (
264
+ module_id in ret_space
265
+ and space[aid] != ret_space[module_id]
266
+ ):
267
+ raise ValueError(
268
+ f"Two agents ({aid} and {match_aid}) in your "
269
+ "environment map to the same ModuleID (as per your "
270
+ "`agent_to_module_mapping_fn`), however, these agents "
271
+ "also have different observation spaces as per the env!"
272
+ )
273
+ ret_space[module_id] = space[aid]
274
+ match_aid = aid
275
+ one_agent_for_module_found = True
276
+ # Still no space found for this module ID -> Error out.
277
+ if not one_agent_for_module_found:
278
+ raise ValueError(
279
+ f"Could not find or derive any {which}-space for RLModule "
280
+ f"{module_id}! This can happen if your `config.rl_module(rl_"
281
+ f"module_spec=...)` does NOT contain space information for this"
282
+ " particular single-agent module AND your agent-to-module-"
283
+ "mapping function is stochastic (such that for some agent A, "
284
+ "more than one ModuleID might be returned somewhat randomly). "
285
+ f"Fix this error by providing {which}-space information using "
286
+ "`config.rl_module(rl_module_spec=MultiRLModuleSpec("
287
+ f"rl_module_specs={{'{module_id}': RLModuleSpec("
288
+ "observation_space=..., action_space=...)}}))"
289
+ )
290
+
291
+ return gym.spaces.Dict(ret_space)
.venv/lib/python3.11/site-packages/ray/rllib/connectors/common/batch_individual_items.py ADDED
@@ -0,0 +1,200 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Any, Dict, List, Optional
2
+
3
+ import gymnasium as gym
4
+
5
+ from ray.rllib.connectors.connector_v2 import ConnectorV2
6
+ from ray.rllib.core import DEFAULT_MODULE_ID
7
+ from ray.rllib.core.columns import Columns
8
+ from ray.rllib.core.rl_module.multi_rl_module import MultiRLModule
9
+ from ray.rllib.core.rl_module.rl_module import RLModule
10
+ from ray.rllib.utils.annotations import override
11
+ from ray.rllib.utils.spaces.space_utils import batch as batch_fn
12
+ from ray.rllib.utils.typing import EpisodeType
13
+ from ray.util.annotations import PublicAPI
14
+
15
+
16
@PublicAPI(stability="alpha")
class BatchIndividualItems(ConnectorV2):
    """Batches individual data-items (in lists) into tensors (with batch dimension).

    Note: This is one of the default env-to-module or Learner ConnectorV2 pieces that
    are added automatically by RLlib into every env-to-module/Learner connector
    pipeline, unless `config.add_default_connectors_to_env_to_module_pipeline` or
    `config.add_default_connectors_to_learner_pipeline` are set to
    False.

    The default env-to-module connector pipeline is:
    [
        [0 or more user defined ConnectorV2 pieces],
        AddObservationsFromEpisodesToBatch,
        AddTimeDimToBatchAndZeroPad,
        AddStatesFromEpisodesToBatch,
        AgentToModuleMapping,  # only in multi-agent setups!
        BatchIndividualItems,
        NumpyToTensor,
    ]
    The default Learner connector pipeline is:
    [
        [0 or more user defined ConnectorV2 pieces],
        AddObservationsFromEpisodesToBatch,
        AddColumnsFromEpisodesToTrainBatch,
        AddTimeDimToBatchAndZeroPad,
        AddStatesFromEpisodesToBatch,
        AgentToModuleMapping,  # only in multi-agent setups!
        BatchIndividualItems,
        NumpyToTensor,
    ]

    This ConnectorV2:
    - Operates only on the input `data`, NOT the incoming list of episode objects
    (ignored).
    - In the single-agent case, `data` must already be a dict, structured as follows by
    prior connector pieces of the same pipeline:
    [col0] -> {[(eps_id,)]: [list of individual batch items]}
    - In the multi-agent case, `data` must already be a dict, structured as follows by
    prior connector pieces of the same pipeline (in particular the
    `AgentToModuleMapping` piece):
    [module_id] -> [col0] -> [list of individual batch items]
    - Translates the above data under the different columns (e.g. "obs") into final
    (batched) structures. For the single-agent case, the output `data` looks like this:
    [col0] -> [possibly complex struct of batches (at the leafs)].
    For the multi-agent case, the output `data` looks like this:
    [module_id] -> [col0] -> [possibly complex struct of batches (at the leafs)].

    .. testcode::

        from ray.rllib.connectors.common import BatchIndividualItems
        from ray.rllib.utils.test_utils import check

        single_agent_batch = {
            "obs": {
                # Note that at this stage, next-obs is not part of the data anymore ..
                ("MA-EPS0",): [0, 1],
                ("MA-EPS1",): [2, 3],
            },
            "actions": {
                # .. so we have as many actions per episode as we have observations.
                ("MA-EPS0",): [4, 5],
                ("MA-EPS1",): [6, 7],
            },
        }

        # Create our (single-agent) connector piece.
        connector = BatchIndividualItems()

        # Call the connector (and thereby batch the individual items).
        output_batch = connector(
            rl_module=None,  # This particular connector works without an RLModule.
            batch=single_agent_batch,
            episodes=[],  # This particular connector works without a list of episodes.
            explore=True,
            shared_data={},
        )

        # `output_batch` should now be batched (episode IDs should have been removed
        # from the struct).
        check(
            output_batch,
            {"obs": [0, 1, 2, 3], "actions": [4, 5, 6, 7]},
        )
    """

    def __init__(
        self,
        input_observation_space: Optional[gym.Space] = None,
        input_action_space: Optional[gym.Space] = None,
        *,
        multi_agent: bool = False,
        **kwargs,
    ):
        """Initializes a BatchIndividualItems instance.

        Args:
            input_observation_space: The (optional) input observation space of this
                connector piece (forwarded to the ConnectorV2 base).
            input_action_space: The (optional) input action space of this connector
                piece (forwarded to the ConnectorV2 base).
            multi_agent: Whether this is a connector operating on a multi-agent
                observation space mapping AgentIDs to individual agents' observations.
        """
        super().__init__(
            input_observation_space=input_observation_space,
            input_action_space=input_action_space,
            **kwargs,
        )
        # If True, `batch` is expected to be ModuleID-keyed (see class docstring).
        self._multi_agent = multi_agent

    @override(ConnectorV2)
    def __call__(
        self,
        *,
        rl_module: RLModule,
        batch: Dict[str, Any],
        episodes: List[EpisodeType],
        explore: Optional[bool] = None,
        shared_data: Optional[dict] = None,
        **kwargs,
    ) -> Any:
        """Batches all list-valued leaf entries of `batch` (in place) and returns it.

        INFOS columns are deliberately left as plain (unbatched) lists. For the
        single-agent OBS column, the episode-ID of each individual item is recorded
        under `shared_data["memorized_map_structure"]` so later pieces can un-batch.
        NOTE(review): that recording assumes `shared_data` is a dict (not None) —
        confirm against callers.
        """
        is_multi_rl_module = isinstance(rl_module, MultiRLModule)

        # Convert lists of individual items into properly batched data.
        # Iterate over a shallow copy, b/c the loop body mutates `batch`
        # (e.g. pops columns and re-nests them under DEFAULT_MODULE_ID).
        for column, column_data in batch.copy().items():
            # Multi-agent case: This connector piece should only be used after(!)
            # the AgentToModuleMapping connector has already been applied, leading
            # to a batch structure of:
            # [module_id] -> [col0] -> [list of individual batch items]
            if is_multi_rl_module and column in rl_module:
                # Case, in which a column has already been properly batched before this
                # connector piece is called.
                if not self._multi_agent:
                    continue
                # If MA Off-Policy and independent sampling we need to overcome this
                # check.
                # Here, `column` is really a ModuleID and `column_data` is that
                # module's column dict -> batch each list-valued column (except
                # INFOS, which stays a list).
                module_data = column_data
                for col, col_data in module_data.copy().items():
                    if isinstance(col_data, list) and col != Columns.INFOS:
                        module_data[col] = batch_fn(
                            col_data,
                            individual_items_already_have_batch_dim="auto",
                        )

            # Simple case: There is a list directly under `column`:
            # Batch the list.
            elif isinstance(column_data, list):
                batch[column] = batch_fn(
                    column_data,
                    individual_items_already_have_batch_dim="auto",
                )

            # Single-agent case: There is a dict under `column` mapping
            # `eps_id` to lists of items:
            # Concat all these lists, then batch.
            elif not self._multi_agent:
                # TODO: only really need this in non-Learner connector pipeline
                memorized_map_structure = []
                list_to_be_batched = []
                for (eps_id,) in column_data.keys():
                    for item in column_data[(eps_id,)]:
                        # Only record structure for OBS column.
                        if column == Columns.OBS:
                            memorized_map_structure.append(eps_id)
                        list_to_be_batched.append(item)
                # INFOS should not be batched (remain a list).
                batch[column] = (
                    list_to_be_batched
                    if column == Columns.INFOS
                    else batch_fn(
                        list_to_be_batched,
                        individual_items_already_have_batch_dim="auto",
                    )
                )
                # Single-agent data, but a MultiRLModule: Re-nest the batched
                # column under the default module ID.
                if is_multi_rl_module:
                    if DEFAULT_MODULE_ID not in batch:
                        batch[DEFAULT_MODULE_ID] = {}
                    batch[DEFAULT_MODULE_ID][column] = batch.pop(column)

                # Only record structure for OBS column.
                if column == Columns.OBS:
                    shared_data["memorized_map_structure"] = memorized_map_structure
            # Multi-agent case: But Module ID not found in our RLModule -> Ignore this
            # `module_id` entirely.
            # else:
            #     pass

        return batch
.venv/lib/python3.11/site-packages/ray/rllib/connectors/common/frame_stacking.py ADDED
@@ -0,0 +1,147 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ from typing import Any, Dict, List, Optional
3
+
4
+ import gymnasium as gym
5
+ import tree # pip install dm_tree
6
+
7
+ from ray.rllib.connectors.connector_v2 import ConnectorV2
8
+ from ray.rllib.core.columns import Columns
9
+ from ray.rllib.core.rl_module.rl_module import RLModule
10
+ from ray.rllib.utils.annotations import override
11
+ from ray.rllib.utils.typing import EpisodeType
12
+ from ray.util.annotations import PublicAPI
13
+
14
+
15
@PublicAPI(stability="alpha")
class _FrameStacking(ConnectorV2):
    """A connector piece that stacks the previous n observations into one.

    Works both as an env-to-module piece (stacks the last `num_frames` observations
    of each ongoing episode) and as a Learner connector piece (stacks frames for
    every timestep of already-finished, numpy'ized episodes via a zero-copy
    stride trick).
    """

    @override(ConnectorV2)
    def recompute_output_observation_space(
        self,
        input_observation_space: gym.Space,
        input_action_space: gym.Space,
    ) -> gym.Space:
        """Returns the observation space resulting from frame-stacking.

        For multi-agent, converts each agent's space individually; otherwise,
        converts the single given space.
        """
        # Change our observation space according to the given stacking settings.
        if self._multi_agent:
            ret = {}
            for agent_id, obs_space in input_observation_space.spaces.items():
                ret[agent_id] = self._convert_individual_space(obs_space)
            return gym.spaces.Dict(ret)
        else:
            return self._convert_individual_space(input_observation_space)

    def __init__(
        self,
        input_observation_space: Optional[gym.Space] = None,
        input_action_space: Optional[gym.Space] = None,
        *,
        num_frames: int = 1,
        multi_agent: bool = False,
        as_learner_connector: bool = False,
        **kwargs,
    ):
        """Initializes a _FrameStackingConnector instance.

        Args:
            num_frames: The number of observation frames to stack up (into a single
                observation) for the RLModule's forward pass.
            multi_agent: Whether this is a connector operating on a multi-agent
                observation space mapping AgentIDs to individual agents' observations.
            as_learner_connector: Whether this connector is part of a Learner connector
                pipeline, as opposed to an env-to-module pipeline.
        """
        super().__init__(
            input_observation_space=input_observation_space,
            input_action_space=input_action_space,
            **kwargs,
        )

        self._multi_agent = multi_agent
        self.num_frames = num_frames
        self._as_learner_connector = as_learner_connector

    @override(ConnectorV2)
    def __call__(
        self,
        *,
        rl_module: RLModule,
        batch: Dict[str, Any],
        episodes: List[EpisodeType],
        explore: Optional[bool] = None,
        shared_data: Optional[dict] = None,
        **kwargs,
    ) -> Any:
        """Adds frame-stacked observations under Columns.OBS to `batch`."""
        # Learner connector pipeline. Episodes have been numpy'ized.
        if self._as_learner_connector:
            for sa_episode in self.single_agent_episode_iterator(
                episodes, agents_that_stepped_only=False
            ):

                def _map_fn(s, _sa_episode=sa_episode):
                    # `s` covers the episode's observations plus `num_frames - 1`
                    # lookback frames; assumes shape (T + num_frames - 1, w, h, 1)
                    # — TODO confirm against the obs space assert in
                    # `_convert_individual_space`.
                    # Squeeze out last dim.
                    s = np.squeeze(s, axis=-1)
                    # Calculate new shape and strides
                    new_shape = (len(_sa_episode), self.num_frames) + s.shape[1:]
                    # Repeating the time stride creates overlapping sliding
                    # windows over the time axis (zero-copy).
                    new_strides = (s.strides[0],) + s.strides
                    # Create a strided view of the array.
                    # Transpose to put the frame axis last:
                    # (T, num_frames, w, h) -> (T, w, h, num_frames).
                    return np.transpose(
                        np.lib.stride_tricks.as_strided(
                            s, shape=new_shape, strides=new_strides
                        ),
                        axes=[0, 2, 3, 1],
                    )

                # Get all observations from the episode in one np array (except for
                # the very last one, which is the final observation not needed for
                # learning).
                self.add_n_batch_items(
                    batch=batch,
                    column=Columns.OBS,
                    items_to_add=tree.map_structure(
                        _map_fn,
                        sa_episode.get_observations(
                            indices=slice(-self.num_frames + 1, len(sa_episode)),
                            neg_index_as_lookback=True,
                            fill=0.0,
                        ),
                    ),
                    num_items=len(sa_episode),
                    single_agent_episode=sa_episode,
                )

        # Env-to-module pipeline. Episodes still operate on lists.
        else:
            for sa_episode in self.single_agent_episode_iterator(episodes):
                assert not sa_episode.is_numpy
                # Get the list of observations to stack.
                obs_stack = sa_episode.get_observations(
                    indices=slice(-self.num_frames, None),
                    fill=0.0,
                )
                # Observation components are (w, h, 1)
                # -> concatenate along axis=-1 to (w, h, [num_frames]).
                stacked_obs = tree.map_structure(
                    lambda *s: np.concatenate(s, axis=2),
                    *obs_stack,
                )
                self.add_batch_item(
                    batch=batch,
                    column=Columns.OBS,
                    item_to_add=stacked_obs,
                    single_agent_episode=sa_episode,
                )

        return batch

    def _convert_individual_space(self, obs_space):
        """Converts a single (w, h, 1) Box space into its (w, h, num_frames) stack."""
        # Some assumptions: Space is box AND last dim (the stacking one) is 1.
        assert isinstance(obs_space, gym.spaces.Box), obs_space
        assert obs_space.shape[-1] == 1, obs_space

        return gym.spaces.Box(
            low=np.repeat(obs_space.low, repeats=self.num_frames, axis=-1),
            high=np.repeat(obs_space.high, repeats=self.num_frames, axis=-1),
            shape=list(obs_space.shape)[:-1] + [self.num_frames],
            dtype=obs_space.dtype,
        )
.venv/lib/python3.11/site-packages/ray/rllib/connectors/common/module_to_agent_unmapping.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from collections import defaultdict
2
+ from typing import Any, Dict, List, Optional
3
+
4
+ from ray.rllib.connectors.connector_v2 import ConnectorV2
5
+ from ray.rllib.core.rl_module.rl_module import RLModule
6
+ from ray.rllib.env.multi_agent_episode import MultiAgentEpisode
7
+ from ray.rllib.utils.annotations import override
8
+ from ray.rllib.utils.typing import EpisodeType
9
+ from ray.util.annotations import PublicAPI
10
+
11
+
12
@PublicAPI(stability="alpha")
class ModuleToAgentUnmapping(ConnectorV2):
    """Performs flipping of `data` from ModuleID- to AgentID based mapping.

    Before mapping:
    data[module1] -> [col, e.g. ACTIONS]
    -> [dict mapping episode-identifying tuples to lists of data]
    data[module2] -> ...

    After mapping:
    data[ACTIONS]: [dict mapping episode-identifying tuples to lists of data]

    Note that episode-identifying tuples have the form of: (episode_id,) in the
    single-agent case and (ma_episode_id, agent_id, module_id) in the multi-agent
    case.
    """

    @override(ConnectorV2)
    def __call__(
        self,
        *,
        rl_module: RLModule,
        batch: Dict[str, Any],
        episodes: List[EpisodeType],
        explore: Optional[bool] = None,
        shared_data: Optional[dict] = None,
        **kwargs,
    ) -> Any:
        """Flips `batch` from [module_id][column] nesting to a flat [column] dict.

        The per-column dicts (keyed by episode-identifying tuples) of all modules
        are merged into one dict per column.
        """
        # This Connector should only be used in a multi-agent setting.
        assert isinstance(episodes[0], MultiAgentEpisode)

        # Merge all modules' per-column dicts into a single column-keyed dict.
        flipped: Dict[str, Any] = {}
        for module_batch in batch.values():
            for column, eps_tuple_to_items in module_batch.items():
                flipped.setdefault(column, {}).update(eps_tuple_to_items)

        return flipped
.venv/lib/python3.11/site-packages/ray/rllib/connectors/common/numpy_to_tensor.py ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Any, Dict, List, Optional
2
+
3
+ import gymnasium as gym
4
+
5
+ from ray.rllib.connectors.connector_v2 import ConnectorV2
6
+ from ray.rllib.core import DEFAULT_MODULE_ID
7
+ from ray.rllib.core.columns import Columns
8
+ from ray.rllib.core.rl_module.multi_rl_module import MultiRLModule
9
+ from ray.rllib.core.rl_module.rl_module import RLModule
10
+ from ray.rllib.utils.annotations import override
11
+ from ray.rllib.utils.torch_utils import convert_to_torch_tensor
12
+ from ray.rllib.utils.typing import EpisodeType
13
+ from ray.util.annotations import PublicAPI
14
+
15
+
16
@PublicAPI(stability="alpha")
class NumpyToTensor(ConnectorV2):
    """Converts numpy arrays across the entire input data into (framework) tensors.

    The framework information is received via the provided `rl_module` arg in the
    `__call__()` method.

    Note: This is one of the default env-to-module or Learner ConnectorV2 pieces that
    are added automatically by RLlib into every env-to-module/Learner connector
    pipeline, unless `config.add_default_connectors_to_env_to_module_pipeline` or
    `config.add_default_connectors_to_learner_pipeline` are set to
    False.

    The default env-to-module connector pipeline is:
    [
        [0 or more user defined ConnectorV2 pieces],
        AddObservationsFromEpisodesToBatch,
        AddTimeDimToBatchAndZeroPad,
        AddStatesFromEpisodesToBatch,
        AgentToModuleMapping,  # only in multi-agent setups!
        BatchIndividualItems,
        NumpyToTensor,
    ]
    The default Learner connector pipeline is:
    [
        [0 or more user defined ConnectorV2 pieces],
        AddObservationsFromEpisodesToBatch,
        AddColumnsFromEpisodesToTrainBatch,
        AddTimeDimToBatchAndZeroPad,
        AddStatesFromEpisodesToBatch,
        AgentToModuleMapping,  # only in multi-agent setups!
        BatchIndividualItems,
        NumpyToTensor,
    ]

    This ConnectorV2:
    - Loops through the input `data` and converts all found numpy arrays into
    framework-specific tensors (possibly on a GPU).
    """

    def __init__(
        self,
        input_observation_space: Optional[gym.Space] = None,
        input_action_space: Optional[gym.Space] = None,
        *,
        as_learner_connector: bool = False,
        pin_mempory: Optional[bool] = None,
        device: Optional[str] = None,
        pin_memory: Optional[bool] = None,
        **kwargs,
    ):
        """Initializes a NumpyToTensor instance.

        Args:
            as_learner_connector: Whether this ConnectorV2 piece is used inside a
                LearnerConnectorPipeline or not.
            pin_mempory: Deprecated, misspelled alias of `pin_memory`, kept for
                backward compatibility. Ignored if `pin_memory` is given.
            device: An optional device to move the resulting tensors to. If not
                provided, all data will be left on the CPU.
            pin_memory: Whether to pin memory when creating (torch) tensors.
                If None (default), pins memory if `as_learner_connector` is True,
                otherwise doesn't pin memory.
            **kwargs: Forwarded to the ConnectorV2 base class.
        """
        super().__init__(
            input_observation_space=input_observation_space,
            input_action_space=input_action_space,
            **kwargs,
        )
        self._as_learner_connector = as_learner_connector
        # Prefer the correctly spelled `pin_memory`, then fall back to the legacy
        # `pin_mempory` arg, and finally default to pinning only on the Learner.
        if pin_memory is None:
            pin_memory = pin_mempory
        self._pin_memory = (
            pin_memory if pin_memory is not None else self._as_learner_connector
        )
        self._device = device

    @override(ConnectorV2)
    def __call__(
        self,
        *,
        rl_module: RLModule,
        batch: Dict[str, Any],
        episodes: List[EpisodeType],
        explore: Optional[bool] = None,
        shared_data: Optional[dict] = None,
        **kwargs,
    ) -> Any:
        """Converts all numpy leafs in `batch` to (torch) tensors.

        INFOS entries are left untouched (not tensorized). In the single-agent
        case, the converted module batch is returned directly (not nested under
        DEFAULT_MODULE_ID).

        Raises:
            ValueError: If `rl_module.framework` is anything other than "torch".
        """
        is_single_agent = False
        is_multi_rl_module = isinstance(rl_module, MultiRLModule)
        # If `batch` is not already a ModuleID-to-batch mapping, temporarily wrap
        # it under the default module ID so the loop below handles both layouts.
        if not (is_multi_rl_module and all(c in rl_module._rl_modules for c in batch)):
            is_single_agent = True
            batch = {DEFAULT_MODULE_ID: batch}

        # Iterate over a copy, b/c we re-assign `batch[module_id]` in the loop.
        for module_id, module_data in batch.copy().items():
            # Remove INFOS before conversion (they may contain arbitrary,
            # non-tensorizable objects) and re-attach them afterwards.
            infos = module_data.pop(Columns.INFOS, None)
            if rl_module.framework == "torch":
                module_data = convert_to_torch_tensor(
                    module_data, pin_memory=self._pin_memory, device=self._device
                )
            else:
                raise ValueError(
                    "`NumpyToTensor` does NOT support frameworks other than torch!"
                )
            if infos is not None:
                module_data[Columns.INFOS] = infos
            # Early out with data under(!) `DEFAULT_MODULE_ID`, b/c we are in plain
            # single-agent mode.
            if is_single_agent:
                return module_data
            batch[module_id] = module_data

        return batch
.venv/lib/python3.11/site-packages/ray/rllib/connectors/common/tensor_to_numpy.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Any, Dict, List, Optional
2
+
3
+ from ray.rllib.connectors.connector_v2 import ConnectorV2
4
+ from ray.rllib.core.rl_module.rl_module import RLModule
5
+ from ray.rllib.utils.annotations import override
6
+ from ray.rllib.utils.numpy import convert_to_numpy
7
+ from ray.rllib.utils.typing import EpisodeType
8
+ from ray.util.annotations import PublicAPI
9
+
10
+
11
@PublicAPI(stability="alpha")
class TensorToNumpy(ConnectorV2):
    """Converts (framework) tensors across the entire input data into numpy arrays."""

    @override(ConnectorV2)
    def __call__(
        self,
        *,
        rl_module: RLModule,
        batch: Dict[str, Any],
        episodes: List[EpisodeType],
        explore: Optional[bool] = None,
        shared_data: Optional[dict] = None,
        **kwargs,
    ) -> Any:
        """Returns `batch` with every tensor leaf converted to a numpy array.

        Episodes, `explore`, and `shared_data` are ignored by this piece.
        """
        numpy_batch = convert_to_numpy(batch)
        return numpy_batch
.venv/lib/python3.11/site-packages/ray/rllib/connectors/env_to_module/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (1.7 kB). View file
 
.venv/lib/python3.11/site-packages/ray/rllib/connectors/env_to_module/__pycache__/env_to_module_pipeline.cpython-311.pyc ADDED
Binary file (2.61 kB). View file
 
.venv/lib/python3.11/site-packages/ray/rllib/connectors/env_to_module/__pycache__/flatten_observations.cpython-311.pyc ADDED
Binary file (9.32 kB). View file
 
.venv/lib/python3.11/site-packages/ray/rllib/connectors/env_to_module/__pycache__/frame_stacking.cpython-311.pyc ADDED
Binary file (465 Bytes). View file
 
.venv/lib/python3.11/site-packages/ray/rllib/connectors/env_to_module/__pycache__/observation_preprocessor.cpython-311.pyc ADDED
Binary file (3.77 kB). View file
 
.venv/lib/python3.11/site-packages/ray/rllib/connectors/env_to_module/__pycache__/write_observations_to_episodes.cpython-311.pyc ADDED
Binary file (5.87 kB). View file
 
.venv/lib/python3.11/site-packages/ray/rllib/connectors/learner/__pycache__/add_one_ts_to_episodes_and_truncate.cpython-311.pyc ADDED
Binary file (6.34 kB). View file
 
.venv/lib/python3.11/site-packages/ray/rllib/connectors/learner/__pycache__/frame_stacking.cpython-311.pyc ADDED
Binary file (455 Bytes). View file
 
.venv/lib/python3.11/site-packages/ray/rllib/connectors/learner/__pycache__/learner_connector_pipeline.cpython-311.pyc ADDED
Binary file (2.75 kB). View file
 
.venv/lib/python3.11/site-packages/ray/rllib/connectors/learner/add_columns_from_episodes_to_train_batch.py ADDED
@@ -0,0 +1,166 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Any, Dict, List, Optional
2
+
3
+ from ray.rllib.connectors.connector_v2 import ConnectorV2
4
+ from ray.rllib.core.columns import Columns
5
+ from ray.rllib.core.rl_module.rl_module import RLModule
6
+ from ray.rllib.utils.annotations import override
7
+ from ray.rllib.utils.typing import EpisodeType
8
+ from ray.util.annotations import PublicAPI
9
+
10
+
11
@PublicAPI(stability="alpha")
class AddColumnsFromEpisodesToTrainBatch(ConnectorV2):
    """Adds infos/actions/rewards/terminateds/... to train batch.

    Note: This is one of the default Learner ConnectorV2 pieces that are added
    automatically by RLlib into every Learner connector pipeline, unless
    `config.add_default_connectors_to_learner_pipeline` is set to False.

    The default Learner connector pipeline is:
    [
        [0 or more user defined ConnectorV2 pieces],
        AddObservationsFromEpisodesToBatch,
        AddColumnsFromEpisodesToTrainBatch,
        AddTimeDimToBatchAndZeroPad,
        AddStatesFromEpisodesToBatch,
        AgentToModuleMapping,  # only in multi-agent setups!
        BatchIndividualItems,
        NumpyToTensor,
    ]

    Does NOT add observations to train batch (these should have already been added
    by another ConnectorV2 piece: `AddObservationsToTrainBatch` in the same pipeline).

    If provided with `episodes` data, this connector piece makes sure that the final
    train batch going into the RLModule for updating (`forward_train()` call) contains
    at the minimum:
    - Observations: From all episodes under the Columns.OBS key.
    - Actions, rewards, terminal/truncation flags: From all episodes under the
    respective keys.
    - All data inside the episodes' `extra_model_outs` property, e.g. action logp and
    action probs under the respective keys.
    - Internal states: These will NOT be added to the batch by this connector piece
    as this functionality is handled by a different default connector piece:
    `AddStatesFromEpisodesToBatch`.

    If the user wants to customize their own data under the given keys (e.g. obs,
    actions, ...), they can extract from the episodes or recompute from `data`
    their own data and store it in `data` under those keys. In this case, the default
    connector will not change the data under these keys and simply act as a
    pass-through.
    """

    @override(ConnectorV2)
    def __call__(
        self,
        *,
        rl_module: RLModule,
        batch: Optional[Dict[str, Any]],
        episodes: List[EpisodeType],
        explore: Optional[bool] = None,
        shared_data: Optional[dict] = None,
        **kwargs,
    ) -> Any:
        """Fills the standard train-batch columns from `episodes` (pass-through if
        a column is already present in `batch`)."""
        # Infos.
        self._add_column(
            batch,
            episodes,
            Columns.INFOS,
            lambda eps: eps.get_infos(slice(0, len(eps))),
        )
        # Actions.
        self._add_column(
            batch,
            episodes,
            Columns.ACTIONS,
            lambda eps: [eps.get_actions(indices=ts) for ts in range(len(eps))],
        )
        # Rewards.
        self._add_column(
            batch,
            episodes,
            Columns.REWARDS,
            lambda eps: [eps.get_rewards(indices=ts) for ts in range(len(eps))],
        )
        # Terminateds: False for all timesteps, except (possibly) the last one.
        self._add_column(
            batch,
            episodes,
            Columns.TERMINATEDS,
            lambda eps: (
                [False] * (len(eps) - 1) + [eps.is_terminated] if len(eps) > 0 else []
            ),
        )
        # Truncateds: False for all timesteps, except (possibly) the last one.
        self._add_column(
            batch,
            episodes,
            Columns.TRUNCATEDS,
            lambda eps: (
                [False] * (len(eps) - 1) + [eps.is_truncated] if len(eps) > 0 else []
            ),
        )
        # Extra model outputs (except for STATE_OUT, which will be handled by another
        # default connector piece). Also, like with all the fields above, skip
        # those that the user already seemed to have populated via custom connector
        # pieces. NOTE: `skip_columns` is computed here, AFTER the fixed columns
        # above were (possibly) added, so both user-populated and just-added columns
        # are skipped.
        skip_columns = set(batch.keys()) | {Columns.STATE_IN, Columns.STATE_OUT}
        for sa_episode in self.single_agent_episode_iterator(
            episodes,
            agents_that_stepped_only=False,
        ):
            for column in sa_episode.extra_model_outputs.keys():
                if column not in skip_columns:
                    self.add_n_batch_items(
                        batch,
                        column,
                        items_to_add=[
                            sa_episode.get_extra_model_outputs(key=column, indices=ts)
                            for ts in range(len(sa_episode))
                        ],
                        num_items=len(sa_episode),
                        single_agent_episode=sa_episode,
                    )

        return batch

    def _add_column(self, batch, episodes, column, items_fn):
        """Adds one per-timestep column to `batch`, unless it is already present.

        Args:
            batch: The train batch (dict) under construction; mutated in place.
            episodes: The list of episodes to extract items from.
            column: The Columns key to fill.
            items_fn: Callable mapping a single-agent episode to the list of
                per-timestep items to add for that episode (one item per timestep,
                i.e. `len(episode)` items).
        """
        # Pass-through: a user's custom connector piece already filled this column.
        if column in batch:
            return
        for sa_episode in self.single_agent_episode_iterator(
            episodes,
            agents_that_stepped_only=False,
        ):
            self.add_n_batch_items(
                batch,
                column,
                items_to_add=items_fn(sa_episode),
                num_items=len(sa_episode),
                single_agent_episode=sa_episode,
            )
.venv/lib/python3.11/site-packages/ray/rllib/connectors/learner/add_next_observations_from_episodes_to_train_batch.py ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Any, Dict, List, Optional
2
+
3
+ from ray.rllib.core.columns import Columns
4
+ from ray.rllib.connectors.connector_v2 import ConnectorV2
5
+ from ray.rllib.core.rl_module.rl_module import RLModule
6
+ from ray.rllib.utils.annotations import override
7
+ from ray.rllib.utils.typing import EpisodeType
8
+ from ray.util.annotations import PublicAPI
9
+
10
+
11
@PublicAPI(stability="alpha")
class AddNextObservationsFromEpisodesToTrainBatch(ConnectorV2):
    """Adds the NEXT_OBS column with the correct episode observations to train batch.

    - Operates on a list of Episode objects.
    - Gets all observation(s) from all the given episodes (except the very first ones)
    and adds them to the batch under construction in the NEXT_OBS column (as a list of
    individual observations).
    - Does NOT alter any observations (or other data) in the given episodes.
    - Can be used in Learner connector pipelines.

    .. testcode::

        import gymnasium as gym
        import numpy as np

        from ray.rllib.connectors.learner import (
            AddNextObservationsFromEpisodesToTrainBatch
        )
        from ray.rllib.core.columns import Columns
        from ray.rllib.env.single_agent_episode import SingleAgentEpisode
        from ray.rllib.utils.test_utils import check

        # Create two dummy SingleAgentEpisodes, each containing 3 observations,
        # 2 actions and 2 rewards (both episodes are length=2).
        obs_space = gym.spaces.Box(-1.0, 1.0, (2,), np.float32)
        act_space = gym.spaces.Discrete(2)

        episodes = [SingleAgentEpisode(
            observations=[obs_space.sample(), obs_space.sample(), obs_space.sample()],
            actions=[act_space.sample(), act_space.sample()],
            rewards=[1.0, 2.0],
            len_lookback_buffer=0,
        ) for _ in range(2)]
        eps_1_next_obses = episodes[0].get_observations([1, 2])
        eps_2_next_obses = episodes[1].get_observations([1, 2])
        print(f"1st Episode's next obses are {eps_1_next_obses}")
        print(f"2nd Episode's next obses are {eps_2_next_obses}")

        # Create an instance of this class.
        connector = AddNextObservationsFromEpisodesToTrainBatch()

        # Call the connector with the two created episodes.
        # Note that this particular connector works without an RLModule, so we
        # simplify here for the sake of this example.
        output_data = connector(
            rl_module=None,
            batch={},
            episodes=episodes,
            explore=True,
            shared_data={},
        )
        # The output data should now contain the last observations of both episodes,
        # in a "per-episode organized" fashion.
        check(
            output_data,
            {
                Columns.NEXT_OBS: {
                    (episodes[0].id_,): eps_1_next_obses,
                    (episodes[1].id_,): eps_2_next_obses,
                },
            },
        )
    """

    @override(ConnectorV2)
    def __call__(
        self,
        *,
        rl_module: RLModule,
        batch: Dict[str, Any],
        episodes: List[EpisodeType],
        explore: Optional[bool] = None,
        shared_data: Optional[dict] = None,
        **kwargs,
    ) -> Any:
        # Respect a NEXT_OBS column already produced upstream (e.g. by a custom
        # connector piece): nothing to do in that case.
        if Columns.NEXT_OBS in batch:
            return batch

        # This is a Learner-only connector -> iterate over ALL single-agent
        # episodes (not just those whose agents stepped last), to build the
        # full train batch.
        for episode in self.single_agent_episode_iterator(
            episodes,
            agents_that_stepped_only=False,
        ):
            ep_len = len(episode)
            # Observations shifted forward by one timestep are exactly the
            # "next observations" for each of the `ep_len` transitions.
            next_obses = episode.get_observations(slice(1, ep_len + 1))
            self.add_n_batch_items(
                batch,
                Columns.NEXT_OBS,
                items_to_add=next_obses,
                num_items=ep_len,
                single_agent_episode=episode,
            )
        return batch