diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/core/__pycache__/__init__.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/rllib/core/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9920b5ee9f060416d42a419e5374f077c9db147c Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/rllib/core/__pycache__/__init__.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/core/__pycache__/columns.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/rllib/core/__pycache__/columns.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..123e07b609c5f6bc3153f0d01caf5e6007d4d7df Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/rllib/core/__pycache__/columns.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/core/columns.py b/.venv/lib/python3.11/site-packages/ray/rllib/core/columns.py new file mode 100644 index 0000000000000000000000000000000000000000..98cb8646913e2b333eab3243308b82419f05d9bc --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/rllib/core/columns.py @@ -0,0 +1,73 @@ +from ray.util.annotations import DeveloperAPI + + +@DeveloperAPI +class Columns: + """Definitions of common column names for RL data, e.g. 'obs', 'rewards', etc.. + + Note that this replaces the `SampleBatch` and `Postprocessing` columns (of the same + name). + """ + + # Observation received from an environment after `reset()` or `step()`. + OBS = "obs" + # Infos received from an environment after `reset()` or `step()`. + INFOS = "infos" + + # Action computed/sampled by an RLModule. + ACTIONS = "actions" + # Action actually sent to the (gymnasium) `Env.step()` method. + ACTIONS_FOR_ENV = "actions_for_env" + # Reward returned by `env.step()`. + REWARDS = "rewards" + # Termination signal received from an environment after `step()`. 
+ TERMINATEDS = "terminateds" + # Truncation signal received from an environment after `step()` (e.g. because + # of a reached time limit). + TRUNCATEDS = "truncateds" + + # Next observation: Only used by algorithms that need to look at TD-data for + # training, such as off-policy/DQN algos. + NEXT_OBS = "new_obs" + + # Uniquely identifies an episode + EPS_ID = "eps_id" + AGENT_ID = "agent_id" + MODULE_ID = "module_id" + + # The size of non-zero-padded data within a (e.g. LSTM) zero-padded + # (B, T, ...)-style train batch. + SEQ_LENS = "seq_lens" + # Episode timestep counter. + T = "t" + + # Common extra RLModule output keys. + STATE_IN = "state_in" + NEXT_STATE_IN = "next_state_in" + STATE_OUT = "state_out" + NEXT_STATE_OUT = "next_state_out" + EMBEDDINGS = "embeddings" + ACTION_DIST_INPUTS = "action_dist_inputs" + ACTION_PROB = "action_prob" + ACTION_LOGP = "action_logp" + + # Value function predictions. + VF_PREDS = "vf_preds" + # Values, predicted at one timestep beyond the last timestep taken. + # These are usually calculated via the value function network using the final + # observation (and in case of an RNN: the last returned internal state). + VALUES_BOOTSTRAPPED = "values_bootstrapped" + + # Postprocessing columns. + ADVANTAGES = "advantages" + VALUE_TARGETS = "value_targets" + + # Intrinsic rewards (learning with curiosity). + INTRINSIC_REWARDS = "intrinsic_rewards" + # Discounted sum of rewards till the end of the episode (or chunk). + RETURNS_TO_GO = "returns_to_go" + + # Loss mask. If provided in a train batch, a Learner's compute_loss_for_module + # method should respect the False-set value in here and mask out the respective + # items form the loss. 
+ LOSS_MASK = "loss_mask" diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/core/models/__init__.py b/.venv/lib/python3.11/site-packages/ray/rllib/core/models/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/core/models/__pycache__/__init__.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/rllib/core/models/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1327a6d9267bcd621333ccef420e2c6e1e55b0b1 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/rllib/core/models/__pycache__/__init__.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/core/models/__pycache__/base.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/rllib/core/models/__pycache__/base.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..033eeea46a075dbe19cb5c81322c5cd455c2bdc3 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/rllib/core/models/__pycache__/base.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/core/models/__pycache__/catalog.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/rllib/core/models/__pycache__/catalog.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..598627d0ca8725373f89c89ba934cc06f278d236 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/rllib/core/models/__pycache__/catalog.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/core/models/__pycache__/configs.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/rllib/core/models/__pycache__/configs.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..093baff45d066d42d689b32280590f805b7a05d6 Binary files /dev/null and 
b/.venv/lib/python3.11/site-packages/ray/rllib/core/models/__pycache__/configs.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/core/models/base.py b/.venv/lib/python3.11/site-packages/ray/rllib/core/models/base.py new file mode 100644 index 0000000000000000000000000000000000000000..3bb6304449a577a767085e2743a322d70b1af124 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/rllib/core/models/base.py @@ -0,0 +1,444 @@ +import abc +from typing import List, Optional, Tuple, Union + + +from ray.rllib.core.columns import Columns +from ray.rllib.core.models.configs import ModelConfig +from ray.rllib.core.models.specs.specs_base import Spec +from ray.rllib.policy.rnn_sequencing import get_fold_unfold_fns +from ray.rllib.utils.annotations import ExperimentalAPI, override +from ray.rllib.utils.typing import TensorType +from ray.util.annotations import DeveloperAPI + +# Top level keys that unify model i/o. +ENCODER_OUT: str = "encoder_out" +# For Actor-Critic algorithms, these signify data related to the actor and critic +ACTOR: str = "actor" +CRITIC: str = "critic" + + +@ExperimentalAPI +class Model(abc.ABC): + """Framework-agnostic base class for RLlib models. + + Models are low-level neural network components that offer input- and + output-specification, a forward method, and a get_initial_state method. Models + are composed in RLModules. + + Usage Example together with ModelConfig: + + .. 
testcode:: + + from ray.rllib.core.models.base import Model + from ray.rllib.core.models.configs import ModelConfig + from dataclasses import dataclass + + class MyModel(Model): + def __init__(self, config): + super().__init__(config) + self.my_param = config.my_param * 2 + + def _forward(self, input_dict): + return input_dict["obs"] * self.my_param + + + @dataclass + class MyModelConfig(ModelConfig): + my_param: int = 42 + + def build(self, framework: str): + if framework == "bork": + return MyModel(self) + + + config = MyModelConfig(my_param=3) + model = config.build(framework="bork") + print(model._forward({"obs": 1})) + + .. testoutput:: + + 6 + + """ + + def __init__(self, config: ModelConfig): + self.config = config + + def __init_subclass__(cls, **kwargs): + # Automatically add a __post_init__ method to all subclasses of Model. + # This method is called after the __init__ method of the subclass. + def init_decorator(previous_init): + def new_init(self, *args, **kwargs): + previous_init(self, *args, **kwargs) + if type(self) is cls: + self.__post_init__() + + return new_init + + cls.__init__ = init_decorator(cls.__init__) + + def __post_init__(self): + """Called automatically after the __init__ method of the subclasses. + + The module first calls the __init__ method of the subclass, With in the + __init__ you should call the super().__init__ method. Then after the __init__ + method of the subclass is called, the __post_init__ method is called. + + This is a good place to do any initialization that requires access to the + subclass's attributes. + """ + self._input_specs = self.get_input_specs() + self._output_specs = self.get_output_specs() + + def get_input_specs(self) -> Optional[Spec]: + """Returns the input specs of this model. + + Override `get_input_specs` to define your own input specs. + This method should not be called often, e.g. every forward pass. + Instead, it should be called once at instantiation to define Model.input_specs. 
+ + Returns: + Spec: The input specs. + """ + return None + + def get_output_specs(self) -> Optional[Spec]: + """Returns the output specs of this model. + + Override `get_output_specs` to define your own output specs. + This method should not be called often, e.g. every forward pass. + Instead, it should be called once at instantiation to define Model.output_specs. + + Returns: + Spec: The output specs. + """ + return None + + @property + def input_specs(self) -> Spec: + """Returns the input spec of this model.""" + return self._input_specs + + @input_specs.setter + def input_specs(self, spec: Spec) -> None: + raise ValueError( + "`input_specs` cannot be set directly. Override " + "Model.get_input_specs() instead. Set Model._input_specs if " + "you want to override this behavior." + ) + + @property + def output_specs(self) -> Spec: + """Returns the output specs of this model.""" + return self._output_specs + + @output_specs.setter + def output_specs(self, spec: Spec) -> None: + raise ValueError( + "`output_specs` cannot be set directly. Override " + "Model.get_output_specs() instead. Set Model._output_specs if " + "you want to override this behavior." + ) + + def get_initial_state(self) -> Union[dict, List[TensorType]]: + """Returns the initial state of the Model. + + It can be left empty if this Model is not stateful. + """ + return dict() + + @abc.abstractmethod + def _forward(self, input_dict: dict, **kwargs) -> dict: + """Returns the output of this model for the given input. + + This method is called by the forwarding method of the respective framework + that is itself wrapped by RLlib in order to check model inputs and outputs. + + Args: + input_dict: The input tensors. + **kwargs: Forward compatibility kwargs. + + Returns: + dict: The output tensors. 
+ """ + + @abc.abstractmethod + def get_num_parameters(self) -> Tuple[int, int]: + """Returns a tuple of (num trainable params, num non-trainable params).""" + + @abc.abstractmethod + def _set_to_dummy_weights(self, value_sequence=(-0.02, -0.01, 0.01, 0.02)) -> None: + """Helper method to set all weights to deterministic dummy values. + + Calling this method on two `Models` that have the same architecture using + the exact same `value_sequence` arg should make both models output the exact + same values on arbitrary inputs. This will work, even if the two `Models` + are of different DL frameworks. + + Args: + value_sequence: Looping through the list of all parameters (weight matrices, + bias tensors, etc..) of this model, in each iteration i, we set all + values in this parameter to `value_sequence[i % len(value_sequence)]` + (round robin). + + Example: + TODO: + """ + + +@ExperimentalAPI +class Encoder(Model, abc.ABC): + """The framework-agnostic base class for all RLlib encoders. + + Encoders are used to transform observations to a latent space. + Therefore, their `input_specs` contains the observation space dimensions. + Similarly, their `output_specs` contains the latent space dimensions. + Encoders can be recurrent, in which case the state should be part of input- and + output_specs. The latent vectors produced by an encoder are fed into subsequent + "heads". Any implementation of Encoder should also be callable. This should be done + by also inheriting from a framework-specific model base-class, s.a. TorchModel or + TfModel. + + Abstract illustration of typical flow of tensors: + + Inputs + | + Encoder + | \ + Head1 Head2 + | / + Outputs + + Outputs of encoders are generally of shape (B, latent_dim) or (B, T, latent_dim). + That is, for time-series data, we encode into the latent space for each time step. + This should be reflected in the `output_specs`. + + Usage example together with a ModelConfig: + + .. 
testcode:: + + from dataclasses import dataclass + import numpy as np + + from ray.rllib.core.columns import Columns + from ray.rllib.core.models.base import Encoder, ENCODER_OUT + from ray.rllib.core.models.configs import ModelConfig + from ray.rllib.policy.sample_batch import SampleBatch + + class NumpyEncoder(Encoder): + def __init__(self, config): + super().__init__(config) + self.factor = config.factor + + def __call__(self, *args, **kwargs): + # This is a dummy method to do checked forward passes. + return self._forward(*args, **kwargs) + + def _forward(self, input_dict, **kwargs): + obs = input_dict[Columns.OBS] + return { + ENCODER_OUT: np.array(obs) * self.factor, + Columns.STATE_OUT: ( + np.array(input_dict[Columns.STATE_IN]) + * self.factor + ), + } + + @dataclass + class NumpyEncoderConfig(ModelConfig): + factor: int = None + + def build(self, framework: str): + return NumpyEncoder(self) + + config = NumpyEncoderConfig(factor=2) + encoder = NumpyEncoder(config) + print(encoder({Columns.OBS: 1, Columns.STATE_IN: 2})) + + .. testoutput:: + + {'encoder_out': 2, 'state_out': 4} + + """ + + @abc.abstractmethod + def _forward(self, input_dict: dict, **kwargs) -> dict: + """Returns the latent of the encoder for the given inputs. + + This method is called by the forwarding method of the respective framework + that is itself wrapped by RLlib in order to check model inputs and outputs. + + The input dict contains at minimum the observation and the state of the encoder + (None for stateless encoders). + The output dict contains at minimum the latent and the state of the encoder + (None for stateless encoders). + To establish an agreement between the encoder and RLModules, these values + have the fixed keys `Columns.OBS` for the `input_dict`, + and `ACTOR` and `CRITIC` for the returned dict. + + Args: + input_dict: The input tensors. Must contain at a minimum the keys + Columns.OBS and Columns.STATE_IN (which might be None for stateless + encoders). 
+ **kwargs: Forward compatibility kwargs. + + Returns: + The output tensors. Must contain at a minimum the key ENCODER_OUT. + """ + + +@ExperimentalAPI +class ActorCriticEncoder(Encoder): + """An encoder that potentially holds two stateless encoders. + + This is a special case of Encoder that can either enclose a single, + shared encoder or two separate encoders: One for the actor and one for the + critic. The two encoders are of the same type, and we can therefore make the + assumption that they have the same input and output specs. + """ + + framework = None + + def __init__(self, config: ModelConfig) -> None: + super().__init__(config) + + if config.shared: + self.encoder = config.base_encoder_config.build(framework=self.framework) + else: + self.actor_encoder = config.base_encoder_config.build( + framework=self.framework + ) + self.critic_encoder = None + if not config.inference_only: + self.critic_encoder = config.base_encoder_config.build( + framework=self.framework + ) + + @override(Model) + def _forward(self, inputs: dict, **kwargs) -> dict: + if self.config.shared: + encoder_outs = self.encoder(inputs, **kwargs) + return { + ENCODER_OUT: { + ACTOR: encoder_outs[ENCODER_OUT], + **( + {} + if self.config.inference_only + else {CRITIC: encoder_outs[ENCODER_OUT]} + ), + } + } + else: + # Encoders should not modify inputs, so we can pass the same inputs + actor_out = self.actor_encoder(inputs, **kwargs) + if self.critic_encoder: + critic_out = self.critic_encoder(inputs, **kwargs) + + return { + ENCODER_OUT: { + ACTOR: actor_out[ENCODER_OUT], + **( + {} + if self.config.inference_only + else {CRITIC: critic_out[ENCODER_OUT]} + ), + } + } + + +@ExperimentalAPI +class StatefulActorCriticEncoder(Encoder): + """An encoder that potentially holds two potentially stateful encoders. + + This is a special case of Encoder that can either enclose a single, + shared encoder or two separate encoders: One for the actor and one for the + critic. 
The two encoders are of the same type, and we can therefore make the + assumption that they have the same input and output specs. + + If this encoder wraps a single encoder, state in input- and output dicts + is simply stored under the key `STATE_IN` and `STATE_OUT`, respectively. + If this encoder wraps two encoders, state in input- and output dicts is + stored under the keys `(STATE_IN, ACTOR)` and `(STATE_IN, CRITIC)` and + `(STATE_OUT, ACTOR)` and `(STATE_OUT, CRITIC)`, respectively. + """ + + framework = None + + def __init__(self, config: ModelConfig) -> None: + super().__init__(config) + + if config.shared: + self.encoder = config.base_encoder_config.build(framework=self.framework) + else: + self.actor_encoder = config.base_encoder_config.build( + framework=self.framework + ) + self.critic_encoder = config.base_encoder_config.build( + framework=self.framework + ) + + @override(Model) + def get_initial_state(self): + if self.config.shared: + return self.encoder.get_initial_state() + else: + return { + ACTOR: self.actor_encoder.get_initial_state(), + CRITIC: self.critic_encoder.get_initial_state(), + } + + @override(Model) + def _forward(self, inputs: dict, **kwargs) -> dict: + outputs = {} + + if self.config.shared: + outs = self.encoder(inputs, **kwargs) + encoder_out = outs.pop(ENCODER_OUT) + outputs[ENCODER_OUT] = {ACTOR: encoder_out, CRITIC: encoder_out} + outputs[Columns.STATE_OUT] = outs[Columns.STATE_OUT] + else: + # Shallow copy inputs so that we can add states without modifying + # original dict. 
+ actor_inputs = inputs.copy() + critic_inputs = inputs.copy() + actor_inputs[Columns.STATE_IN] = inputs[Columns.STATE_IN][ACTOR] + critic_inputs[Columns.STATE_IN] = inputs[Columns.STATE_IN][CRITIC] + + actor_out = self.actor_encoder(actor_inputs, **kwargs) + critic_out = self.critic_encoder(critic_inputs, **kwargs) + + outputs[ENCODER_OUT] = { + ACTOR: actor_out[ENCODER_OUT], + CRITIC: critic_out[ENCODER_OUT], + } + + outputs[Columns.STATE_OUT] = { + ACTOR: actor_out[Columns.STATE_OUT], + CRITIC: critic_out[Columns.STATE_OUT], + } + + return outputs + + +@DeveloperAPI +def tokenize(tokenizer: Encoder, inputs: dict, framework: str) -> dict: + """Tokenizes the observations from the input dict. + + Args: + tokenizer: The tokenizer to use. + inputs: The input dict. + + Returns: + The output dict. + """ + # Tokenizer may depend solely on observations. + obs = inputs[Columns.OBS] + tokenizer_inputs = {Columns.OBS: obs} + size = list(obs.size() if framework == "torch" else obs.shape) + b_dim, t_dim = size[:2] + fold, unfold = get_fold_unfold_fns(b_dim, t_dim, framework=framework) + # Push through the tokenizer encoder. + out = tokenizer(fold(tokenizer_inputs)) + out = out[ENCODER_OUT] + # Then unfold batch- and time-dimensions again. 
+ return unfold(out) diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/core/models/catalog.py b/.venv/lib/python3.11/site-packages/ray/rllib/core/models/catalog.py new file mode 100644 index 0000000000000000000000000000000000000000..136dd713e01aff7183ba8dee308a2474ac8c6a9f --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/rllib/core/models/catalog.py @@ -0,0 +1,667 @@ +import dataclasses +import enum +import functools +from typing import Optional + +import gymnasium as gym +import numpy as np +import tree +from gymnasium.spaces import Box, Dict, Discrete, MultiDiscrete, Tuple + +from ray.rllib.core.models.base import Encoder +from ray.rllib.core.models.configs import ( + CNNEncoderConfig, + MLPEncoderConfig, + RecurrentEncoderConfig, +) +from ray.rllib.core.models.configs import ModelConfig +from ray.rllib.core.rl_module.default_model_config import DefaultModelConfig +from ray.rllib.models.distributions import Distribution +from ray.rllib.models.preprocessors import get_preprocessor, Preprocessor +from ray.rllib.models.utils import get_filter_config +from ray.rllib.utils.deprecation import deprecation_warning, DEPRECATED_VALUE +from ray.rllib.utils.error import UnsupportedSpaceException +from ray.rllib.utils.spaces.simplex import Simplex +from ray.rllib.utils.spaces.space_utils import flatten_space +from ray.rllib.utils.spaces.space_utils import get_base_struct_from_space +from ray.rllib.utils.annotations import ( + OverrideToImplementCustomLogic, + OverrideToImplementCustomLogic_CallToSuperRecommended, +) + + +class Catalog: + """Describes the sub-module-architectures to be used in RLModules. + + RLlib's native RLModules get their Models from a Catalog object. + By default, that Catalog builds the configs it has as attributes. + This component was build to be hackable and extensible. You can inject custom + components into RL Modules by overriding the `build_xxx` methods of this class. 
+ Note that it is recommended to write a custom RL Module for a single use-case. + Modifications to Catalogs mostly make sense if you want to reuse the same + Catalog for different RL Modules. For example if you have written a custom + encoder and want to inject it into different RL Modules (e.g. for PPO, DQN, etc.). + You can influence the decision tree that determines the sub-components by modifying + `Catalog._determine_components_hook`. + + Usage example: + + # Define a custom catalog + + .. testcode:: + + import torch + import gymnasium as gym + from ray.rllib.core.models.configs import MLPHeadConfig + from ray.rllib.core.models.catalog import Catalog + + class MyCatalog(Catalog): + def __init__( + self, + observation_space: gym.Space, + action_space: gym.Space, + model_config_dict: dict, + ): + super().__init__(observation_space, action_space, model_config_dict) + self.my_model_config = MLPHeadConfig( + hidden_layer_dims=[64, 32], + input_dims=[self.observation_space.shape[0]], + ) + + def build_my_head(self, framework: str): + return self.my_model_config.build(framework=framework) + + # With that, RLlib can build and use models from this catalog like this: + catalog = MyCatalog(gym.spaces.Box(0, 1), gym.spaces.Box(0, 1), {}) + my_head = catalog.build_my_head(framework="torch") + + # Make a call to the built model. + out = my_head(torch.Tensor([[1]])) + """ + + # TODO (Sven): Add `framework` arg to c'tor and remove this arg from `build` + # methods. This way, we can already know in the c'tor of Catalog, what the exact + # action distibution objects are and thus what the output dims for e.g. a pi-head + # will be. + def __init__( + self, + observation_space: gym.Space, + action_space: gym.Space, + model_config_dict: dict, + # deprecated args. + view_requirements=DEPRECATED_VALUE, + ): + """Initializes a Catalog with a default encoder config. + + Args: + observation_space: The observation space of the environment. 
+ action_space: The action space of the environment. + model_config_dict: The model config that specifies things like hidden + dimensions and activations functions to use in this Catalog. + """ + if view_requirements != DEPRECATED_VALUE: + deprecation_warning(old="Catalog(view_requirements=..)", error=True) + + # TODO (sven): The following logic won't be needed anymore, once we get rid of + # Catalogs entirely. We will assert directly inside the algo's DefaultRLModule + # class that the `model_config` is a DefaultModelConfig. Thus users won't be + # able to pass in partial config dicts into a default model (alternatively, we + # could automatically augment the user provided dict by the default config + # dataclass object only(!) for default modules). + if dataclasses.is_dataclass(model_config_dict): + model_config_dict = dataclasses.asdict(model_config_dict) + default_config = dataclasses.asdict(DefaultModelConfig()) + # end: TODO + + self.observation_space = observation_space + self.action_space = action_space + + self._model_config_dict = default_config | model_config_dict + self._latent_dims = None + + self._determine_components_hook() + + @OverrideToImplementCustomLogic_CallToSuperRecommended + def _determine_components_hook(self): + """Decision tree hook for subclasses to override. + + By default, this method executes the decision tree that determines the + components that a Catalog builds. You can extend the components by overriding + this or by adding to the constructor of your subclass. + + Override this method if you don't want to use the default components + determined here. If you want to use them but add additional components, you + should call `super()._determine_components()` at the beginning of your + implementation. + + This makes it so that subclasses are not forced to create an encoder config + if the rest of their catalog is not dependent on it or if it breaks. 
+ At the end of this method, an attribute `Catalog.latent_dims` + should be set so that heads can be built using that information. + """ + self._encoder_config = self._get_encoder_config( + observation_space=self.observation_space, + action_space=self.action_space, + model_config_dict=self._model_config_dict, + ) + + # Create a function that can be called when framework is known to retrieve the + # class type for action distributions + self._action_dist_class_fn = functools.partial( + self._get_dist_cls_from_action_space, action_space=self.action_space + ) + + # The dimensions of the latent vector that is output by the encoder and fed + # to the heads. + self.latent_dims = self._encoder_config.output_dims + + @property + def latent_dims(self): + """Returns the latent dimensions of the encoder. + + This establishes an agreement between encoder and heads about the latent + dimensions. Encoders can be built to output a latent tensor with + `latent_dims` dimensions, and heads can be built with tensors of + `latent_dims` dimensions as inputs. This can be safely ignored if this + agreement is not needed in case of modifications to the Catalog. + + Returns: + The latent dimensions of the encoder. + """ + return self._latent_dims + + @latent_dims.setter + def latent_dims(self, value): + self._latent_dims = value + + @OverrideToImplementCustomLogic + def build_encoder(self, framework: str) -> Encoder: + """Builds the encoder. + + By default, this method builds an encoder instance from Catalog._encoder_config. + + You should override this if you want to use RLlib's default RL Modules but + only want to change the encoder. For example, if you want to use a custom + encoder, but want to use RLlib's default heads, action distribution and how + tensors are routed between them. If you want to have full control over the + RL Module, we recommend writing your own RL Module by inheriting from one of + RLlib's RL Modules instead. + + Args: + framework: The framework to use. 
Either "torch" or "tf2". + + Returns: + The encoder. + """ + assert hasattr(self, "_encoder_config"), ( + "You must define a `Catalog._encoder_config` attribute in your Catalog " + "subclass or override the `Catalog.build_encoder` method. By default, " + "an encoder_config is created in the __post_init__ method." + ) + return self._encoder_config.build(framework=framework) + + @OverrideToImplementCustomLogic + def get_action_dist_cls(self, framework: str): + """Get the action distribution class. + + The default behavior is to get the action distribution from the + `Catalog._action_dist_class_fn`. + + You should override this to have RLlib build your custom action + distribution instead of the default one. For example, if you don't want to + use RLlib's default RLModules with their default models, but only want to + change the distribution that Catalog returns. + + Args: + framework: The framework to use. Either "torch" or "tf2". + + Returns: + The action distribution. + """ + assert hasattr(self, "_action_dist_class_fn"), ( + "You must define a `Catalog._action_dist_class_fn` attribute in your " + "Catalog subclass or override the `Catalog.action_dist_class_fn` method. " + "By default, an action_dist_class_fn is created in the __post_init__ " + "method." + ) + return self._action_dist_class_fn(framework=framework) + + @classmethod + def _get_encoder_config( + cls, + observation_space: gym.Space, + model_config_dict: dict, + action_space: gym.Space = None, + ) -> ModelConfig: + """Returns an EncoderConfig for the given input_space and model_config_dict. + + Encoders are usually used in RLModules to transform the input space into a + latent space that is then fed to the heads. The returned EncoderConfig + objects correspond to the built-in Encoder classes in RLlib. + For example, for a simple 1D-Box input_space, RLlib offers an + MLPEncoder, hence this method returns the MLPEncoderConfig. 
You can overwrite + this method to produce specific EncoderConfigs for your custom Models. + + The following input spaces lead to the following configs: + - 1D-Box: MLPEncoderConfig + - 3D-Box: CNNEncoderConfig + # TODO (Artur): Support more spaces here + # ... + + Args: + observation_space: The observation space to use. + model_config_dict: The model config to use. + action_space: The action space to use if actions are to be encoded. This + is commonly the case for LSTM models. + + Returns: + The encoder config. + """ + activation = model_config_dict["fcnet_activation"] + output_activation = model_config_dict["fcnet_activation"] + use_lstm = model_config_dict["use_lstm"] + + if use_lstm: + encoder_config = RecurrentEncoderConfig( + input_dims=observation_space.shape, + recurrent_layer_type="lstm", + hidden_dim=model_config_dict["lstm_cell_size"], + hidden_weights_initializer=model_config_dict["lstm_kernel_initializer"], + hidden_weights_initializer_config=model_config_dict[ + "lstm_kernel_initializer_kwargs" + ], + hidden_bias_initializer=model_config_dict["lstm_bias_initializer"], + hidden_bias_initializer_config=model_config_dict[ + "lstm_bias_initializer_kwargs" + ], + batch_major=True, + num_layers=1, + tokenizer_config=cls.get_tokenizer_config( + observation_space, + model_config_dict, + ), + ) + else: + # TODO (Artur): Maybe check for original spaces here + # input_space is a 1D Box + if isinstance(observation_space, Box) and len(observation_space.shape) == 1: + # In order to guarantee backward compatability with old configs, + # we need to check if no latent dim was set and simply reuse the last + # fcnet hidden dim for that purpose. 
+ hidden_layer_dims = model_config_dict["fcnet_hiddens"][:-1] + encoder_latent_dim = model_config_dict["fcnet_hiddens"][-1] + encoder_config = MLPEncoderConfig( + input_dims=observation_space.shape, + hidden_layer_dims=hidden_layer_dims, + hidden_layer_activation=activation, + hidden_layer_weights_initializer=model_config_dict[ + "fcnet_kernel_initializer" + ], + hidden_layer_weights_initializer_config=model_config_dict[ + "fcnet_kernel_initializer_kwargs" + ], + hidden_layer_bias_initializer=model_config_dict[ + "fcnet_bias_initializer" + ], + hidden_layer_bias_initializer_config=model_config_dict[ + "fcnet_bias_initializer_kwargs" + ], + output_layer_dim=encoder_latent_dim, + output_layer_activation=output_activation, + output_layer_weights_initializer=model_config_dict[ + "fcnet_kernel_initializer" + ], + output_layer_weights_initializer_config=model_config_dict[ + "fcnet_kernel_initializer_kwargs" + ], + output_layer_bias_initializer=model_config_dict[ + "fcnet_bias_initializer" + ], + output_layer_bias_initializer_config=model_config_dict[ + "fcnet_bias_initializer_kwargs" + ], + ) + + # input_space is a 3D Box + elif ( + isinstance(observation_space, Box) and len(observation_space.shape) == 3 + ): + if not model_config_dict.get("conv_filters"): + model_config_dict["conv_filters"] = get_filter_config( + observation_space.shape + ) + + encoder_config = CNNEncoderConfig( + input_dims=observation_space.shape, + cnn_filter_specifiers=model_config_dict["conv_filters"], + cnn_activation=model_config_dict["conv_activation"], + cnn_kernel_initializer=model_config_dict["conv_kernel_initializer"], + cnn_kernel_initializer_config=model_config_dict[ + "conv_kernel_initializer_kwargs" + ], + cnn_bias_initializer=model_config_dict["conv_bias_initializer"], + cnn_bias_initializer_config=model_config_dict[ + "conv_bias_initializer_kwargs" + ], + ) + # input_space is a 2D Box + elif ( + isinstance(observation_space, Box) and len(observation_space.shape) == 2 + ): + # RLlib 
used to support 2D Box spaces by silently flattening them + raise ValueError( + f"No default encoder config for obs space={observation_space}," + f" lstm={use_lstm} found. 2D Box " + f"spaces are not supported. They should be either flattened to a " + f"1D Box space or enhanced to be a 3D box space." + ) + # input_space is a possibly nested structure of spaces. + else: + # NestedModelConfig + raise ValueError( + f"No default encoder config for obs space={observation_space}," + f" lstm={use_lstm} found." + ) + + return encoder_config + + @classmethod + @OverrideToImplementCustomLogic + def get_tokenizer_config( + cls, + observation_space: gym.Space, + model_config_dict: dict, + # deprecated args. + view_requirements=DEPRECATED_VALUE, + ) -> ModelConfig: + """Returns a tokenizer config for the given space. + + This is useful for recurrent / transformer models that need to tokenize their + inputs. By default, RLlib uses the models supported by Catalog out of the box to + tokenize. + + You should override this method if you want to change the custom tokenizer + inside current encoders that Catalog returns without providing the recurrent + network as a whole. For example, if you want to define some custom CNN layers + as a tokenizer for a recurrent encoder that already includes the recurrent + layers and handles the state. + + Args: + observation_space: The observation space to use. + model_config_dict: The model config to use. 
+ """ + if view_requirements != DEPRECATED_VALUE: + deprecation_warning(old="Catalog(view_requirements=..)", error=True) + + return cls._get_encoder_config( + observation_space=observation_space, + # Use model_config_dict without flags that would end up in complex models + model_config_dict={ + **model_config_dict, + **{"use_lstm": False, "use_attention": False}, + }, + ) + + @classmethod + def _get_dist_cls_from_action_space( + cls, + action_space: gym.Space, + *, + framework: Optional[str] = None, + ) -> Distribution: + """Returns a distribution class for the given action space. + + You can get the required input dimension for the distribution by calling + `action_dict_cls.required_input_dim(action_space)` + on the retrieved class. This is useful, because the Catalog needs to find out + about the required input dimension for the distribution before the model that + outputs these inputs is configured. + + Args: + action_space: Action space of the target gym env. + framework: The framework to use. + + Returns: + The distribution class for the given action space. + """ + # If no framework provided, return no action distribution class (None). + if framework is None: + return None + # This method is structured in two steps: + # Firstly, construct a dictionary containing the available distribution classes. + # Secondly, return the correct distribution class for the given action space. + + # Step 1: Construct the dictionary. 
+ + class DistEnum(enum.Enum): + Categorical = "Categorical" + DiagGaussian = "Gaussian" + Deterministic = "Deterministic" + MultiDistribution = "MultiDistribution" + MultiCategorical = "MultiCategorical" + + if framework == "torch": + from ray.rllib.models.torch.torch_distributions import ( + TorchCategorical, + TorchDeterministic, + TorchDiagGaussian, + ) + + distribution_dicts = { + DistEnum.Deterministic: TorchDeterministic, + DistEnum.DiagGaussian: TorchDiagGaussian, + DistEnum.Categorical: TorchCategorical, + } + elif framework == "tf2": + from ray.rllib.models.tf.tf_distributions import ( + TfCategorical, + TfDeterministic, + TfDiagGaussian, + ) + + distribution_dicts = { + DistEnum.Deterministic: TfDeterministic, + DistEnum.DiagGaussian: TfDiagGaussian, + DistEnum.Categorical: TfCategorical, + } + else: + raise ValueError( + f"Unknown framework: {framework}. Only 'torch' and 'tf2' are " + "supported for RLModule Catalogs." + ) + + # Only add a MultiAction distribution class to the dict if we can compute its + # components (we need a Tuple/Dict space for this). + if isinstance(action_space, (Tuple, Dict)): + partial_multi_action_distribution_cls = _multi_action_dist_partial_helper( + catalog_cls=cls, + action_space=action_space, + framework=framework, + ) + + distribution_dicts[ + DistEnum.MultiDistribution + ] = partial_multi_action_distribution_cls + + # Only add a MultiCategorical distribution class to the dict if we can compute + # its components (we need a MultiDiscrete space for this). + if isinstance(action_space, MultiDiscrete): + partial_multi_categorical_distribution_cls = ( + _multi_categorical_dist_partial_helper( + action_space=action_space, + framework=framework, + ) + ) + + distribution_dicts[ + DistEnum.MultiCategorical + ] = partial_multi_categorical_distribution_cls + + # Step 2: Return the correct distribution class for the given action space. + + # Box space -> DiagGaussian OR Deterministic. 
+ if isinstance(action_space, Box): + if action_space.dtype.char in np.typecodes["AllInteger"]: + raise ValueError( + "Box(..., `int`) action spaces are not supported. " + "Use MultiDiscrete or Box(..., `float`)." + ) + else: + if len(action_space.shape) > 1: + raise UnsupportedSpaceException( + f"Action space has multiple dimensions {action_space.shape}. " + f"Consider reshaping this into a single dimension, using a " + f"custom action distribution, using a Tuple action space, " + f"or the multi-agent API." + ) + return distribution_dicts[DistEnum.DiagGaussian] + + # Discrete Space -> Categorical. + elif isinstance(action_space, Discrete): + return distribution_dicts[DistEnum.Categorical] + + # Tuple/Dict Spaces -> MultiAction. + elif isinstance(action_space, (Tuple, Dict)): + return distribution_dicts[DistEnum.MultiDistribution] + + # Simplex -> Dirichlet. + elif isinstance(action_space, Simplex): + # TODO(Artur): Supported Simplex (in torch). + raise NotImplementedError("Simplex action space not yet supported.") + + # MultiDiscrete -> MultiCategorical. + elif isinstance(action_space, MultiDiscrete): + return distribution_dicts[DistEnum.MultiCategorical] + + # Unknown type -> Error. + else: + raise NotImplementedError(f"Unsupported action space: `{action_space}`") + + @staticmethod + def get_preprocessor(observation_space: gym.Space, **kwargs) -> Preprocessor: + """Returns a suitable preprocessor for the given observation space. + + Args: + observation_space: The input observation space. + **kwargs: Forward-compatible kwargs. + + Returns: + preprocessor: Preprocessor for the observations. + """ + # TODO(Artur): Since preprocessors have long been @PublicAPI with the options + # kwarg as part of their constructor, we fade out support for this, + # beginning with this entrypoint. + # Next, we should deprecate the `options` kwarg from the Preprocessor itself, + # after deprecating the old catalog and other components that still pass this. 
+ options = kwargs.get("options", {}) + if options: + deprecation_warning( + old="get_preprocessor_for_space(..., options={...})", + help="Override `Catalog.get_preprocessor()` " + "in order to implement custom behaviour.", + error=False, + ) + + if options.get("custom_preprocessor"): + deprecation_warning( + old="model_config['custom_preprocessor']", + help="Custom preprocessors are deprecated, " + "since they sometimes conflict with the built-in " + "preprocessors for handling complex observation spaces. " + "Please use wrapper classes around your environment " + "instead.", + error=True, + ) + else: + # TODO(Artur): Inline the get_preprocessor() call here once we have + # deprecated the old model catalog. + cls = get_preprocessor(observation_space) + prep = cls(observation_space, options) + return prep + + +def _multi_action_dist_partial_helper( + catalog_cls: "Catalog", action_space: gym.Space, framework: str +) -> Distribution: + """Helper method to get a partial of a MultiActionDistribution. + + This is useful for when we want to create MultiActionDistributions from + logits only (!) later, but know the action space now already. + + Args: + catalog_cls: The ModelCatalog class to use. + action_space: The action space to get the child distribution classes for. + framework: The framework to use. + + Returns: + A partial of the TorchMultiActionDistribution class. 
+ """ + action_space_struct = get_base_struct_from_space(action_space) + flat_action_space = flatten_space(action_space) + child_distribution_cls_struct = tree.map_structure( + lambda s: catalog_cls._get_dist_cls_from_action_space( + action_space=s, + framework=framework, + ), + action_space_struct, + ) + flat_distribution_clses = tree.flatten(child_distribution_cls_struct) + + logit_lens = [ + int(dist_cls.required_input_dim(space)) + for dist_cls, space in zip(flat_distribution_clses, flat_action_space) + ] + + if framework == "torch": + from ray.rllib.models.torch.torch_distributions import ( + TorchMultiDistribution, + ) + + multi_action_dist_cls = TorchMultiDistribution + elif framework == "tf2": + from ray.rllib.models.tf.tf_distributions import TfMultiDistribution + + multi_action_dist_cls = TfMultiDistribution + else: + raise ValueError(f"Unsupported framework: {framework}") + + partial_dist_cls = multi_action_dist_cls.get_partial_dist_cls( + space=action_space, + child_distribution_cls_struct=child_distribution_cls_struct, + input_lens=logit_lens, + ) + return partial_dist_cls + + +def _multi_categorical_dist_partial_helper( + action_space: gym.Space, framework: str +) -> Distribution: + """Helper method to get a partial of a MultiCategorical Distribution. + + This is useful for when we want to create MultiCategorical Distribution from + logits only (!) later, but know the action space now already. + + Args: + action_space: The action space to get the child distribution classes for. + framework: The framework to use. + + Returns: + A partial of the MultiCategorical class. 
+ """ + + if framework == "torch": + from ray.rllib.models.torch.torch_distributions import TorchMultiCategorical + + multi_categorical_dist_cls = TorchMultiCategorical + elif framework == "tf2": + from ray.rllib.models.tf.tf_distributions import TfMultiCategorical + + multi_categorical_dist_cls = TfMultiCategorical + else: + raise ValueError(f"Unsupported framework: {framework}") + + partial_dist_cls = multi_categorical_dist_cls.get_partial_dist_cls( + space=action_space, input_lens=list(action_space.nvec) + ) + + return partial_dist_cls diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/core/models/configs.py b/.venv/lib/python3.11/site-packages/ray/rllib/core/models/configs.py new file mode 100644 index 0000000000000000000000000000000000000000..60a0758bbd76055e67e89a66da5185d5239d986e --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/rllib/core/models/configs.py @@ -0,0 +1,1095 @@ +import abc +from dataclasses import dataclass, field +import functools +from typing import Callable, Dict, List, Optional, Tuple, TYPE_CHECKING, Union + +import numpy as np + +from ray.rllib.models.torch.misc import ( + same_padding, + same_padding_transpose_after_stride, + valid_padding, +) +from ray.rllib.models.utils import get_activation_fn, get_initializer_fn +from ray.rllib.utils.annotations import ExperimentalAPI + +if TYPE_CHECKING: + from ray.rllib.core.models.base import Model, Encoder + + +@ExperimentalAPI +def _framework_implemented(torch: bool = True, tf2: bool = True): + """Decorator to check if a model was implemented in a framework. + + Args: + torch: Whether we can build this model with torch. + tf2: Whether we can build this model with tf2. + + Returns: + The decorated function. + + Raises: + ValueError: If the framework is not available to build. 
+ """ + accepted = [] + if torch: + accepted.append("torch") + if tf2: + accepted.append("tf2") + + def decorator(fn: Callable) -> Callable: + @functools.wraps(fn) + def checked_build(self, framework, **kwargs): + if framework not in accepted: + raise ValueError( + f"This config does not support framework " + f"{framework}. Only frameworks in {accepted} are " + f"supported." + ) + return fn(self, framework, **kwargs) + + return checked_build + + return decorator + + +@ExperimentalAPI +@dataclass +class ModelConfig(abc.ABC): + """Base class for configuring a `Model` instance. + + ModelConfigs are DL framework-agnostic. + A `Model` (as a sub-component of an `RLModule`) is built via calling the + respective ModelConfig's `build()` method. + RLModules build their sub-components this way after receiving one or more + `ModelConfig` instances from a Catalog object. + + However, `ModelConfig` is not restricted to be used only with Catalog or RLModules. + Usage examples can be found in the individual Model classes', e.g. + see `ray.rllib.core.models.configs::MLPHeadConfig`. + + Attributes: + input_dims: The input dimensions of the network + always_check_shapes: Whether to always check the inputs and outputs of the + model for the specifications. Input specifications are checked on failed + forward passes of the model regardless of this flag. If this flag is set + to `True`, inputs and outputs are checked on every call. This leads to + a slow-down and should only be used for debugging. + """ + + input_dims: Union[List[int], Tuple[int]] = None + always_check_shapes: bool = False + + @abc.abstractmethod + def build(self, framework: str): + """Builds the model. + + Args: + framework: The framework to use for building the model. 
+ """ + raise NotImplementedError + + @property + def output_dims(self) -> Optional[Tuple[int]]: + """Read-only `output_dims` are inferred automatically from other settings.""" + return None + + +@ExperimentalAPI +@dataclass +class _MLPConfig(ModelConfig): + """Generic configuration class for multi-layer-perceptron based Model classes. + + `output_dims` is reached by either the provided `output_layer_dim` setting (int) OR + by the last entry of `hidden_layer_dims`. In the latter case, no special output + layer is added and all layers in the stack behave exactly the same. If + `output_layer_dim` is provided, users might also change this last layer's + activation (`output_layer_activation`) and its bias setting + (`output_layer_use_bias`). + + This is a private class as users should not configure their models directly + through this class, but use one of the sub-classes, e.g. `MLPHeadConfig` or + `MLPEncoderConfig`. + + Attributes: + input_dims: A 1D tensor indicating the input dimension, e.g. `[32]`. + hidden_layer_dims: The sizes of the hidden layers. If an empty list, + `output_layer_dim` must be provided (int) and only a single layer will be + built. + hidden_layer_use_bias: Whether to use bias on all dense layers in the network + (excluding a possible separate output layer defined by `output_layer_dim`). + hidden_layer_activation: The activation function to use after each layer ( + except for the output). The default activation for hidden layers is "relu". + hidden_layer_use_layernorm: Whether to insert a LayerNorm functionality + in between each hidden layer's output and its activation. + hidden_layer_weights_initializer: The initializer function or class to use for + weight initialization in the hidden layers. If `None` the default + initializer of the respective dense layer of a framework (`"torch"` or + `"tf2"`) is used. Note, all initializers defined in the framework `"tf2`) + are allowed. For `"torch"` only the in-place initializers, i.e. 
ending with + an underscore "_" are allowed. + hidden_layer_weights_initializer_config: Configuration to pass into the + initializer defined in `hidden_layer_weights_initializer`. + hidden_layer_bias_initializer: The initializer function or class to use for + bias initialization in the hidden layers. If `None` the default initializer + of the respective dense layer of a framework (`"torch"` or `"tf2"`) is used. + Note, all initializers defined in the framework `"tf2`) are allowed. For + `"torch"` only the in-place initializers, i.e. ending with an underscore "_" + are allowed. + hidden_layer_bias_initializer_config: Configuration to pass into the + initializer defined in `hidden_layer_bias_initializer`. + output_layer_dim: An int indicating the size of the output layer. This may be + set to `None` in case no extra output layer should be built and only the + layers specified by `hidden_layer_dims` will be part of the network. + output_layer_use_bias: Whether to use bias on the separate output layer, if any. + output_layer_activation: The activation function to use for the output layer, + if any. The default activation for the output layer, if any, is "linear", + meaning no activation. + output_layer_weights_initializer: The initializer function or class to use for + weight initialization in the output layers. If `None` the default + initializer of the respective dense layer of a framework (`"torch"` or ` + "tf2"`) is used. Note, all initializers defined in the framework `"tf2`) are + allowed. For `"torch"` only the in-place initializers, i.e. ending with an + underscore "_" are allowed. + output_layer_weights_initializer_config: Configuration to pass into the + initializer defined in `output_layer_weights_initializer`. + output_layer_bias_initializer: The initializer function or class to use for + bias initialization in the output layers. If `None` the default initializer + of the respective dense layer of a framework (`"torch"` or `"tf2"`) is used. 
+ For `"torch"` only the in-place initializers, i.e. ending with an underscore + "_" are allowed. + output_layer_bias_initializer_config: Configuration to pass into the + initializer defined in `output_layer_bias_initializer`. + clip_log_std: If log std should be clipped by `log_std_clip_param`. This applies + only to the action distribution parameters that encode the log standard + deviation of a `DiagGaussian` distribution. + log_std_clip_param: The clipping parameter for the log std, if clipping should + be applied - i.e. `clip_log_std=True`. The default value is 20, i.e. log + stds are clipped in between -20 and 20. + """ + + hidden_layer_dims: Union[List[int], Tuple[int]] = (256, 256) + hidden_layer_use_bias: bool = True + hidden_layer_activation: str = "relu" + hidden_layer_use_layernorm: bool = False + hidden_layer_weights_initializer: Optional[Union[str, Callable]] = None + hidden_layer_weights_initializer_config: Optional[Dict] = None + hidden_layer_bias_initializer: Optional[Union[str, Callable]] = None + hidden_layer_bias_initializer_config: Optional[Dict] = None + + # Optional last output layer with - possibly - different activation and use_bias + # settings. + output_layer_dim: Optional[int] = None + output_layer_use_bias: bool = True + output_layer_activation: str = "linear" + output_layer_weights_initializer: Optional[Union[str, Callable]] = None + output_layer_weights_initializer_config: Optional[Dict] = None + output_layer_bias_initializer: Optional[Union[str, Callable]] = None + output_layer_bias_initializer_config: Optional[Dict] = None + + # Optional clipping of log standard deviation. + clip_log_std: bool = False + # Optional clip parameter for the log standard deviation. + log_std_clip_param: float = 20.0 + + @property + def output_dims(self): + if self.output_layer_dim is None and not self.hidden_layer_dims: + raise ValueError( + "If `output_layer_dim` is None, you must specify at least one hidden " + "layer dim, e.g. 
`hidden_layer_dims=[32]`!" + ) + + # Infer `output_dims` automatically. + return (int(self.output_layer_dim or self.hidden_layer_dims[-1]),) + + def _validate(self, framework: str = "torch"): + """Makes sure that settings are valid.""" + if self.input_dims is not None and len(self.input_dims) != 1: + raise ValueError( + f"`input_dims` ({self.input_dims}) of MLPConfig must be 1D, " + "e.g. `[32]`!" + ) + if len(self.output_dims) != 1: + raise ValueError( + f"`output_dims` ({self.output_dims}) of _MLPConfig must be " + "1D, e.g. `[32]`! This is an inferred value, hence other settings might" + " be wrong." + ) + if self.log_std_clip_param is None: + raise ValueError( + "`log_std_clip_param` of _MLPConfig must be a float value, but is " + "`None`." + ) + + # Call these already here to catch errors early on. + get_activation_fn(self.hidden_layer_activation, framework=framework) + get_activation_fn(self.output_layer_activation, framework=framework) + get_initializer_fn(self.hidden_layer_weights_initializer, framework=framework) + get_initializer_fn(self.hidden_layer_bias_initializer, framework=framework) + get_initializer_fn(self.output_layer_weights_initializer, framework=framework) + get_initializer_fn(self.output_layer_bias_initializer, framework=framework) + + +@ExperimentalAPI +@dataclass +class MLPHeadConfig(_MLPConfig): + """Configuration for an MLP head. + + See _MLPConfig for usage details. + + Example: + + .. testcode:: + + # Configuration: + config = MLPHeadConfig( + input_dims=[4], # must be 1D tensor + hidden_layer_dims=[8, 8], + hidden_layer_activation="relu", + hidden_layer_use_layernorm=False, + # final output layer with no activation (linear) + output_layer_dim=2, + output_layer_activation="linear", + ) + model = config.build(framework="tf2") + + # Resulting stack in pseudocode: + # Linear(4, 8, bias=True) + # ReLU() + # Linear(8, 8, bias=True) + # ReLU() + # Linear(8, 2, bias=True) + + Example: + + .. 
testcode:: + + # Configuration: + config = MLPHeadConfig( + input_dims=[2], + hidden_layer_dims=[10, 4], + hidden_layer_activation="silu", + hidden_layer_use_layernorm=True, + hidden_layer_use_bias=False, + # Initializer for `framework="torch"`. + hidden_layer_weights_initializer="xavier_normal_", + hidden_layer_weights_initializer_config={"gain": 0.8}, + # No final output layer (use last dim in `hidden_layer_dims` + # as the size of the last layer in the stack). + output_layer_dim=None, + ) + model = config.build(framework="torch") + + # Resulting stack in pseudocode: + # Linear(2, 10, bias=False) + # LayerNorm((10,)) # layer norm always before activation + # SiLU() + # Linear(10, 4, bias=False) + # LayerNorm((4,)) # layer norm always before activation + # SiLU() + """ + + @_framework_implemented() + def build(self, framework: str = "torch") -> "Model": + self._validate(framework=framework) + + if framework == "torch": + from ray.rllib.core.models.torch.heads import TorchMLPHead + + return TorchMLPHead(self) + else: + from ray.rllib.core.models.tf.heads import TfMLPHead + + return TfMLPHead(self) + + +@ExperimentalAPI +@dataclass +class FreeLogStdMLPHeadConfig(_MLPConfig): + """Configuration for an MLPHead with a floating second half of outputs. + + This model can be useful together with Gaussian Distributions. + This gaussian distribution would be conditioned as follows: + - The first half of outputs from this model can be used as + state-dependent means when conditioning a gaussian distribution + - The second half are floating free biases that can be used as + state-independent standard deviations to condition a gaussian distribution. + The mean values are produced by an MLPHead, while the standard + deviations are added as floating free biases from a single 1D trainable variable + (not dependent on the net's inputs). 
+ + The output dimensions of the configured MLPHeadConfig must be even and are + divided by two to gain the output dimensions of each the mean-net and the + free std-variable. + + Example: + .. testcode:: + :skipif: True + + # Configuration: + config = FreeLogStdMLPHeadConfig( + input_dims=[2], + hidden_layer_dims=[16], + hidden_layer_activation=None, + hidden_layer_use_layernorm=False, + hidden_layer_use_bias=True, + output_layer_dim=8, # <- this must be an even size + output_layer_use_bias=True, + ) + model = config.build(framework="tf2") + + # Resulting stack in pseudocode: + # Linear(2, 16, bias=True) + # Linear(8, 8, bias=True) # 16 / 2 = 8 -> 8 nodes for the mean + # Extra variable: + # Tensor((8,), float32) # for the free (observation independent) std outputs + + Example: + .. testcode:: + :skipif: True + + # Configuration: + config = FreeLogStdMLPHeadConfig( + input_dims=[2], + hidden_layer_dims=[31, 100], # <- last idx must be an even size + hidden_layer_activation="relu", + hidden_layer_use_layernorm=False, + hidden_layer_use_bias=False, + output_layer_dim=None, # use the last hidden layer as output layer + ) + model = config.build(framework="torch") + + # Resulting stack in pseudocode: + # Linear(2, 31, bias=False) + # ReLu() + # Linear(31, 50, bias=False) # 100 / 2 = 50 -> 50 nodes for the mean + # ReLu() + # Extra variable: + # Tensor((50,), float32) # for the free (observation independent) std outputs + """ + + def _validate(self, framework: str = "torch"): + if len(self.output_dims) > 1 or self.output_dims[0] % 2 == 1: + raise ValueError( + f"`output_layer_dim` ({self.ouput_layer_dim}) or the last value in " + f"`hidden_layer_dims` ({self.hidden_layer_dims}) of a " + "FreeLogStdMLPHeadConfig must be an even int (dividable by 2), " + "e.g. `output_layer_dim=8` or `hidden_layer_dims=[133, 128]`!" 
+ ) + + @_framework_implemented() + def build(self, framework: str = "torch") -> "Model": + self._validate(framework=framework) + + if framework == "torch": + from ray.rllib.core.models.torch.heads import TorchFreeLogStdMLPHead + + return TorchFreeLogStdMLPHead(self) + else: + from ray.rllib.core.models.tf.heads import TfFreeLogStdMLPHead + + return TfFreeLogStdMLPHead(self) + + +@ExperimentalAPI +@dataclass +class CNNTransposeHeadConfig(ModelConfig): + """Configuration for a convolutional transpose head (decoder) network. + + The configured Model transforms 1D-observations into an image space. + The stack of layers is composed of an initial Dense layer, followed by a sequence + of Conv2DTranspose layers. + `input_dims` describes the shape of the (1D) input tensor, + `initial_image_dims` describes the input into the first Conv2DTranspose + layer, where the translation from `input_dim` to `initial_image_dims` is done + via the initial Dense layer (w/o activation, w/o layer-norm, and w/ bias). + + Beyond that, each layer specified by `cnn_transpose_filter_specifiers` + is followed by an activation function according to `cnn_transpose_activation`. + + `output_dims` is reached after the final Conv2DTranspose layer. + Not that the last Conv2DTranspose layer is never activated and never layer-norm'd + regardless of the other settings. + + An example for a single conv2d operation is as follows: + Input "image" is (4, 4, 24) (not yet strided), padding is "same", stride=2, + kernel=5. 
+ + First, the input "image" is strided (with stride=2): + + Input image (4x4 (x24)): + A B C D + E F G H + I J K L + M N O P + + Stride with stride=2 -> (7x7 (x24)) + A 0 B 0 C 0 D + 0 0 0 0 0 0 0 + E 0 F 0 G 0 H + 0 0 0 0 0 0 0 + I 0 J 0 K 0 L + 0 0 0 0 0 0 0 + M 0 N 0 O 0 P + + Then this strided "image" (strided_size=7x7) is padded (exact padding values will be + computed by the model): + + Padding -> (left=3, right=2, top=3, bottom=2) + + 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 A 0 B 0 C 0 D 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 E 0 F 0 G 0 H 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 I 0 J 0 K 0 L 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 M 0 N 0 O 0 P 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 + + Then deconvolution with kernel=5 yields an output "image" of 8x8 (x num output + filters). + + Attributes: + input_dims: The input dimensions of the network. This must be a 1D tensor. + initial_image_dims: The shape of the input to the first + Conv2DTranspose layer. We will make sure the input is transformed to + these dims via a preceding initial Dense layer, followed by a reshape, + before entering the Conv2DTranspose stack. + initial_dense_weights_initializer: The initializer function or class to use for + weight initialization in the initial dense layer. If `None` the default + initializer of the respective dense layer of a framework (`"torch"` or + `"tf2"`) is used. Note, all initializers defined in the framework `"tf2`) + are allowed. For `"torch"` only the in-place initializers, i.e. ending with + an underscore "_" are allowed. + initial_dense_weights_initializer_config: Configuration to pass into the + initializer defined in `initial_dense_weights_initializer`. + initial_dense_bias_initializer: The initializer function or class to use for + bias initialization in the initial dense layer. If `None` the default + initializer of the respective CNN layer of a framework (`"torch"` or `"tf2"` + ) is used. 
For `"torch"` only the in-place initializers, i.e. ending with an + underscore "_" are allowed. + initial_dense_bias_initializer_config: Configuration to pass into the + initializer defined in `initial_dense_bias_initializer`. + cnn_transpose_filter_specifiers: A list of lists, where each element of an inner + list contains elements of the form + `[number of channels/filters, [kernel width, kernel height], stride]` to + specify a convolutional layer stacked in order of the outer list. + cnn_transpose_use_bias: Whether to use bias on all Conv2DTranspose layers. + cnn_transpose_activation: The activation function to use after each layer + (except for the output). + cnn_transpose_use_layernorm: Whether to insert a LayerNorm functionality + in between each Conv2DTranspose layer's output and its activation. + cnn_transpose_kernel_initializer: The initializer function or class to use for + kernel initialization in the CNN layers. If `None` the default initializer + of the respective CNN layer of a framework (`"torch"` or `"tf2"`) is used. + Note, all initializers defined in the framework `"tf2`) are allowed. For + `"torch"` only the in-place initializers, i.e. ending with an underscore "_" + are allowed. + cnn_transpose_kernel_initializer_config: Configuration to pass into the + initializer defined in `cnn_transpose_kernel_initializer`. + cnn_transpose_bias_initializer: The initializer function or class to use for + bias initialization in the CNN layers. If `None` the default initializer of + the respective CNN layer of a framework (`"torch"` or `"tf2"`) is used. + For `"torch"` only the in-place initializers, i.e. ending with an underscore + "_" are allowed. + cnn_transpose_bias_initializer_config: Configuration to pass into the + initializer defined in `cnn_transpose_bias_initializer`. + + Example: + .. 
testcode:: + :skipif: True + + # Configuration: + config = CNNTransposeHeadConfig( + input_dims=[10], # 1D input vector (possibly coming from another NN) + initial_image_dims=[4, 4, 96], # first image input to deconv stack + # Initializer for TensorFlow. + initial_dense_weights_initializer="HeNormal", + initial_dense_weights_initializer={"seed": 334}, + cnn_transpose_filter_specifiers=[ + [48, [4, 4], 2], + [24, [4, 4], 2], + [3, [4, 4], 2], + ], + cnn_transpose_activation="silu", # or "swish", which is the same + cnn_transpose_use_layernorm=False, + cnn_use_bias=True, + ) + model = config.build(framework="torch) + + # Resulting stack in pseudocode: + # Linear(10, 4*4*24) + # Conv2DTranspose( + # in_channels=96, out_channels=48, + # kernel_size=[4, 4], stride=2, bias=True, + # ) + # Swish() + # Conv2DTranspose( + # in_channels=48, out_channels=24, + # kernel_size=[4, 4], stride=2, bias=True, + # ) + # Swish() + # Conv2DTranspose( + # in_channels=24, out_channels=3, + # kernel_size=[4, 4], stride=2, bias=True, + # ) + + Example: + .. testcode:: + :skipif: True + + # Configuration: + config = CNNTransposeHeadConfig( + input_dims=[128], # 1D input vector (possibly coming from another NN) + initial_image_dims=[4, 4, 32], # first image input to deconv stack + cnn_transpose_filter_specifiers=[ + [16, 4, 2], + [3, 4, 2], + ], + cnn_transpose_activation="relu", + cnn_transpose_use_layernorm=True, + cnn_use_bias=False, + # Initializer for `framework="tf2"`. + # Note, for Torch only in-place initializers are allowed. 
+ cnn_transpose_kernel_initializer="xavier_normal_", + cnn_transpose_kernel_initializer_config={"gain": 0.8}, + ) + model = config.build(framework="torch) + + # Resulting stack in pseudocode: + # Linear(128, 4*4*32, bias=True) # bias always True for initial dense layer + # Conv2DTranspose( + # in_channels=32, out_channels=16, + # kernel_size=[4, 4], stride=2, bias=False, + # ) + # LayerNorm((-3, -2, -1)) # layer normalize over last 3 axes + # ReLU() + # Conv2DTranspose( + # in_channels=16, out_channels=3, + # kernel_size=[4, 4], stride=2, bias=False, + # ) + """ + + input_dims: Union[List[int], Tuple[int]] = None + initial_image_dims: Union[List[int], Tuple[int]] = field( + default_factory=lambda: [4, 4, 96] + ) + initial_dense_weights_initializer: Optional[Union[str, Callable]] = None + initial_dense_weights_initializer_config: Optional[Dict] = None + initial_dense_bias_initializer: Optional[Union[str, Callable]] = None + initial_dense_bias_initializer_config: Optional[Dict] = None + cnn_transpose_filter_specifiers: List[List[Union[int, List[int]]]] = field( + default_factory=lambda: [[48, [4, 4], 2], [24, [4, 4], 2], [3, [4, 4], 2]] + ) + cnn_transpose_use_bias: bool = True + cnn_transpose_activation: str = "relu" + cnn_transpose_use_layernorm: bool = False + cnn_transpose_kernel_initializer: Optional[Union[str, Callable]] = None + cnn_transpose_kernel_initializer_config: Optional[Dict] = None + cnn_transpose_bias_initializer: Optional[Union[str, Callable]] = None + cnn_transpose_bias_initializer_config: Optional[Dict] = None + + @property + def output_dims(self): + # Infer output dims, layer by layer. + dims = self.initial_image_dims + for filter_spec in self.cnn_transpose_filter_specifiers: + # Same padding. + num_filters, kernel, stride = filter_spec + # Compute stride output size first (striding is performed first in a + # conv transpose layer. 
+ stride_w, stride_h = (stride, stride) if isinstance(stride, int) else stride + dims = [ + dims[0] * stride_w - (stride_w - 1), + dims[1] * stride_h - (stride_h - 1), + num_filters, + ] + # TODO (Sven): Support "valid" padding for Conv2DTranspose layers, too. + # Analogous to Conv2D Layers in a CNNEncoder. + # Apply the correct padding. Note that this might be asymetrical, meaning + # left padding might be != right padding, same for top/bottom. + _, padding_out_size = same_padding_transpose_after_stride( + (dims[0], dims[1]), kernel, stride + ) + # Perform conv transpose operation with the kernel. + kernel_w, kernel_h = (kernel, kernel) if isinstance(kernel, int) else kernel + dims = [ + padding_out_size[0] - (kernel_w - 1), + padding_out_size[1] - (kernel_h - 1), + num_filters, + ] + return tuple(dims) + + def _validate(self, framework: str = "torch"): + if len(self.input_dims) != 1: + raise ValueError( + f"`input_dims` ({self.input_dims}) of CNNTransposeHeadConfig must be a " + "3D tensor (image-like) with the dimensions meaning: width x height x " + "num_filters, e.g. `[4, 4, 92]`!" + ) + + @_framework_implemented() + def build(self, framework: str = "torch") -> "Model": + self._validate(framework) + + if framework == "torch": + from ray.rllib.core.models.torch.heads import TorchCNNTransposeHead + + return TorchCNNTransposeHead(self) + + elif framework == "tf2": + from ray.rllib.core.models.tf.heads import TfCNNTransposeHead + + return TfCNNTransposeHead(self) + + +@ExperimentalAPI +@dataclass +class CNNEncoderConfig(ModelConfig): + """Configuration for a convolutional (encoder) network. + + The configured CNN encodes 3D-observations into a latent space. + The stack of layers is composed of a sequence of convolutional layers. + `input_dims` describes the shape of the input tensor. Beyond that, each layer + specified by `filter_specifiers` is followed by an activation function according + to `filter_activation`. 
+ + `output_dims` is reached by either the final convolutional layer's output directly + OR by flatten this output. + + See ModelConfig for usage details. + + Example: + + .. testcode:: + + # Configuration: + config = CNNEncoderConfig( + input_dims=[84, 84, 3], # must be 3D tensor (image: w x h x C) + cnn_filter_specifiers=[ + [16, [8, 8], 4], + [32, [4, 4], 2], + ], + cnn_activation="relu", + cnn_use_layernorm=False, + cnn_use_bias=True, + ) + model = config.build(framework="torch") + + # Resulting stack in pseudocode: + # Conv2D( + # in_channels=3, out_channels=16, + # kernel_size=[8, 8], stride=[4, 4], bias=True, + # ) + # ReLU() + # Conv2D( + # in_channels=16, out_channels=32, + # kernel_size=[4, 4], stride=[2, 2], bias=True, + # ) + # ReLU() + # Conv2D( + # in_channels=32, out_channels=1, + # kernel_size=[1, 1], stride=[1, 1], bias=True, + # ) + # Flatten() + + Attributes: + input_dims: The input dimension of the network. These must be given in the + form of `(width, height, channels)`. + cnn_filter_specifiers: A list in which each element is another (inner) list + of either the following forms: + `[number of channels/filters, kernel, stride]` + OR: + `[number of channels/filters, kernel, stride, padding]`, where `padding` + can either be "same" or "valid". + When using the first format w/o the `padding` specifier, `padding` is "same" + by default. Also, `kernel` and `stride` may be provided either as single + ints (square) or as a tuple/list of two ints (width- and height dimensions) + for non-squared kernel/stride shapes. + A good rule of thumb for constructing CNN stacks is: + When using padding="same", the input "image" will be reduced in size by + the factor `stride`, e.g. input=(84, 84, 3) stride=2 kernel=x padding="same" + filters=16 -> output=(42, 42, 16). 
+ For example, if you would like to reduce an Atari image from its original + (84, 84, 3) dimensions down to (6, 6, F), you can construct the following + stack and reduce the w x h dimension of the image by 2 in each layer: + [[16, 4, 2], [32, 4, 2], [64, 4, 2], [128, 4, 2]] -> output=(6, 6, 128) + cnn_use_bias: Whether to use bias on all Conv2D layers. + cnn_activation: The activation function to use after each layer ( + except for the output). The default activation for Conv2d layers is "relu". + cnn_use_layernorm: Whether to insert a LayerNorm functionality + in between each CNN layer's output and its activation. Note that + the output layer. + cnn_kernel_initializer: The initializer function or class to use for kernel + initialization in the CNN layers. If `None` the default initializer of the + respective CNN layer of a framework (`"torch"` or `"tf2"`) is used. Note, + all initializers defined in the framework `"tf2`) are allowed. For `"torch"` + only the in-place initializers, i.e. ending with an underscore "_" are + allowed. + cnn_kernel_initializer_config: Configuration to pass into the initializer + defined in `cnn_kernel_initializer`. + cnn_bias_initializer: The initializer function or class to use for bias + initialization in the CNN layers. If `None` the default initializer of + the respective CNN layer of a framework (`"torch"` or `"tf2"`) is used. + For `"torch"` only the in-place initializers, i.e. ending with an underscore + "_" are allowed. + cnn_bias_initializer_config: Configuration to pass into the initializer defined + in `cnn_bias_initializer`. + flatten_at_end: Whether to flatten the output of the last conv 2D layer into + a 1D tensor. By default, this is True. Note that if you set this to False, + you might simply stack another CNNEncoder on top of this one (maybe with + different activation and bias settings). 
+ """ + + input_dims: Union[List[int], Tuple[int]] = None + cnn_filter_specifiers: List[List[Union[int, List[int]]]] = field( + default_factory=lambda: [[16, [4, 4], 2], [32, [4, 4], 2], [64, [8, 8], 2]] + ) + cnn_use_bias: bool = True + cnn_activation: str = "relu" + cnn_use_layernorm: bool = False + cnn_kernel_initializer: Optional[Union[str, Callable]] = None + cnn_kernel_initializer_config: Optional[Dict] = None + cnn_bias_initializer: Optional[Union[str, Callable]] = None + cnn_bias_initializer_config: Optional[Dict] = None + flatten_at_end: bool = True + + @property + def output_dims(self): + if not self.input_dims: + return None + + # Infer output dims, layer by layer. + dims = self.input_dims # Creates a copy (works for tuple/list). + for filter_spec in self.cnn_filter_specifiers: + # Padding not provided, "same" by default. + if len(filter_spec) == 3: + num_filters, kernel, stride = filter_spec + padding = "same" + # Padding option provided, use given value. + else: + num_filters, kernel, stride, padding = filter_spec + + # Same padding. + if padding == "same": + _, dims = same_padding(dims[:2], kernel, stride) + # Valid padding. + else: + dims = valid_padding(dims[:2], kernel, stride) + + # Add depth (num_filters) to the end (our utility functions for same/valid + # only return the image width/height). + dims = [dims[0], dims[1], num_filters] + + # Flatten everything. + if self.flatten_at_end: + return (int(np.prod(dims)),) + + return tuple(dims) + + def _validate(self, framework: str = "torch"): + if len(self.input_dims) != 3: + raise ValueError( + f"`input_dims` ({self.input_dims}) of CNNEncoderConfig must be a 3D " + "tensor (image) with the dimensions meaning: width x height x " + "channels, e.g. `[64, 64, 3]`!" + ) + if not self.flatten_at_end and len(self.output_dims) != 3: + raise ValueError( + f"`output_dims` ({self.output_dims}) of CNNEncoderConfig must be " + "3D, e.g. `[4, 4, 128]`, b/c your `flatten_at_end` setting is False! 
" + "`output_dims` is an inferred value, hence other settings might be " + "wrong." + ) + elif self.flatten_at_end and len(self.output_dims) != 1: + raise ValueError( + f"`output_dims` ({self.output_dims}) of CNNEncoderConfig must be " + "1D, e.g. `[32]`, b/c your `flatten_at_end` setting is True! " + "`output_dims` is an inferred value, hence other settings might be " + "wrong." + ) + + @_framework_implemented() + def build(self, framework: str = "torch") -> "Model": + self._validate(framework) + + if framework == "torch": + from ray.rllib.core.models.torch.encoder import TorchCNNEncoder + + return TorchCNNEncoder(self) + + elif framework == "tf2": + from ray.rllib.core.models.tf.encoder import TfCNNEncoder + + return TfCNNEncoder(self) + + +@ExperimentalAPI +@dataclass +class MLPEncoderConfig(_MLPConfig): + """Configuration for an MLP that acts as an encoder. + + See _MLPConfig for usage details. + + Example: + .. testcode:: + + # Configuration: + config = MLPEncoderConfig( + input_dims=[4], # must be 1D tensor + hidden_layer_dims=[16], + hidden_layer_activation="relu", + hidden_layer_use_layernorm=False, + output_layer_dim=None, # maybe None or an int + ) + model = config.build(framework="torch") + + # Resulting stack in pseudocode: + # Linear(4, 16, bias=True) + # ReLU() + + Example: + .. 
testcode:: + + # Configuration: + config = MLPEncoderConfig( + input_dims=[2], + hidden_layer_dims=[8, 8], + hidden_layer_activation="silu", + hidden_layer_use_layernorm=True, + hidden_layer_use_bias=False, + output_layer_dim=4, + output_layer_activation="tanh", + output_layer_use_bias=False, + ) + model = config.build(framework="tf2") + + # Resulting stack in pseudocode: + # Linear(2, 8, bias=False) + # LayerNorm((8,)) # layernorm always before activation + # SiLU() + # Linear(8, 8, bias=False) + # LayerNorm((8,)) # layernorm always before activation + # SiLU() + # Linear(8, 4, bias=False) + # Tanh() + """ + + @_framework_implemented() + def build(self, framework: str = "torch") -> "Encoder": + self._validate(framework) + + if framework == "torch": + from ray.rllib.core.models.torch.encoder import TorchMLPEncoder + + return TorchMLPEncoder(self) + else: + from ray.rllib.core.models.tf.encoder import TfMLPEncoder + + return TfMLPEncoder(self) + + +@ExperimentalAPI +@dataclass +class RecurrentEncoderConfig(ModelConfig): + """Configuration for an LSTM-based or a GRU-based encoder. + + The encoder consists of... + - Zero or one tokenizers + - N LSTM/GRU layers stacked on top of each other and feeding + their outputs as inputs to the respective next layer. + + This makes for the following flow of tensors: + + Inputs + | + [Tokenizer if present] + | + LSTM layer 1 + | + (...) + | + LSTM layer n + | + Outputs + + The internal state is structued as (num_layers, B, hidden-size) for all hidden + state components, e.g. + h- and c-states of the LSTM layer(s) or h-state of the GRU layer(s). + For example, the hidden states of an LSTMEncoder with num_layers=2 and hidden_dim=8 + would be: {"h": (2, B, 8), "c": (2, B, 8)}. + + `output_dims` is reached by the last recurrent layer's dimension, which is always + the `hidden_dims` value. + + Example: + .. 
testcode:: + + # Configuration: + config = RecurrentEncoderConfig( + recurrent_layer_type="lstm", + input_dims=[16], # must be 1D tensor + hidden_dim=128, + num_layers=2, + use_bias=True, + ) + model = config.build(framework="torch") + + # Resulting stack in pseudocode: + # LSTM(16, 128, bias=True) + # LSTM(128, 128, bias=True) + + # Resulting shape of the internal states (c- and h-states): + # (2, B, 128) for each c- and h-states. + + Example: + .. testcode:: + + # Configuration: + config = RecurrentEncoderConfig( + recurrent_layer_type="gru", + input_dims=[32], # must be 1D tensor + hidden_dim=64, + num_layers=1, + use_bias=False, + ) + model = config.build(framework="torch") + + # Resulting stack in pseudocode: + # GRU(32, 64, bias=False) + + # Resulting shape of the internal state: + # (1, B, 64) + + Attributes: + input_dims: The input dimensions. Must be 1D. This is the 1D shape of the tensor + that goes into the first recurrent layer. + recurrent_layer_type: The type of the recurrent layer(s). + Either "lstm" or "gru". + hidden_dim: The size of the hidden internal state(s) of the recurrent layer(s). + For example, for an LSTM, this would be the size of the c- and h-tensors. + num_layers: The number of recurrent (LSTM or GRU) layers to stack. + batch_major: Wether the input is batch major (B, T, ..) or + time major (T, B, ..). + hidden_weights_initializer: The initializer function or class to use for + kernel initialization in the hidden layers. If `None` the default + initializer of the respective recurrent layer of a framework (`"torch"` or + `"tf2"`) is used. Note, all initializers defined in the frameworks ( + `"torch"` or `"tf2`) are allowed. For `"torch"` only the in-place + initializers, i.e. ending with an underscore "_" are allowed. + hidden_weights_initializer_config: Configuration to pass into the + initializer defined in `hidden_weights_initializer`. + use_bias: Whether to use bias on the recurrent layers in the network. 
+ hidden_bias_initializer: The initializer function or class to use for bias + initialization in the hidden layers. If `None` the default initializer of + the respective recurrent layer of a framework (`"torch"` or `"tf2"`) is + used. For `"torch"` only the in-place initializers, i.e. ending with an + underscore "_" are allowed. + hidden_bias_initializer_config: Configuration to pass into the initializer + defined in `hidden_bias_initializer`. + tokenizer_config: A ModelConfig to build tokenizers for observations, + actions and other spaces. + """ + + recurrent_layer_type: str = "lstm" + hidden_dim: int = None + num_layers: int = None + batch_major: bool = True + hidden_weights_initializer: Optional[Union[str, Callable]] = None + hidden_weights_initializer_config: Optional[Dict] = None + use_bias: bool = True + hidden_bias_initializer: Optional[Union[str, Callable]] = None + hidden_bias_initializer_config: Optional[Dict] = None + tokenizer_config: ModelConfig = None + + @property + def output_dims(self): + return (self.hidden_dim,) + + def _validate(self, framework: str = "torch"): + """Makes sure that settings are valid.""" + if self.recurrent_layer_type not in ["gru", "lstm"]: + raise ValueError( + f"`recurrent_layer_type` ({self.recurrent_layer_type}) of " + "RecurrentEncoderConfig must be 'gru' or 'lstm'!" + ) + if self.input_dims is not None and len(self.input_dims) != 1: + raise ValueError( + f"`input_dims` ({self.input_dims}) of RecurrentEncoderConfig must be " + "1D, e.g. `[32]`!" + ) + if len(self.output_dims) != 1: + raise ValueError( + f"`output_dims` ({self.output_dims}) of RecurrentEncoderConfig must be " + "1D, e.g. `[32]`! This is an inferred value, hence other settings might" + " be wrong." 
+ ) + + @_framework_implemented() + def build(self, framework: str = "torch") -> "Encoder": + if framework == "torch": + from ray.rllib.core.models.torch.encoder import ( + TorchGRUEncoder as GRU, + TorchLSTMEncoder as LSTM, + ) + else: + from ray.rllib.core.models.tf.encoder import ( + TfGRUEncoder as GRU, + TfLSTMEncoder as LSTM, + ) + + if self.recurrent_layer_type == "lstm": + return LSTM(self) + else: + return GRU(self) + + +@ExperimentalAPI +@dataclass +class ActorCriticEncoderConfig(ModelConfig): + """Configuration for an ActorCriticEncoder. + + The base encoder functions like other encoders in RLlib. It is wrapped by the + ActorCriticEncoder to provides a shared encoder Model to use in RLModules that + provides twofold outputs: one for the actor and one for the critic. See + ModelConfig for usage details. + + Attributes: + base_encoder_config: The configuration for the wrapped encoder(s). + shared: Whether the base encoder is shared between the actor and critic. + inference_only: Whether the configured encoder will only ever be used as an + actor-encoder, never as a value-function encoder. Thus, if True and `shared` + is False, will only build the actor-related components. 
+ """ + + base_encoder_config: ModelConfig = None + shared: bool = True + inference_only: bool = False + + @_framework_implemented() + def build(self, framework: str = "torch") -> "Encoder": + if framework == "torch": + from ray.rllib.core.models.torch.encoder import ( + TorchActorCriticEncoder, + TorchStatefulActorCriticEncoder, + ) + + if isinstance(self.base_encoder_config, RecurrentEncoderConfig): + return TorchStatefulActorCriticEncoder(self) + else: + return TorchActorCriticEncoder(self) + else: + from ray.rllib.core.models.tf.encoder import ( + TfActorCriticEncoder, + TfStatefulActorCriticEncoder, + ) + + if isinstance(self.base_encoder_config, RecurrentEncoderConfig): + return TfStatefulActorCriticEncoder(self) + else: + return TfActorCriticEncoder(self) diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/core/models/specs/__init__.py b/.venv/lib/python3.11/site-packages/ray/rllib/core/models/specs/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/core/models/specs/__pycache__/__init__.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/rllib/core/models/specs/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..87b37f3578a6e087d02e841295057720253cef54 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/rllib/core/models/specs/__pycache__/__init__.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/core/models/specs/__pycache__/specs_base.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/rllib/core/models/specs/__pycache__/specs_base.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..03b9239a3d533d3f17b43579602e2551600268da Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/rllib/core/models/specs/__pycache__/specs_base.cpython-311.pyc differ diff --git 
a/.venv/lib/python3.11/site-packages/ray/rllib/core/models/specs/__pycache__/specs_dict.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/rllib/core/models/specs/__pycache__/specs_dict.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7f636a3a70d3cb27502e12d0b4521ecdf2aaaa5a Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/rllib/core/models/specs/__pycache__/specs_dict.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/core/models/specs/__pycache__/typing.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/rllib/core/models/specs/__pycache__/typing.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..034c80350498e09e827375595ec31ef156b40e6a Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/rllib/core/models/specs/__pycache__/typing.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/core/models/specs/specs_base.py b/.venv/lib/python3.11/site-packages/ray/rllib/core/models/specs/specs_base.py new file mode 100644 index 0000000000000000000000000000000000000000..9099da94100237cc6dca66391e6d6bfa772f5544 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/rllib/core/models/specs/specs_base.py @@ -0,0 +1,226 @@ +import abc +from copy import deepcopy +import numpy as np +from typing import Any, Optional, Dict, List, Tuple, Union, Type +from ray.rllib.utils import try_import_jax, try_import_tf, try_import_torch +from ray.rllib.utils.deprecation import Deprecated +from ray.rllib.utils.typing import TensorType + +torch, _ = try_import_torch() +_, tf, _ = try_import_tf() +jax, _ = try_import_jax() + +_INVALID_INPUT_DUP_DIM = "Duplicate dimension names in shape ({})" +_INVALID_INPUT_UNKNOWN_DIM = "Unknown dimension name {} in shape ({})" +_INVALID_INPUT_POSITIVE = "Dimension {} in ({}) must be positive, got {}" +_INVALID_INPUT_INT_DIM = "Dimension {} in ({}) must be integer, got {}" +_INVALID_SHAPE 
= "Expected shape {} but found {}" +_INVALID_TYPE = "Expected data type {} but found {}" + + +@Deprecated( + help="The Spec checking APIs have been deprecated and cancelled without " + "replacement.", + error=False, +) +class Spec(abc.ABC): + @staticmethod + @abc.abstractmethod + def validate(self, data: Any) -> None: + pass + + +@Deprecated( + help="The Spec checking APIs have been deprecated and cancelled without " + "replacement.", + error=False, +) +class TypeSpec(Spec): + def __init__(self, dtype: Type) -> None: + self.dtype = dtype + + def __repr__(self): + return f"TypeSpec({str(self.dtype)})" + + def validate(self, data: Any) -> None: + if not isinstance(data, self.dtype): + raise ValueError(_INVALID_TYPE.format(self.dtype, type(data))) + + def __eq__(self, other: "TypeSpec") -> bool: + if not isinstance(other, TypeSpec): + return False + return self.dtype == other.dtype + + def __ne__(self, other: "TypeSpec") -> bool: + return not self == other + + +@Deprecated( + help="The Spec checking APIs have been deprecated and cancelled without " + "replacement.", + error=False, +) +class TensorSpec(Spec): + def __init__( + self, + shape: str, + *, + dtype: Optional[Any] = None, + framework: Optional[str] = None, + **shape_vals: int, + ) -> None: + self._expected_shape = self._parse_expected_shape(shape, shape_vals) + self._full_shape = self._get_full_shape() + self._dtype = dtype + self._framework = framework + + if framework not in ("tf2", "torch", "np", "jax", None): + raise ValueError(f"Unknown framework {self._framework}") + + self._type = self._get_expected_type() + + def _get_expected_type(self) -> Type: + if self._framework == "torch": + return torch.Tensor + elif self._framework == "tf2": + return tf.Tensor + elif self._framework == "np": + return np.ndarray + elif self._framework == "jax": + jax, _ = try_import_jax() + return jax.numpy.ndarray + elif self._framework is None: + # Don't restrict the type of the tensor if no framework is specified. 
+ return object + + def get_shape(self, tensor: TensorType) -> Tuple[int]: + if self._framework == "tf2": + return tuple( + int(i) if i is not None else None for i in tensor.shape.as_list() + ) + return tuple(tensor.shape) + + def get_dtype(self, tensor: TensorType) -> Any: + return tensor.dtype + + @property + def dtype(self) -> Any: + return self._dtype + + @property + def shape(self) -> Tuple[Union[int, str]]: + return self._expected_shape + + @property + def type(self) -> Type: + return self._type + + @property + def full_shape(self) -> Tuple[int]: + return self._full_shape + + def rdrop(self, n: int) -> "TensorSpec": + assert isinstance(n, int) and n >= 0, "n must be a positive integer or zero" + copy_ = deepcopy(self) + copy_._expected_shape = copy_.shape[:-n] + copy_._full_shape = self._get_full_shape() + return copy_ + + def append(self, spec: "TensorSpec") -> "TensorSpec": + copy_ = deepcopy(self) + copy_._expected_shape = (*copy_.shape, *spec.shape) + copy_._full_shape = self._get_full_shape() + return copy_ + + def validate(self, tensor: TensorType) -> None: + if not isinstance(tensor, self.type): + raise ValueError(_INVALID_TYPE.format(self.type, type(tensor).__name__)) + + shape = self.get_shape(tensor) + if len(shape) != len(self._expected_shape): + raise ValueError(_INVALID_SHAPE.format(self._expected_shape, shape)) + + for expected_d, actual_d in zip(self._expected_shape, shape): + if isinstance(expected_d, int) and expected_d != actual_d: + raise ValueError(_INVALID_SHAPE.format(self._expected_shape, shape)) + + dtype = tensor.dtype + if self.dtype and dtype != self.dtype: + raise ValueError(_INVALID_TYPE.format(self.dtype, tensor.dtype)) + + def fill(self, fill_value: Union[float, int] = 0) -> TensorType: + if self._framework == "torch": + return torch.full(self.full_shape, fill_value, dtype=self.dtype) + + elif self._framework == "tf2": + if self.dtype: + return tf.ones(self.full_shape, dtype=self.dtype) * fill_value + return 
tf.fill(self.full_shape, fill_value) + + elif self._framework == "np": + return np.full(self.full_shape, fill_value, dtype=self.dtype) + + elif self._framework == "jax": + return jax.numpy.full(self.full_shape, fill_value, dtype=self.dtype) + + elif self._framework is None: + raise ValueError( + "Cannot fill tensor without providing `framework` to TensorSpec. " + "This TensorSpec was instantiated without `framework`." + ) + + def _get_full_shape(self) -> Tuple[int]: + sampled_shape = tuple() + for d in self._expected_shape: + if isinstance(d, int): + sampled_shape += (d,) + else: + sampled_shape += (1,) + return sampled_shape + + def _parse_expected_shape(self, shape: str, shape_vals: Dict[str, int]) -> tuple: + d_names = shape.replace(" ", "").split(",") + self._validate_shape_vals(d_names, shape_vals) + + expected_shape = tuple(shape_vals.get(d, d) for d in d_names) + + return expected_shape + + def _validate_shape_vals( + self, d_names: List[str], shape_vals: Dict[str, int] + ) -> None: + d_names_set = set(d_names) + if len(d_names_set) != len(d_names): + raise ValueError(_INVALID_INPUT_DUP_DIM.format(",".join(d_names))) + + for d_name in shape_vals: + if d_name not in d_names_set: + raise ValueError( + _INVALID_INPUT_UNKNOWN_DIM.format(d_name, ",".join(d_names)) + ) + + d_value = shape_vals.get(d_name, None) + if d_value is not None: + if not isinstance(d_value, int): + raise ValueError( + _INVALID_INPUT_INT_DIM.format( + d_name, ",".join(d_names), type(d_value) + ) + ) + if d_value <= 0: + raise ValueError( + _INVALID_INPUT_POSITIVE.format( + d_name, ",".join(d_names), d_value + ) + ) + + def __repr__(self) -> str: + return f"TensorSpec(shape={tuple(self.shape)}, dtype={self.dtype})" + + def __eq__(self, other: "TensorSpec") -> bool: + if not isinstance(other, TensorSpec): + return False + return self.shape == other.shape and self.dtype == other.dtype + + def __ne__(self, other: "TensorSpec") -> bool: + return not self == other diff --git 
a/.venv/lib/python3.11/site-packages/ray/rllib/core/models/specs/specs_dict.py b/.venv/lib/python3.11/site-packages/ray/rllib/core/models/specs/specs_dict.py new file mode 100644 index 0000000000000000000000000000000000000000..adc2c46a94122202ac6d371b40fe412560fa257d --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/rllib/core/models/specs/specs_dict.py @@ -0,0 +1,84 @@ +from typing import Any, Dict + +import tree +from ray.rllib.core.models.specs.specs_base import Spec +from ray.rllib.utils import force_tuple + + +_MISSING_KEYS_FROM_DATA = ( + "The data dict does not match the model specs. Keys {} are " + "in the spec dict but not on the data dict. Data keys are {}" +) +_TYPE_MISMATCH = ( + "The data does not match the spec. The data element " + "{} has type {} (expected type {})." +) + +DATA_TYPE = Dict[str, Any] + +IS_NOT_PROPERTY = "Spec {} must be a property of the class {}." + + +class SpecDict(dict, Spec): + def validate( + self, + data: DATA_TYPE, + exact_match: bool = False, + ) -> None: + check = self.is_subset(self, data, exact_match) + if not check[0]: + data_keys_set = set() + + def _map(path, s): + data_keys_set.add(force_tuple(path)) + + tree.map_structure_with_path(_map, data) + + raise ValueError(_MISSING_KEYS_FROM_DATA.format(check[1], data_keys_set)) + + @staticmethod + def is_subset(spec_dict, data_dict, exact_match=False): + if exact_match: + tree.assert_same_structure(data_dict, spec_dict, check_types=False) + + for key in spec_dict: + if key not in data_dict: + return False, key + if spec_dict[key] is None: + continue + + elif isinstance(data_dict[key], dict): + if not isinstance(spec_dict[key], dict): + return False, key + + res = SpecDict.is_subset(spec_dict[key], data_dict[key], exact_match) + if not res[0]: + return res + + elif isinstance(spec_dict[key], dict): + return False, key + + elif isinstance(spec_dict[key], Spec): + try: + spec_dict[key].validate(data_dict[key]) + except ValueError as e: + raise ValueError( + f"Mismatch 
found in data element {key}, " + f"which is a TensorSpec: {e}" + ) + elif isinstance(spec_dict[key], (type, tuple)): + if not isinstance(data_dict[key], spec_dict[key]): + raise ValueError( + _TYPE_MISMATCH.format( + key, + type(data_dict[key]).__name__, + spec_dict[key].__name__, + ) + ) + else: + raise ValueError( + f"The spec type has to be either TensorSpec or Type. " + f"got {type(spec_dict[key])}" + ) + + return True, None diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/core/models/specs/typing.py b/.venv/lib/python3.11/site-packages/ray/rllib/core/models/specs/typing.py new file mode 100644 index 0000000000000000000000000000000000000000..3975aae27d8c3ef40e1e4be7c85c9fb7828be260 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/rllib/core/models/specs/typing.py @@ -0,0 +1,10 @@ +from typing import Union, Type, Tuple, List, TYPE_CHECKING + +if TYPE_CHECKING: + from ray.rllib.core.models.specs.specs_base import Spec + + +NestedKeys = List[Union[str, Tuple[str, ...]]] +Constraint = Union[Type, Tuple[Type, ...], "Spec"] +# Either a flat list of nested keys or a tree of constraints +SpecType = Union[NestedKeys] diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/core/models/tf/__init__.py b/.venv/lib/python3.11/site-packages/ray/rllib/core/models/tf/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/core/models/tf/__pycache__/__init__.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/rllib/core/models/tf/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..bbc40ceb03859fb67d9cccf65378203f78d36d58 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/rllib/core/models/tf/__pycache__/__init__.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/core/models/tf/__pycache__/base.cpython-311.pyc 
b/.venv/lib/python3.11/site-packages/ray/rllib/core/models/tf/__pycache__/base.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b30597e0183b48aa1588815798952b42babbff82 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/rllib/core/models/tf/__pycache__/base.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/core/models/tf/__pycache__/encoder.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/rllib/core/models/tf/__pycache__/encoder.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..41c45cddba3c566d8a58d25b3b9dd1be536ec534 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/rllib/core/models/tf/__pycache__/encoder.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/core/models/tf/__pycache__/heads.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/rllib/core/models/tf/__pycache__/heads.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..fe5bd5d7c47e6f100a86ca70714a88704ed98b58 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/rllib/core/models/tf/__pycache__/heads.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/core/models/tf/__pycache__/primitives.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/rllib/core/models/tf/__pycache__/primitives.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ac04c7269a16c38c47f383034e10a88fb03cbb99 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/rllib/core/models/tf/__pycache__/primitives.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/core/models/tf/base.py b/.venv/lib/python3.11/site-packages/ray/rllib/core/models/tf/base.py new file mode 100644 index 0000000000000000000000000000000000000000..48e346812c42049812555fe5706e638c7519d650 --- /dev/null +++ 
b/.venv/lib/python3.11/site-packages/ray/rllib/core/models/tf/base.py @@ -0,0 +1,53 @@ +import abc +import logging +from typing import Tuple + +import numpy as np + +from ray.rllib.core.models.base import Model +from ray.rllib.core.models.configs import ModelConfig +from ray.rllib.utils.annotations import override +from ray.rllib.utils.framework import try_import_tf + +logger = logging.getLogger(__name__) +_, tf, _ = try_import_tf() + + +class TfModel(Model, tf.keras.Model, abc.ABC): + """Base class for RLlib's TensorFlow models. + + This class defines the interface for RLlib's TensorFlow models and checks + whether inputs and outputs of __call__ are checked with `check_input_specs()` and + `check_output_specs()` respectively. + """ + + def __init__(self, config: ModelConfig): + tf.keras.Model.__init__(self) + Model.__init__(self, config) + + def call(self, input_dict: dict, **kwargs) -> dict: + """Returns the output of this model for the given input. + + This method only makes sure that we have a spec-checked _forward() method. + + Args: + input_dict: The input tensors. + **kwargs: Forward compatibility kwargs. + + Returns: + dict: The output tensors. 
+ """ + return self._forward(input_dict, **kwargs) + + @override(Model) + def get_num_parameters(self) -> Tuple[int, int]: + return ( + sum(int(np.prod(w.shape)) for w in self.trainable_weights), + sum(int(np.prod(w.shape)) for w in self.non_trainable_weights), + ) + + @override(Model) + def _set_to_dummy_weights(self, value_sequence=(-0.02, -0.01, 0.01, 0.02)): + for i, w in enumerate(self.trainable_weights + self.non_trainable_weights): + fill_val = value_sequence[i % len(value_sequence)] + w.assign(tf.fill(w.shape, fill_val)) diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/core/models/tf/encoder.py b/.venv/lib/python3.11/site-packages/ray/rllib/core/models/tf/encoder.py new file mode 100644 index 0000000000000000000000000000000000000000..3d280e23cda741098a3231403a4e5991cc548e83 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/rllib/core/models/tf/encoder.py @@ -0,0 +1,315 @@ +from typing import Dict + +import tree # pip install dm_tree + +from ray.rllib.core.columns import Columns +from ray.rllib.core.models.base import ( + Encoder, + ActorCriticEncoder, + StatefulActorCriticEncoder, + ENCODER_OUT, + tokenize, +) +from ray.rllib.core.models.base import Model +from ray.rllib.core.models.configs import ( + ActorCriticEncoderConfig, + CNNEncoderConfig, + MLPEncoderConfig, + RecurrentEncoderConfig, +) +from ray.rllib.core.models.tf.base import TfModel +from ray.rllib.core.models.tf.primitives import TfMLP, TfCNN +from ray.rllib.models.utils import get_initializer_fn +from ray.rllib.utils.annotations import override +from ray.rllib.utils.framework import try_import_tf + +_, tf, _ = try_import_tf() + + +class TfActorCriticEncoder(TfModel, ActorCriticEncoder): + """An encoder that can hold two encoders.""" + + framework = "tf2" + + def __init__(self, config: ActorCriticEncoderConfig) -> None: + # We have to call TfModel.__init__ first, because it calls the constructor of + # tf.keras.Model, which is required to be called before models are created. 
+ TfModel.__init__(self, config) + ActorCriticEncoder.__init__(self, config) + + +class TfStatefulActorCriticEncoder(TfModel, StatefulActorCriticEncoder): + """A stateful actor-critic encoder for torch.""" + + framework = "tf2" + + def __init__(self, config: ActorCriticEncoderConfig) -> None: + # We have to call TfModel.__init__ first, because it calls the constructor of + # tf.keras.Model, which is required to be called before models are created. + TfModel.__init__(self, config) + StatefulActorCriticEncoder.__init__(self, config) + + +class TfCNNEncoder(TfModel, Encoder): + def __init__(self, config: CNNEncoderConfig) -> None: + TfModel.__init__(self, config) + Encoder.__init__(self, config) + + # Add an input layer for the Sequential, created below. This is really + # important to be able to derive the model's trainable_variables early on + # (inside our Learners). + layers = [tf.keras.layers.Input(shape=config.input_dims)] + # The bare-bones CNN (no flatten, no succeeding dense). + cnn = TfCNN( + input_dims=config.input_dims, + cnn_filter_specifiers=config.cnn_filter_specifiers, + cnn_activation=config.cnn_activation, + cnn_use_layernorm=config.cnn_use_layernorm, + cnn_use_bias=config.cnn_use_bias, + cnn_kernel_initializer=config.cnn_kernel_initializer, + cnn_kernel_initializer_config=config.cnn_kernel_initializer_config, + cnn_bias_initializer=config.cnn_bias_initializer, + cnn_bias_initializer_config=config.cnn_bias_initializer_config, + ) + layers.append(cnn) + + # Add a flatten operation to move from 2/3D into 1D space. + if config.flatten_at_end: + layers.append(tf.keras.layers.Flatten()) + + # Create the network from gathered layers. 
+ self.net = tf.keras.Sequential(layers) + + @override(Model) + def _forward(self, inputs: dict, **kwargs) -> dict: + return {ENCODER_OUT: self.net(inputs[Columns.OBS])} + + +class TfMLPEncoder(Encoder, TfModel): + def __init__(self, config: MLPEncoderConfig) -> None: + TfModel.__init__(self, config) + Encoder.__init__(self, config) + + # Create the neural network. + self.net = TfMLP( + input_dim=config.input_dims[0], + hidden_layer_dims=config.hidden_layer_dims, + hidden_layer_activation=config.hidden_layer_activation, + hidden_layer_use_layernorm=config.hidden_layer_use_layernorm, + hidden_layer_use_bias=config.hidden_layer_use_bias, + hidden_layer_weights_initializer=config.hidden_layer_weights_initializer, + hidden_layer_weights_initializer_config=( + config.hidden_layer_weights_initializer_config + ), + hidden_layer_bias_initializer=config.hidden_layer_bias_initializer, + hidden_layer_bias_initializer_config=( + config.hidden_layer_bias_initializer_config + ), + output_dim=config.output_layer_dim, + output_activation=config.output_layer_activation, + output_use_bias=config.output_layer_use_bias, + output_weights_initializer=config.output_layer_weights_initializer, + output_weights_initializer_config=( + config.output_layer_weights_initializer_config + ), + output_bias_initializer=config.output_layer_bias_initializer, + output_bias_initializer_config=config.output_layer_bias_initializer_config, + ) + + @override(Model) + def _forward(self, inputs: Dict, **kwargs) -> Dict: + return {ENCODER_OUT: self.net(inputs[Columns.OBS])} + + +class TfGRUEncoder(TfModel, Encoder): + """A recurrent GRU encoder. + + This encoder has... + - Zero or one tokenizers. + - One or more GRU layers. 
+ """ + + def __init__(self, config: RecurrentEncoderConfig) -> None: + TfModel.__init__(self, config) + + # Maybe create a tokenizer + if config.tokenizer_config is not None: + self.tokenizer = config.tokenizer_config.build(framework="tf2") + # For our first input dim, we infer from the tokenizer. + # This is necessary because we need to build the layers in order to be + # able to get/set weights directly after instantiation. + input_dims = (1,) + tuple( + self.tokenizer.output_specs[ENCODER_OUT].full_shape + ) + else: + self.tokenizer = None + input_dims = ( + 1, + 1, + ) + tuple(config.input_dims) + + gru_weights_initializer = get_initializer_fn( + config.hidden_weights_initializer, framework="tf2" + ) + gru_bias_initializer = get_initializer_fn( + config.hidden_bias_initializer, framework="tf2" + ) + + # Create the tf GRU layers. + self.grus = [] + for _ in range(config.num_layers): + layer = tf.keras.layers.GRU( + config.hidden_dim, + time_major=not config.batch_major, + # Note, if the initializer is `None`, we want TensorFlow + # to use its default one. So we pass in `None`. + kernel_initializer=( + gru_weights_initializer(**config.hidden_weights_initializer_config) + if config.hidden_weights_initializer_config + else gru_weights_initializer + ), + use_bias=config.use_bias, + bias_initializer=( + gru_bias_initializer(**config.hidden_bias_initializer_config) + if config.hidden_bias_initializer_config + else gru_bias_initializer + ), + return_sequences=True, + return_state=True, + ) + layer.build(input_dims) + input_dims = (1, 1, config.hidden_dim) + self.grus.append(layer) + + @override(Model) + def get_initial_state(self): + return { + "h": tf.zeros((self.config.num_layers, self.config.hidden_dim)), + } + + @override(Model) + def _forward(self, inputs: Dict, **kwargs) -> Dict: + outputs = {} + + if self.tokenizer is not None: + # Push observations through the tokenizer encoder if we built one. 
+ out = tokenize(self.tokenizer, inputs, framework="tf2") + else: + # Otherwise, just use the raw observations. + out = tf.cast(inputs[Columns.OBS], tf.float32) + + # States are batch-first when coming in. Make them layers-first. + states_in = tree.map_structure( + lambda s: tf.transpose(s, perm=[1, 0] + list(range(2, len(s.shape)))), + inputs[Columns.STATE_IN], + ) + + states_out = [] + for i, layer in enumerate(self.grus): + out, h = layer(out, states_in["h"][i]) + states_out.append(h) + + # Insert them into the output dict. + outputs[ENCODER_OUT] = out + outputs[Columns.STATE_OUT] = {"h": tf.stack(states_out, 1)} + return outputs + + +class TfLSTMEncoder(TfModel, Encoder): + """A recurrent LSTM encoder. + + This encoder has... + - Zero or one tokenizers. + - One or more LSTM layers. + """ + + def __init__(self, config: RecurrentEncoderConfig) -> None: + TfModel.__init__(self, config) + + # Maybe create a tokenizer + if config.tokenizer_config is not None: + self.tokenizer = config.tokenizer_config.build(framework="tf2") + # For our first input dim, we infer from the tokenizer. + # This is necessary because we need to build the layers in order to be + # able to get/set weights directly after instantiation. + input_dims = (1,) + tuple( + self.tokenizer.output_specs[ENCODER_OUT].full_shape + ) + else: + self.tokenizer = None + input_dims = ( + 1, + 1, + ) + tuple(config.input_dims) + + lstm_weights_initializer = get_initializer_fn( + config.hidden_weights_initializer, framework="tf2" + ) + lstm_bias_initializer = get_initializer_fn( + config.hidden_bias_initializer, framework="tf2" + ) + + # Create the tf LSTM layers. + self.lstms = [] + for _ in range(config.num_layers): + layer = tf.keras.layers.LSTM( + config.hidden_dim, + time_major=not config.batch_major, + # Note, if the initializer is `None`, we want TensorFlow + # to use its default one. So we pass in `None`. 
+ kernel_initializer=( + lstm_weights_initializer(**config.hidden_weights_initializer_config) + if config.hidden_weights_initializer_config + else lstm_weights_initializer + ), + use_bias=config.use_bias, + bias_initializer=( + lstm_bias_initializer(**config.hidden_bias_initializer_config) + if config.hidden_bias_initializer_config + else "zeros" + ), + return_sequences=True, + return_state=True, + ) + layer.build(input_dims) + input_dims = (1, 1, config.hidden_dim) + self.lstms.append(layer) + + @override(Model) + def get_initial_state(self): + return { + "h": tf.zeros((self.config.num_layers, self.config.hidden_dim)), + "c": tf.zeros((self.config.num_layers, self.config.hidden_dim)), + } + + @override(Model) + def _forward(self, inputs: Dict, **kwargs) -> Dict: + outputs = {} + + if self.tokenizer is not None: + # Push observations through the tokenizer encoder if we built one. + out = tokenize(self.tokenizer, inputs, framework="tf2") + else: + # Otherwise, just use the raw observations. + out = tf.cast(inputs[Columns.OBS], tf.float32) + + # States are batch-first when coming in. Make them layers-first. + states_in = tree.map_structure( + lambda s: tf.transpose(s, perm=[1, 0, 2]), + inputs[Columns.STATE_IN], + ) + + states_out_h = [] + states_out_c = [] + for i, layer in enumerate(self.lstms): + out, h, c = layer(out, (states_in["h"][i], states_in["c"][i])) + states_out_h.append(h) + states_out_c.append(c) + + # Insert them into the output dict. 
+ outputs[ENCODER_OUT] = out + outputs[Columns.STATE_OUT] = { + "h": tf.stack(states_out_h, 1), + "c": tf.stack(states_out_c, 1), + } + return outputs diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/core/models/tf/heads.py b/.venv/lib/python3.11/site-packages/ray/rllib/core/models/tf/heads.py new file mode 100644 index 0000000000000000000000000000000000000000..e92ee5e0577eca5de10e12584338e88b94dbde06 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/rllib/core/models/tf/heads.py @@ -0,0 +1,198 @@ +import numpy as np + +from ray.rllib.core.models.base import Model +from ray.rllib.core.models.configs import ( + CNNTransposeHeadConfig, + FreeLogStdMLPHeadConfig, + MLPHeadConfig, +) +from ray.rllib.core.models.tf.base import TfModel +from ray.rllib.core.models.tf.primitives import TfCNNTranspose, TfMLP +from ray.rllib.models.utils import get_initializer_fn +from ray.rllib.utils import try_import_tf +from ray.rllib.utils.annotations import override + +tf1, tf, tfv = try_import_tf() + + +class TfMLPHead(TfModel): + def __init__(self, config: MLPHeadConfig) -> None: + TfModel.__init__(self, config) + + self.net = TfMLP( + input_dim=config.input_dims[0], + hidden_layer_dims=config.hidden_layer_dims, + hidden_layer_activation=config.hidden_layer_activation, + hidden_layer_use_layernorm=config.hidden_layer_use_layernorm, + hidden_layer_use_bias=config.hidden_layer_use_bias, + hidden_layer_weights_initializer=config.hidden_layer_weights_initializer, + hidden_layer_weights_initializer_config=( + config.hidden_layer_weights_initializer_config + ), + hidden_layer_bias_initializer=config.hidden_layer_bias_initializer, + hidden_layer_bias_initializer_config=( + config.hidden_layer_bias_initializer_config + ), + output_dim=config.output_layer_dim, + output_activation=config.output_layer_activation, + output_use_bias=config.output_layer_use_bias, + output_weights_initializer=config.output_layer_weights_initializer, + output_weights_initializer_config=( + 
config.output_layer_weights_initializer_config + ), + output_bias_initializer=config.output_layer_bias_initializer, + output_bias_initializer_config=config.output_layer_bias_initializer_config, + ) + # If log standard deviations should be clipped. This should be only true for + # policy heads. Value heads should never be clipped. + self.clip_log_std = config.clip_log_std + # The clipping parameter for the log standard deviation. + self.log_std_clip_param = tf.constant([config.log_std_clip_param]) + + @override(Model) + def _forward(self, inputs: tf.Tensor, **kwargs) -> tf.Tensor: + # Only clip the log standard deviations, if the user wants to clip. This + # avoids also clipping value heads. + if self.clip_log_std: + # Forward pass. + means, log_stds = tf.split(self.net(inputs), num_or_size_splits=2, axis=-1) + # Clip the log standard deviations. + log_stds = tf.clip_by_value( + log_stds, -self.log_std_clip_param, self.log_std_clip_param + ) + return tf.concat([means, log_stds], axis=-1) + # Otherwise just return the logits. + else: + return self.net(inputs) + + +class TfFreeLogStdMLPHead(TfModel): + """An MLPHead that implements floating log stds for Gaussian distributions.""" + + def __init__(self, config: FreeLogStdMLPHeadConfig) -> None: + TfModel.__init__(self, config) + + assert config.output_dims[0] % 2 == 0, "output_dims must be even for free std!" 
+ self._half_output_dim = config.output_dims[0] // 2 + + self.net = TfMLP( + input_dim=config.input_dims[0], + hidden_layer_dims=config.hidden_layer_dims, + hidden_layer_activation=config.hidden_layer_activation, + hidden_layer_use_layernorm=config.hidden_layer_use_layernorm, + hidden_layer_use_bias=config.hidden_layer_use_bias, + hidden_layer_weights_initializer=config.hidden_layer_weights_initializer, + hidden_layer_weights_initializer_config=( + config.hidden_layer_weights_initializer_config + ), + hidden_layer_bias_initializer=config.hidden_layer_bias_initializer, + hidden_layer_bias_initializer_config=( + config.hidden_layer_bias_initializer_config + ), + output_dim=self._half_output_dim, + output_activation=config.output_layer_activation, + output_use_bias=config.output_layer_use_bias, + output_weights_initializer=config.output_layer_weights_initializer, + output_weights_initializer_config=( + config.output_layer_weights_initializer_config + ), + output_bias_initializer=config.output_layer_bias_initializer, + output_bias_initializer_config=config.output_layer_bias_initializer_config, + ) + + self.log_std = tf.Variable( + tf.zeros(self._half_output_dim), + name="log_std", + dtype=tf.float32, + trainable=True, + ) + # If log standard deviations should be clipped. This should be only true for + # policy heads. Value heads should never be clipped. + self.clip_log_std = config.clip_log_std + # The clipping parameter for the log standard deviation. + self.log_std_clip_param = tf.constant([config.log_std_clip_param]) + + @override(Model) + def _forward(self, inputs: tf.Tensor, **kwargs) -> tf.Tensor: + # Compute the mean first, then append the log_std. + mean = self.net(inputs) + # If log standard deviation should be clipped. + if self.clip_log_std: + # Clip log standard deviations to stabilize training. Note, the + # default clip value is `inf`, i.e. no clipping. 
+ log_std = tf.clip_by_value( + self.log_std, -self.log_std_clip_param, self.log_std_clip_param + ) + else: + log_std = self.log_std + log_std_out = tf.tile(tf.expand_dims(log_std, 0), [tf.shape(inputs)[0], 1]) + logits_out = tf.concat([mean, log_std_out], axis=1) + return logits_out + + +class TfCNNTransposeHead(TfModel): + def __init__(self, config: CNNTransposeHeadConfig) -> None: + super().__init__(config) + + # Initial, inactivated Dense layer (always w/ bias). Use the + # hidden layer initializer for this layer. + initial_dense_weights_initializer = get_initializer_fn( + config.initial_dense_weights_initializer, framework="tf2" + ) + initial_dense_bias_initializer = get_initializer_fn( + config.initial_dense_bias_initializer, framework="tf2" + ) + + # This layer is responsible for getting the incoming tensor into a proper + # initial image shape (w x h x filters) for the suceeding Conv2DTranspose stack. + self.initial_dense = tf.keras.layers.Dense( + units=int(np.prod(config.initial_image_dims)), + activation=None, + kernel_initializer=( + initial_dense_weights_initializer( + **config.initial_dense_weights_initializer_config + ) + if config.initial_dense_weights_initializer_config + else initial_dense_weights_initializer + ), + use_bias=True, + bias_initializer=( + initial_dense_bias_initializer( + **config.initial_dense_bias_initializer_config + ) + if config.initial_dense_bias_initializer_config + else initial_dense_bias_initializer + ), + ) + + # The main CNNTranspose stack. 
+ self.cnn_transpose_net = TfCNNTranspose( + input_dims=config.initial_image_dims, + cnn_transpose_filter_specifiers=config.cnn_transpose_filter_specifiers, + cnn_transpose_activation=config.cnn_transpose_activation, + cnn_transpose_use_layernorm=config.cnn_transpose_use_layernorm, + cnn_transpose_use_bias=config.cnn_transpose_use_bias, + cnn_transpose_kernel_initializer=config.cnn_transpose_kernel_initializer, + cnn_transpose_kernel_initializer_config=( + config.cnn_transpose_kernel_initializer_config + ), + cnn_transpose_bias_initializer=config.cnn_transpose_bias_initializer, + cnn_transpose_bias_initializer_config=( + config.cnn_transpose_bias_initializer_config + ), + ) + + @override(Model) + def _forward(self, inputs: tf.Tensor, **kwargs) -> tf.Tensor: + # Push through initial dense layer to get dimensions of first "image". + out = self.initial_dense(inputs) + # Reshape to initial 3D (image-like) format to enter CNN transpose stack. + out = tf.reshape( + out, + shape=(-1,) + tuple(self.config.initial_image_dims), + ) + # Push through CNN transpose stack. + out = self.cnn_transpose_net(out) + # Add 0.5 to center the (always non-activated, non-normalized) outputs more + # around 0.0. + return out + 0.5 diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/core/models/tf/primitives.py b/.venv/lib/python3.11/site-packages/ray/rllib/core/models/tf/primitives.py new file mode 100644 index 0000000000000000000000000000000000000000..1c5d61bf4f4964f648a4d9ae09f72e64f49072b7 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/rllib/core/models/tf/primitives.py @@ -0,0 +1,429 @@ +from typing import Callable, Dict, List, Optional, Tuple, Union + +from ray.rllib.models.utils import get_activation_fn, get_initializer_fn +from ray.rllib.utils.framework import try_import_tf + +_, tf, _ = try_import_tf() + + +class TfMLP(tf.keras.Model): + """A multi-layer perceptron with N dense layers. 
+ + All layers (except for an optional additional extra output layer) share the same + activation function, bias setup (use bias or not), and LayerNorm setup + (use layer normalization or not). + + If `output_dim` (int) is not None, an additional, extra output dense layer is added, + which might have its own activation function (e.g. "linear"). However, the output + layer does NOT use layer normalization. + """ + + def __init__( + self, + *, + input_dim: int, + hidden_layer_dims: List[int], + hidden_layer_use_layernorm: bool = False, + hidden_layer_use_bias: bool = True, + hidden_layer_activation: Optional[Union[str, Callable]] = "relu", + hidden_layer_weights_initializer: Optional[Union[str, Callable]] = None, + hidden_layer_weights_initializer_config: Optional[Dict] = None, + hidden_layer_bias_initializer: Optional[Union[str, Callable]] = None, + hidden_layer_bias_initializer_config: Optional[Dict] = None, + output_dim: Optional[int] = None, + output_use_bias: bool = True, + output_activation: Optional[Union[str, Callable]] = "linear", + output_weights_initializer: Optional[Union[str, Callable]] = None, + output_weights_initializer_config: Optional[Dict] = None, + output_bias_initializer: Optional[Union[str, Callable]] = None, + output_bias_initializer_config: Optional[Dict] = None, + ): + """Initialize a TfMLP object. + + Args: + input_dim: The input dimension of the network. Must not be None. + hidden_layer_dims: The sizes of the hidden layers. If an empty list, only a + single layer will be built of size `output_dim`. + hidden_layer_use_layernorm: Whether to insert a LayerNormalization + functionality in between each hidden layer's output and its activation. + hidden_layer_use_bias: Whether to use bias on all dense layers (excluding + the possible separate output layer). + hidden_layer_activation: The activation function to use after each layer + (except for the output). 
Either a tf.nn.[activation fn] callable or a + string that's supported by tf.keras.layers.Activation(activation=...), + e.g. "relu", "ReLU", "silu", or "linear". + hidden_layer_weights_initializer: The initializer function or class to use + for weights initialization in the hidden layers. If `None` the default + initializer of the respective dense layer is used. Note, all + initializers defined in `tf.keras.initializers` are allowed. + hidden_layer_weights_initializer_config: Configuration to pass into the + initializer defined in `hidden_layer_weights_initializer`. + hidden_layer_bias_initializer: The initializer function or class to use for + bias initialization in the hidden layers. If `None` the default + initializer of the respective dense layer is used. Note, all + initializers defined in `tf.keras.initializers` are allowed. + hidden_layer_bias_initializer_config: Configuration to pass into the + initializer defined in `hidden_layer_bias_initializer`. + output_dim: The output dimension of the network. If None, no specific output + layer will be added and the last layer in the stack will have + size=`hidden_layer_dims[-1]`. + output_use_bias: Whether to use bias on the separate output layer, + if any. + output_activation: The activation function to use for the output layer + (if any). Either a tf.nn.[activation fn] callable or a string that's + supported by tf.keras.layers.Activation(activation=...), e.g. "relu", + "ReLU", "silu", or "linear". + output_layer_weights_initializer: The initializer function or class to use + for weights initialization in the output layers. If `None` the default + initializer of the respective dense layer is used. Note, all + initializers defined in `tf.keras.initializers` are allowed. + output_layer_weights_initializer_config: Configuration to pass into the + initializer defined in `output_layer_weights_initializer`. 
+ output_layer_bias_initializer: The initializer function or class to use for + bias initialization in the output layers. If `None` the default + initializer of the respective dense layer is used. Note, all + initializers defined in `tf.keras.initializers` are allowed. + output_layer_bias_initializer_config: Configuration to pass into the + initializer defined in `output_layer_bias_initializer`. + """ + super().__init__() + assert input_dim > 0 + + layers = [] + # Input layer. + layers.append(tf.keras.Input(shape=(input_dim,))) + + hidden_activation = get_activation_fn(hidden_layer_activation, framework="tf2") + hidden_weights_initializer = get_initializer_fn( + hidden_layer_weights_initializer, framework="tf2" + ) + hidden_bias_initializer = get_initializer_fn( + hidden_layer_bias_initializer, framework="tf2" + ) + + for i in range(len(hidden_layer_dims)): + # Dense layer with activation (or w/o in case we use LayerNorm, in which + # case the activation is applied after the layer normalization step). + layers.append( + tf.keras.layers.Dense( + hidden_layer_dims[i], + activation=( + hidden_activation if not hidden_layer_use_layernorm else None + ), + # Note, if the initializer is `None`, we want TensorFlow + # to use its default one. So we pass in `None`. + kernel_initializer=( + hidden_weights_initializer( + **hidden_layer_weights_initializer_config + ) + if hidden_layer_weights_initializer_config + else hidden_weights_initializer + ), + use_bias=hidden_layer_use_bias, + bias_initializer=( + hidden_bias_initializer(**hidden_layer_bias_initializer_config) + if hidden_layer_bias_initializer_config + else hidden_bias_initializer + ), + ) + ) + # Add LayerNorm and activation. + if hidden_layer_use_layernorm: + # Use epsilon=1e-5 here (instead of default 1e-3) to be unified + # with torch. 
+ layers.append(tf.keras.layers.LayerNormalization(epsilon=1e-5)) + layers.append(tf.keras.layers.Activation(hidden_activation)) + + output_weights_initializer = get_initializer_fn( + output_weights_initializer, framework="tf2" + ) + output_bias_initializer = get_initializer_fn( + output_bias_initializer, framework="tf2" + ) + + if output_dim is not None: + output_activation = get_activation_fn(output_activation, framework="tf2") + layers.append( + tf.keras.layers.Dense( + output_dim, + activation=output_activation, + # Note, if the initializer is `None`, we want TensorFlow + # to use its default one. So we pass in `None`. + kernel_initializer=( + output_weights_initializer(**output_weights_initializer_config) + if output_weights_initializer_config + else output_weights_initializer + ), + use_bias=output_use_bias, + bias_initializer=( + output_bias_initializer(**output_bias_initializer_config) + if output_bias_initializer_config + else output_bias_initializer + ), + ) + ) + + self.network = tf.keras.Sequential(layers) + + def call(self, inputs, **kwargs): + return self.network(inputs) + + +class TfCNN(tf.keras.Model): + """A model containing a CNN with N Conv2D layers. + + All layers share the same activation function, bias setup (use bias or not), and + LayerNormalization setup (use layer normalization or not). + + Note that there is no flattening nor an additional dense layer at the end of the + stack. The output of the network is a 3D tensor of dimensions [width x height x num + output filters]. 
+ """ + + def __init__( + self, + *, + input_dims: Union[List[int], Tuple[int]], + cnn_filter_specifiers: List[List[Union[int, List]]], + cnn_use_bias: bool = True, + cnn_use_layernorm: bool = False, + cnn_activation: Optional[str] = "relu", + cnn_kernel_initializer: Optional[Union[str, Callable]] = None, + cnn_kernel_initializer_config: Optional[Dict] = None, + cnn_bias_initializer: Optional[Union[str, Callable]] = None, + cnn_bias_initializer_config: Optional[Dict] = None, + ): + """Initializes a TfCNN instance. + + Args: + input_dims: The 3D input dimensions of the network (incoming image). + cnn_filter_specifiers: A list in which each element is another (inner) list + of either the following forms: + `[number of channels/filters, kernel, stride]` + OR: + `[number of channels/filters, kernel, stride, padding]`, where `padding` + can either be "same" or "valid". + When using the first format w/o the `padding` specifier, `padding` is + "same" by default. Also, `kernel` and `stride` may be provided either as + single ints (square) or as a tuple/list of two ints (width- and height + dimensions) for non-squared kernel/stride shapes. + A good rule of thumb for constructing CNN stacks is: + When using padding="same", the input "image" will be reduced in size by + the factor `stride`, e.g. input=(84, 84, 3) stride=2 kernel=x + padding="same" filters=16 -> output=(42, 42, 16). + For example, if you would like to reduce an Atari image from its + original (84, 84, 3) dimensions down to (6, 6, F), you can construct the + following stack and reduce the w x h dimension of the image by 2 in each + layer: + [[16, 4, 2], [32, 4, 2], [64, 4, 2], [128, 4, 2]] -> output=(6, 6, 128) + cnn_use_bias: Whether to use bias on all Conv2D layers. + cnn_activation: The activation function to use after each Conv2D layer. + cnn_use_layernorm: Whether to insert a LayerNormalization functionality + in between each Conv2D layer's outputs and its activation. 
+ cnn_kernel_initializer: The initializer function or class to use for kernel + initialization in the CNN layers. If `None` the default initializer of + the respective CNN layer is used. Note, all initializers defined in + `tf.keras.initializers` are allowed. + cnn_kernel_initializer_config: Configuration to pass into the initializer + defined in `cnn_kernel_initializer`. + cnn_bias_initializer: The initializer function or class to use for bias + initialization in the CNN layers. If `None` the default initializer of + the respective CNN layer is used. Note, all initializers defined in + `tf.keras.initializers` are allowed. + cnn_bias_initializer_config: Configuration to pass into the initializer + defined in `cnn_bias_initializer`. + """ + super().__init__() + + assert len(input_dims) == 3 + + cnn_activation = get_activation_fn(cnn_activation, framework="tf2") + cnn_kernel_initializer = get_initializer_fn( + cnn_kernel_initializer, framework="tf2" + ) + cnn_bias_initializer = get_initializer_fn(cnn_bias_initializer, framework="tf2") + + layers = [] + + # Input layer. + layers.append(tf.keras.layers.Input(shape=input_dims)) + + for filter_specs in cnn_filter_specifiers: + # Padding information not provided -> Use "same" as default. + if len(filter_specs) == 3: + num_filters, kernel_size, strides = filter_specs + padding = "same" + # Padding information provided. + else: + num_filters, kernel_size, strides, padding = filter_specs + + layers.append( + tf.keras.layers.Conv2D( + filters=num_filters, + kernel_size=kernel_size, + strides=strides, + padding=padding, + use_bias=cnn_use_bias, + activation=None if cnn_use_layernorm else cnn_activation, + # Note, if the initializer is `None`, we want TensorFlow + # to use its default one. So we pass in `None`. 
+ kernel_initializer=( + cnn_kernel_initializer(**cnn_kernel_initializer_config) + if cnn_kernel_initializer_config + else cnn_kernel_initializer + ), + bias_initializer=( + cnn_bias_initializer(**cnn_bias_initializer_config) + if cnn_bias_initializer_config + else cnn_bias_initializer + ), + ) + ) + if cnn_use_layernorm: + # Use epsilon=1e-5 here (instead of default 1e-3) to be unified with + # torch. Need to normalize over all axes. + layers.append( + tf.keras.layers.LayerNormalization(axis=[-3, -2, -1], epsilon=1e-5) + ) + layers.append(tf.keras.layers.Activation(cnn_activation)) + + # Create the final CNN network. + self.cnn = tf.keras.Sequential(layers) + + self.expected_input_dtype = tf.float32 + + def call(self, inputs, **kwargs): + return self.cnn(tf.cast(inputs, self.expected_input_dtype)) + + +class TfCNNTranspose(tf.keras.Model): + """A model containing a CNNTranspose with N Conv2DTranspose layers. + + All layers share the same activation function, bias setup (use bias or not), and + LayerNormalization setup (use layer normalization or not), except for the last one, + which is never activated and never layer norm'd. + + Note that there is no reshaping/flattening nor an additional dense layer at the + beginning or end of the stack. The input as well as output of the network are 3D + tensors of dimensions [width x height x num output filters]. + """ + + def __init__( + self, + *, + input_dims: Union[List[int], Tuple[int]], + cnn_transpose_filter_specifiers: List[List[Union[int, List]]], + cnn_transpose_use_bias: bool = True, + cnn_transpose_activation: Optional[str] = "relu", + cnn_transpose_use_layernorm: bool = False, + cnn_transpose_kernel_initializer: Optional[Union[str, Callable]] = None, + cnn_transpose_kernel_initializer_config: Optional[Dict] = None, + cnn_transpose_bias_initializer: Optional[Union[str, Callable]] = None, + cnn_transpose_bias_initializer_config: Optional[Dict] = None, + ): + """Initializes a TfCNNTranspose instance. 
+ + Args: + input_dims: The 3D input dimensions of the network (incoming image). + cnn_transpose_filter_specifiers: A list of lists, where each item represents + one Conv2DTranspose layer. Each such Conv2DTranspose layer is further + specified by the elements of the inner lists. The inner lists follow + the format: `[number of filters, kernel, stride]` to + specify a convolutional-transpose layer stacked in order of the + outer list. + `kernel` as well as `stride` might be provided as width x height tuples + OR as single ints representing both dimension (width and height) + in case of square shapes. + cnn_transpose_use_bias: Whether to use bias on all Conv2DTranspose layers. + cnn_transpose_use_layernorm: Whether to insert a LayerNormalization + functionality in between each Conv2DTranspose layer's outputs and its + activation. + The last Conv2DTranspose layer will not be normed, regardless. + cnn_transpose_activation: The activation function to use after each layer + (except for the last Conv2DTranspose layer, which is always + non-activated). + cnn_transpose_kernel_initializer: The initializer function or class to use + for kernel initialization in the CNN layers. If `None` the default + initializer of the respective CNN layer is used. Note, all initializers + defined in `tf.keras.initializers` are allowed. + cnn_transpose_kernel_initializer_config: Configuration to pass into the + initializer defined in `cnn_transpose_kernel_initializer`. + cnn_transpose_bias_initializer: The initializer function or class to use for + bias initialization in the CNN layers. If `None` the default initializer + of the respective CNN layer is used. Note, only the in-place + initializers, i.e. ending with an underscore "_" are allowed. + cnn_transpose_bias_initializer_config: Configuration to pass into the + initializer defined in `cnn_transpose_bias_initializer`. 
+ """ + super().__init__() + + assert len(input_dims) == 3 + + cnn_transpose_activation = get_activation_fn( + cnn_transpose_activation, framework="tf2" + ) + cnn_transpose_kernel_initializer = get_initializer_fn( + cnn_transpose_kernel_initializer, + framework="tf2", + ) + cnn_transpose_bias_initializer = get_initializer_fn( + cnn_transpose_bias_initializer, framework="tf2" + ) + + layers = [] + + # Input layer. + layers.append(tf.keras.layers.Input(shape=input_dims)) + + for i, (num_filters, kernel_size, strides) in enumerate( + cnn_transpose_filter_specifiers + ): + is_final_layer = i == len(cnn_transpose_filter_specifiers) - 1 + layers.append( + tf.keras.layers.Conv2DTranspose( + filters=num_filters, + kernel_size=kernel_size, + strides=strides, + padding="same", + # Last layer is never activated (regardless of config). + activation=( + None + if cnn_transpose_use_layernorm or is_final_layer + else cnn_transpose_activation + ), + # Note, if the initializer is `None`, we want TensorFlow + # to use its default one. So we pass in `None`. + kernel_initializer=( + cnn_transpose_kernel_initializer( + **cnn_transpose_kernel_initializer_config + ) + if cnn_transpose_kernel_initializer_config + else cnn_transpose_kernel_initializer + ), + # Last layer always uses bias (b/c has no LayerNorm, regardless of + # config). + use_bias=cnn_transpose_use_bias or is_final_layer, + bias_initializer=( + cnn_transpose_bias_initializer( + **cnn_transpose_bias_initializer_config + ) + if cnn_transpose_bias_initializer_config + else cnn_transpose_bias_initializer + ), + ) + ) + if cnn_transpose_use_layernorm and not is_final_layer: + # Use epsilon=1e-5 here (instead of default 1e-3) to be unified with + # torch. Need to normalize over all axes. + layers.append( + tf.keras.layers.LayerNormalization(axis=[-3, -2, -1], epsilon=1e-5) + ) + layers.append(tf.keras.layers.Activation(cnn_transpose_activation)) + + # Create the final CNNTranspose network. 
+ self.cnn_transpose = tf.keras.Sequential(layers) + + self.expected_input_dtype = tf.float32 + + def call(self, inputs, **kwargs): + return self.cnn_transpose(tf.cast(inputs, self.expected_input_dtype)) diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/core/models/torch/__init__.py b/.venv/lib/python3.11/site-packages/ray/rllib/core/models/torch/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/core/models/torch/__pycache__/base.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/rllib/core/models/torch/__pycache__/base.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7b3dbfebf5a60865a7461b0d733bdd14325d1f13 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/rllib/core/models/torch/__pycache__/base.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/core/models/torch/__pycache__/encoder.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/rllib/core/models/torch/__pycache__/encoder.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..360e8126a45a5d1f600fa14ea245923d7607c632 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/rllib/core/models/torch/__pycache__/encoder.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/core/models/torch/__pycache__/primitives.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/rllib/core/models/torch/__pycache__/primitives.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..732b2eadcac1d9b2ec49e9485c083da0260e0849 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/rllib/core/models/torch/__pycache__/primitives.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/core/models/torch/base.py 
b/.venv/lib/python3.11/site-packages/ray/rllib/core/models/torch/base.py new file mode 100644 index 0000000000000000000000000000000000000000..a737ed6cfc9126fc6fce7080ab224b50c3dcc56f --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/rllib/core/models/torch/base.py @@ -0,0 +1,98 @@ +import abc +import logging +from typing import Tuple, Union + +import numpy as np + +from ray.rllib.core.models.base import Model +from ray.rllib.core.models.configs import ModelConfig +from ray.rllib.utils.annotations import override +from ray.rllib.utils.framework import try_import_torch +from ray.rllib.utils.typing import TensorType + +torch, nn = try_import_torch() + +logger = logging.getLogger(__name__) + + +class TorchModel(nn.Module, Model, abc.ABC): + """Base class for RLlib's PyTorch models. + + This class defines the interface for RLlib's PyTorch models and checks + whether inputs and outputs of forward are checked with `check_input_specs()` and + `check_output_specs()` respectively. + + Example usage for a single Flattening layer: + + .. testcode:: + + from ray.rllib.core.models.configs import ModelConfig + from ray.rllib.core.models.torch.base import TorchModel + import torch + + class FlattenModelConfig(ModelConfig): + def build(self, framework: str): + assert framework == "torch" + return TorchFlattenModel(self) + + class TorchFlattenModel(TorchModel): + def __init__(self, config): + TorchModel.__init__(self, config) + self.flatten_layer = torch.nn.Flatten() + + def _forward(self, inputs, **kwargs): + return self.flatten_layer(inputs) + + model = FlattenModelConfig().build("torch") + inputs = torch.Tensor([[[1, 2]]]) + print(model(inputs)) + + .. testoutput:: + + tensor([[1., 2.]]) + + """ + + def __init__(self, config: ModelConfig): + """Initialized a TorchModel. + + Args: + config: The ModelConfig to use. 
+ """ + nn.Module.__init__(self) + Model.__init__(self, config) + + def forward( + self, inputs: Union[dict, TensorType], **kwargs + ) -> Union[dict, TensorType]: + """Returns the output of this model for the given input. + + This method only makes sure that we have a spec-checked _forward() method. + + Args: + inputs: The input tensors. + **kwargs: Forward compatibility kwargs. + + Returns: + dict: The output tensors. + """ + return self._forward(inputs, **kwargs) + + @override(Model) + def get_num_parameters(self) -> Tuple[int, int]: + num_all_params = sum(int(np.prod(p.size())) for p in self.parameters()) + trainable_params = filter(lambda p: p.requires_grad, self.parameters()) + num_trainable_params = sum(int(np.prod(p.size())) for p in trainable_params) + return ( + num_trainable_params, + num_all_params - num_trainable_params, + ) + + @override(Model) + def _set_to_dummy_weights(self, value_sequence=(-0.02, -0.01, 0.01, 0.02)): + trainable_weights = [p for p in self.parameters() if p.requires_grad] + non_trainable_weights = [p for p in self.parameters() if not p.requires_grad] + for i, w in enumerate(trainable_weights + non_trainable_weights): + fill_val = value_sequence[i % len(value_sequence)] + with torch.no_grad(): + w.fill_(fill_val) diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/core/models/torch/encoder.py b/.venv/lib/python3.11/site-packages/ray/rllib/core/models/torch/encoder.py new file mode 100644 index 0000000000000000000000000000000000000000..82812e43fc61ec4dc80f8395735696cc25935493 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/rllib/core/models/torch/encoder.py @@ -0,0 +1,284 @@ +import tree + +from ray.rllib.core.columns import Columns +from ray.rllib.core.models.base import ( + Encoder, + ActorCriticEncoder, + StatefulActorCriticEncoder, + ENCODER_OUT, +) +from ray.rllib.core.models.base import Model, tokenize +from ray.rllib.core.models.configs import ( + ActorCriticEncoderConfig, + CNNEncoderConfig, + 
MLPEncoderConfig, + RecurrentEncoderConfig, +) +from ray.rllib.core.models.torch.base import TorchModel +from ray.rllib.core.models.torch.primitives import TorchMLP, TorchCNN +from ray.rllib.models.utils import get_initializer_fn +from ray.rllib.utils.annotations import override +from ray.rllib.utils.framework import try_import_torch + +torch, nn = try_import_torch() + + +class TorchActorCriticEncoder(TorchModel, ActorCriticEncoder): + """An actor-critic encoder for torch.""" + + framework = "torch" + + def __init__(self, config: ActorCriticEncoderConfig) -> None: + TorchModel.__init__(self, config) + ActorCriticEncoder.__init__(self, config) + + +class TorchStatefulActorCriticEncoder(TorchModel, StatefulActorCriticEncoder): + """A stateful actor-critic encoder for torch.""" + + framework = "torch" + + def __init__(self, config: ActorCriticEncoderConfig) -> None: + TorchModel.__init__(self, config) + StatefulActorCriticEncoder.__init__(self, config) + + +class TorchMLPEncoder(TorchModel, Encoder): + def __init__(self, config: MLPEncoderConfig) -> None: + TorchModel.__init__(self, config) + Encoder.__init__(self, config) + + # Create the neural network. 
+ self.net = TorchMLP( + input_dim=config.input_dims[0], + hidden_layer_dims=config.hidden_layer_dims, + hidden_layer_activation=config.hidden_layer_activation, + hidden_layer_use_layernorm=config.hidden_layer_use_layernorm, + hidden_layer_use_bias=config.hidden_layer_use_bias, + hidden_layer_weights_initializer=config.hidden_layer_weights_initializer, + hidden_layer_weights_initializer_config=( + config.hidden_layer_weights_initializer_config + ), + hidden_layer_bias_initializer=config.hidden_layer_bias_initializer, + hidden_layer_bias_initializer_config=( + config.hidden_layer_bias_initializer_config + ), + output_dim=config.output_layer_dim, + output_activation=config.output_layer_activation, + output_use_bias=config.output_layer_use_bias, + output_weights_initializer=config.output_layer_weights_initializer, + output_weights_initializer_config=( + config.output_layer_weights_initializer_config + ), + output_bias_initializer=config.output_layer_bias_initializer, + output_bias_initializer_config=config.output_layer_bias_initializer_config, + ) + + @override(Model) + def _forward(self, inputs: dict, **kwargs) -> dict: + return {ENCODER_OUT: self.net(inputs[Columns.OBS])} + + +class TorchCNNEncoder(TorchModel, Encoder): + def __init__(self, config: CNNEncoderConfig) -> None: + TorchModel.__init__(self, config) + Encoder.__init__(self, config) + + layers = [] + # The bare-bones CNN (no flatten, no succeeding dense). + cnn = TorchCNN( + input_dims=config.input_dims, + cnn_filter_specifiers=config.cnn_filter_specifiers, + cnn_activation=config.cnn_activation, + cnn_use_layernorm=config.cnn_use_layernorm, + cnn_use_bias=config.cnn_use_bias, + cnn_kernel_initializer=config.cnn_kernel_initializer, + cnn_kernel_initializer_config=config.cnn_kernel_initializer_config, + cnn_bias_initializer=config.cnn_bias_initializer, + cnn_bias_initializer_config=config.cnn_bias_initializer_config, + ) + layers.append(cnn) + + # Add a flatten operation to move from 2/3D into 1D space. 
+        if config.flatten_at_end:
+            layers.append(nn.Flatten())
+
+        # Create the network from gathered layers.
+        self.net = nn.Sequential(*layers)
+
+    @override(Model)
+    def _forward(self, inputs: dict, **kwargs) -> dict:
+        return {ENCODER_OUT: self.net(inputs[Columns.OBS])}
+
+
+class TorchGRUEncoder(TorchModel, Encoder):
+    """A recurrent GRU encoder.
+
+    This encoder has...
+    - Zero or one tokenizers.
+    - One or more GRU layers.
+    """
+
+    def __init__(self, config: RecurrentEncoderConfig) -> None:
+        TorchModel.__init__(self, config)
+
+        # Maybe create a tokenizer
+        if config.tokenizer_config is not None:
+            self.tokenizer = config.tokenizer_config.build(framework="torch")
+            gru_input_dims = config.tokenizer_config.output_dims
+        else:
+            self.tokenizer = None
+            gru_input_dims = config.input_dims
+
+        # We only support 1D spaces right now.
+        assert len(gru_input_dims) == 1
+        gru_input_dim = gru_input_dims[0]
+
+        gru_weights_initializer = get_initializer_fn(
+            config.hidden_weights_initializer, framework="torch"
+        )
+        gru_bias_initializer = get_initializer_fn(
+            config.hidden_bias_initializer, framework="torch"
+        )
+
+        # Create the torch GRU layer.
+        self.gru = nn.GRU(
+            gru_input_dim,
+            config.hidden_dim,
+            config.num_layers,
+            batch_first=config.batch_major,
+            bias=config.use_bias,
+        )
+
+        # Initialize GRU layer weights and biases, if necessary.
+        # NOTE(review): `nn.GRU` exposes no `.weight`/`.bias` attribute (its
+        # per-layer parameters are `weight_ih_l[k]`, `bias_ih_l[k]`, etc.), and
+        # the bias initializer was previously applied to a weight tensor.
+        # Iterate `all_weights` exactly as the LSTM encoder in this file does.
+        for layer in self.gru.all_weights:
+            if gru_weights_initializer:
+                gru_weights_initializer(
+                    layer[0], **config.hidden_weights_initializer_config or {}
+                )
+                gru_weights_initializer(
+                    layer[1], **config.hidden_weights_initializer_config or {}
+                )
+            if gru_bias_initializer:
+                gru_bias_initializer(
+                    layer[2], **config.hidden_bias_initializer_config or {}
+                )
+                gru_bias_initializer(
+                    layer[3], **config.hidden_bias_initializer_config or {}
+                )
+
+    @override(Model)
+    def get_initial_state(self):
+        return {
+            "h": torch.zeros(self.config.num_layers, self.config.hidden_dim),
+        }
+
+    @override(Model)
+    def _forward(self, inputs: dict, **kwargs) -> dict:
+        outputs = {}
+
+        if self.tokenizer is not None:
+            # Push observations through the tokenizer encoder if we built one.
+ out = tokenize(self.tokenizer, inputs, framework="torch") + else: + # Otherwise, just use the raw observations. + out = inputs[Columns.OBS].float() + + # States are batch-first when coming in. Make them layers-first. + states_in = tree.map_structure( + lambda s: s.transpose(0, 1), inputs[Columns.STATE_IN] + ) + + out, states_out = self.gru(out, states_in["h"]) + states_out = {"h": states_out} + + # Insert them into the output dict. + outputs[ENCODER_OUT] = out + outputs[Columns.STATE_OUT] = tree.map_structure( + lambda s: s.transpose(0, 1), states_out + ) + return outputs + + +class TorchLSTMEncoder(TorchModel, Encoder): + """A recurrent LSTM encoder. + + This encoder has... + - Zero or one tokenizers. + - One or more LSTM layers. + """ + + def __init__(self, config: RecurrentEncoderConfig) -> None: + TorchModel.__init__(self, config) + + # Maybe create a tokenizer + if config.tokenizer_config is not None: + self.tokenizer = config.tokenizer_config.build(framework="torch") + lstm_input_dims = config.tokenizer_config.output_dims + else: + self.tokenizer = None + lstm_input_dims = config.input_dims + + # We only support 1D spaces right now. + assert len(lstm_input_dims) == 1 + lstm_input_dim = lstm_input_dims[0] + + lstm_weights_initializer = get_initializer_fn( + config.hidden_weights_initializer, framework="torch" + ) + lstm_bias_initializer = get_initializer_fn( + config.hidden_bias_initializer, framework="torch" + ) + + # Create the torch LSTM layer. + self.lstm = nn.LSTM( + lstm_input_dim, + config.hidden_dim, + config.num_layers, + batch_first=config.batch_major, + bias=config.use_bias, + ) + + # Initialize LSTM layer weigths and biases, if necessary. 
+ for layer in self.lstm.all_weights: + if lstm_weights_initializer: + lstm_weights_initializer( + layer[0], **config.hidden_weights_initializer_config or {} + ) + lstm_weights_initializer( + layer[1], **config.hidden_weights_initializer_config or {} + ) + if lstm_bias_initializer: + lstm_bias_initializer( + layer[2], **config.hidden_bias_initializer_config or {} + ) + lstm_bias_initializer( + layer[3], **config.hidden_bias_initializer_config or {} + ) + + @override(Model) + def get_initial_state(self): + return { + "h": torch.zeros(self.config.num_layers, self.config.hidden_dim), + "c": torch.zeros(self.config.num_layers, self.config.hidden_dim), + } + + @override(Model) + def _forward(self, inputs: dict, **kwargs) -> dict: + outputs = {} + + if self.tokenizer is not None: + # Push observations through the tokenizer encoder if we built one. + out = tokenize(self.tokenizer, inputs, framework="torch") + else: + # Otherwise, just use the raw observations. + out = inputs[Columns.OBS].float() + + # States are batch-first when coming in. Make them layers-first. + states_in = tree.map_structure( + lambda s: s.transpose(0, 1), inputs[Columns.STATE_IN] + ) + + out, states_out = self.lstm(out, (states_in["h"], states_in["c"])) + states_out = {"h": states_out[0], "c": states_out[1]} + + # Insert them into the output dict. 
+ outputs[ENCODER_OUT] = out + outputs[Columns.STATE_OUT] = tree.map_structure( + lambda s: s.transpose(0, 1), states_out + ) + return outputs diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/core/models/torch/heads.py b/.venv/lib/python3.11/site-packages/ray/rllib/core/models/torch/heads.py new file mode 100644 index 0000000000000000000000000000000000000000..844c40c4a44cbe0617c5008b0464acc20ed17653 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/rllib/core/models/torch/heads.py @@ -0,0 +1,197 @@ +import numpy as np + +from ray.rllib.core.models.base import Model +from ray.rllib.core.models.configs import ( + CNNTransposeHeadConfig, + FreeLogStdMLPHeadConfig, + MLPHeadConfig, +) +from ray.rllib.core.models.torch.base import TorchModel +from ray.rllib.core.models.torch.primitives import TorchCNNTranspose, TorchMLP +from ray.rllib.models.utils import get_initializer_fn +from ray.rllib.utils.annotations import override +from ray.rllib.utils.framework import try_import_torch + +torch, nn = try_import_torch() + + +class TorchMLPHead(TorchModel): + def __init__(self, config: MLPHeadConfig) -> None: + super().__init__(config) + + self.net = TorchMLP( + input_dim=config.input_dims[0], + hidden_layer_dims=config.hidden_layer_dims, + hidden_layer_activation=config.hidden_layer_activation, + hidden_layer_use_layernorm=config.hidden_layer_use_layernorm, + hidden_layer_use_bias=config.hidden_layer_use_bias, + hidden_layer_weights_initializer=config.hidden_layer_weights_initializer, + hidden_layer_weights_initializer_config=( + config.hidden_layer_weights_initializer_config + ), + hidden_layer_bias_initializer=config.hidden_layer_bias_initializer, + hidden_layer_bias_initializer_config=( + config.hidden_layer_bias_initializer_config + ), + output_dim=config.output_layer_dim, + output_activation=config.output_layer_activation, + output_use_bias=config.output_layer_use_bias, + output_weights_initializer=config.output_layer_weights_initializer, + 
output_weights_initializer_config=( + config.output_layer_weights_initializer_config + ), + output_bias_initializer=config.output_layer_bias_initializer, + output_bias_initializer_config=config.output_layer_bias_initializer_config, + ) + # If log standard deviations should be clipped. This should be only true for + # policy heads. Value heads should never be clipped. + self.clip_log_std = config.clip_log_std + # The clipping parameter for the log standard deviation. + self.log_std_clip_param = torch.Tensor([config.log_std_clip_param]) + # Register a buffer to handle device mapping. + self.register_buffer("log_std_clip_param_const", self.log_std_clip_param) + + @override(Model) + def _forward(self, inputs: torch.Tensor, **kwargs) -> torch.Tensor: + # Only clip the log standard deviations, if the user wants to clip. This + # avoids also clipping value heads. + if self.clip_log_std: + # Forward pass. + means, log_stds = torch.chunk(self.net(inputs), chunks=2, dim=-1) + # Clip the log standard deviations. + log_stds = torch.clamp( + log_stds, -self.log_std_clip_param_const, self.log_std_clip_param_const + ) + return torch.cat((means, log_stds), dim=-1) + # Otherwise just return the logits. + else: + return self.net(inputs) + + +class TorchFreeLogStdMLPHead(TorchModel): + """An MLPHead that implements floating log stds for Gaussian distributions.""" + + def __init__(self, config: FreeLogStdMLPHeadConfig) -> None: + super().__init__(config) + + assert config.output_dims[0] % 2 == 0, "output_dims must be even for free std!" 
+ self._half_output_dim = config.output_dims[0] // 2 + + self.net = TorchMLP( + input_dim=config.input_dims[0], + hidden_layer_dims=config.hidden_layer_dims, + hidden_layer_activation=config.hidden_layer_activation, + hidden_layer_use_layernorm=config.hidden_layer_use_layernorm, + hidden_layer_use_bias=config.hidden_layer_use_bias, + hidden_layer_weights_initializer=config.hidden_layer_weights_initializer, + hidden_layer_weights_initializer_config=( + config.hidden_layer_weights_initializer_config + ), + hidden_layer_bias_initializer=config.hidden_layer_bias_initializer, + hidden_layer_bias_initializer_config=( + config.hidden_layer_bias_initializer_config + ), + output_dim=self._half_output_dim, + output_activation=config.output_layer_activation, + output_use_bias=config.output_layer_use_bias, + output_weights_initializer=config.output_layer_weights_initializer, + output_weights_initializer_config=( + config.output_layer_weights_initializer_config + ), + output_bias_initializer=config.output_layer_bias_initializer, + output_bias_initializer_config=config.output_layer_bias_initializer_config, + ) + + self.log_std = torch.nn.Parameter( + torch.as_tensor([0.0] * self._half_output_dim) + ) + # If log standard deviations should be clipped. This should be only true for + # policy heads. Value heads should never be clipped. + self.clip_log_std = config.clip_log_std + # The clipping parameter for the log standard deviation. + self.log_std_clip_param = torch.Tensor( + [config.log_std_clip_param], device=self.log_std.device + ) + # Register a buffer to handle device mapping. + self.register_buffer("log_std_clip_param_const", self.log_std_clip_param) + + @override(Model) + def _forward(self, inputs: torch.Tensor, **kwargs) -> torch.Tensor: + # Compute the mean first, then append the log_std. + mean = self.net(inputs) + + # If log standard deviation should be clipped. 
+        if self.clip_log_std:
+            # Clip the log standard deviation to avoid running into too small
+            # deviations that factually collapse the policy.
+            log_std = torch.clamp(
+                self.log_std,
+                -self.log_std_clip_param_const,
+                self.log_std_clip_param_const,
+            )
+        else:
+            log_std = self.log_std
+
+        return torch.cat([mean, log_std.unsqueeze(0).repeat([len(mean), 1])], axis=1)
+
+
+class TorchCNNTransposeHead(TorchModel):
+    def __init__(self, config: CNNTransposeHeadConfig) -> None:
+        super().__init__(config)
+
+        # Initial, inactivated Dense layer (always w/ bias).
+        # This layer is responsible for getting the incoming tensor into a proper
+        # initial image shape (w x h x filters) for the succeeding Conv2DTranspose
+        # stack.
+        self.initial_dense = nn.Linear(
+            in_features=config.input_dims[0],
+            out_features=int(np.prod(config.initial_image_dims)),
+            bias=True,
+        )
+
+        # Initial Dense layer initializers.
+        initial_dense_weights_initializer = get_initializer_fn(
+            config.initial_dense_weights_initializer, framework="torch"
+        )
+        initial_dense_bias_initializer = get_initializer_fn(
+            config.initial_dense_bias_initializer, framework="torch"
+        )
+
+        # Initialize dense layer weights, if necessary.
+        if initial_dense_weights_initializer:
+            initial_dense_weights_initializer(
+                self.initial_dense.weight,
+                **config.initial_dense_weights_initializer_config or {},
+            )
+        # Initialize dense layer bias, if necessary.
+        if initial_dense_bias_initializer:
+            initial_dense_bias_initializer(
+                self.initial_dense.bias,
+                **config.initial_dense_bias_initializer_config or {},
+            )
+
+        # The main CNNTranspose stack.
+ self.cnn_transpose_net = TorchCNNTranspose( + input_dims=config.initial_image_dims, + cnn_transpose_filter_specifiers=config.cnn_transpose_filter_specifiers, + cnn_transpose_activation=config.cnn_transpose_activation, + cnn_transpose_use_layernorm=config.cnn_transpose_use_layernorm, + cnn_transpose_use_bias=config.cnn_transpose_use_bias, + cnn_transpose_kernel_initializer=config.cnn_transpose_kernel_initializer, + cnn_transpose_kernel_initializer_config=( + config.cnn_transpose_kernel_initializer_config + ), + cnn_transpose_bias_initializer=config.cnn_transpose_bias_initializer, + cnn_transpose_bias_initializer_config=( + config.cnn_transpose_bias_initializer_config + ), + ) + + @override(Model) + def _forward(self, inputs: torch.Tensor, **kwargs) -> torch.Tensor: + out = self.initial_dense(inputs) + # Reshape to initial 3D (image-like) format to enter CNN transpose stack. + out = out.reshape((-1,) + tuple(self.config.initial_image_dims)) + out = self.cnn_transpose_net(out) + # Add 0.5 to center (always non-activated, non-normalized) outputs more + # around 0.0. + return out + 0.5 diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/core/models/torch/primitives.py b/.venv/lib/python3.11/site-packages/ray/rllib/core/models/torch/primitives.py new file mode 100644 index 0000000000000000000000000000000000000000..9c4e5574351093e658cf08aa387c172ad4c042a4 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/rllib/core/models/torch/primitives.py @@ -0,0 +1,479 @@ +from typing import Callable, Dict, List, Optional, Union, Tuple + +from ray.rllib.core.models.torch.utils import Stride2D +from ray.rllib.models.torch.misc import ( + same_padding, + same_padding_transpose_after_stride, + valid_padding, +) +from ray.rllib.models.utils import get_activation_fn, get_initializer_fn +from ray.rllib.utils.framework import try_import_torch + +torch, nn = try_import_torch() + + +class TorchMLP(nn.Module): + """A multi-layer perceptron with N dense layers. 
+ + All layers (except for an optional additional extra output layer) share the same + activation function, bias setup (use bias or not), and LayerNorm setup + (use layer normalization or not). + + If `output_dim` (int) is not None, an additional, extra output dense layer is added, + which might have its own activation function (e.g. "linear"). However, the output + layer does NOT use layer normalization. + """ + + def __init__( + self, + *, + input_dim: int, + hidden_layer_dims: List[int], + hidden_layer_activation: Union[str, Callable] = "relu", + hidden_layer_use_bias: bool = True, + hidden_layer_use_layernorm: bool = False, + hidden_layer_weights_initializer: Optional[Union[str, Callable]] = None, + hidden_layer_weights_initializer_config: Optional[Union[str, Callable]] = None, + hidden_layer_bias_initializer: Optional[Union[str, Callable]] = None, + hidden_layer_bias_initializer_config: Optional[Dict] = None, + output_dim: Optional[int] = None, + output_use_bias: bool = True, + output_activation: Union[str, Callable] = "linear", + output_weights_initializer: Optional[Union[str, Callable]] = None, + output_weights_initializer_config: Optional[Dict] = None, + output_bias_initializer: Optional[Union[str, Callable]] = None, + output_bias_initializer_config: Optional[Dict] = None, + ): + """Initialize a TorchMLP object. + + Args: + input_dim: The input dimension of the network. Must not be None. + hidden_layer_dims: The sizes of the hidden layers. If an empty list, only a + single layer will be built of size `output_dim`. + hidden_layer_use_layernorm: Whether to insert a LayerNormalization + functionality in between each hidden layer's output and its activation. + hidden_layer_use_bias: Whether to use bias on all dense layers (excluding + the possible separate output layer). + hidden_layer_activation: The activation function to use after each layer + (except for the output). 
Either a torch.nn.[activation fn] callable or
+                the name thereof, or an RLlib recognized activation name,
+                e.g. "ReLU", "relu", "tanh", "SiLU", or "linear".
+            hidden_layer_weights_initializer: The initializer function or class to use
+                for weights initialization in the hidden layers. If `None` the default
+                initializer of the respective dense layer is used. Note, only the
+                in-place initializers, i.e. ending with an underscore "_" are allowed.
+            hidden_layer_weights_initializer_config: Configuration to pass into the
+                initializer defined in `hidden_layer_weights_initializer`.
+            hidden_layer_bias_initializer: The initializer function or class to use for
+                bias initialization in the hidden layers. If `None` the default
+                initializer of the respective dense layer is used. Note, only the
+                in-place initializers, i.e. ending with an underscore "_" are allowed.
+            hidden_layer_bias_initializer_config: Configuration to pass into the
+                initializer defined in `hidden_layer_bias_initializer`.
+            output_dim: The output dimension of the network. If None, no specific output
+                layer will be added and the last layer in the stack will have
+                size=`hidden_layer_dims[-1]`.
+            output_use_bias: Whether to use bias on the separate output layer,
+                if any.
+            output_activation: The activation function to use for the output layer
+                (if any). Either a torch.nn.[activation fn] callable or
+                the name thereof, or an RLlib recognized activation name,
+                e.g. "ReLU", "relu", "tanh", "SiLU", or "linear".
+            output_weights_initializer: The initializer function or class to use
+                for weights initialization in the output layers. If `None` the default
+                initializer of the respective dense layer is used. Note, only the
+                in-place initializers, i.e. ending with an underscore "_" are allowed.
+            output_weights_initializer_config: Configuration to pass into the
+                initializer defined in `output_weights_initializer`.
+ output_layer_bias_initializer: The initializer function or class to use for + bias initialization in the output layers. If `None` the default + initializer of the respective dense layer is used. Note, only the + in-place initializers, i.e. ending with an underscore "_" are allowed. + output_layer_bias_initializer_config: Configuration to pass into the + initializer defined in `output_layer_bias_initializer`. + """ + super().__init__() + assert input_dim > 0 + + self.input_dim = input_dim + + hidden_activation = get_activation_fn( + hidden_layer_activation, framework="torch" + ) + hidden_weights_initializer = get_initializer_fn( + hidden_layer_weights_initializer, framework="torch" + ) + hidden_bias_initializer = get_initializer_fn( + hidden_layer_bias_initializer, framework="torch" + ) + output_weights_initializer = get_initializer_fn( + output_weights_initializer, framework="torch" + ) + output_bias_initializer = get_initializer_fn( + output_bias_initializer, framework="torch" + ) + + layers = [] + dims = ( + [self.input_dim] + + list(hidden_layer_dims) + + ([output_dim] if output_dim else []) + ) + for i in range(0, len(dims) - 1): + # Whether we are already processing the last (special) output layer. + is_output_layer = output_dim is not None and i == len(dims) - 2 + + layer = nn.Linear( + dims[i], + dims[i + 1], + bias=output_use_bias if is_output_layer else hidden_layer_use_bias, + ) + # Initialize layers, if necessary. + if is_output_layer: + # Initialize output layer weigths if necessary. + if output_weights_initializer: + output_weights_initializer( + layer.weight, **output_weights_initializer_config or {} + ) + # Initialize output layer bias if necessary. + if output_bias_initializer: + output_bias_initializer( + layer.bias, **output_bias_initializer_config or {} + ) + # Must be hidden. + else: + # Initialize hidden layer weights if necessary. 
+ if hidden_layer_weights_initializer: + hidden_weights_initializer( + layer.weight, **hidden_layer_weights_initializer_config or {} + ) + # Initialize hidden layer bias if necessary. + if hidden_layer_bias_initializer: + hidden_bias_initializer( + layer.bias, **hidden_layer_bias_initializer_config or {} + ) + + layers.append(layer) + + # We are still in the hidden layer section: Possibly add layernorm and + # hidden activation. + if not is_output_layer: + # Insert a layer normalization in between layer's output and + # the activation. + if hidden_layer_use_layernorm: + # We use an epsilon of 0.001 here to mimick the Tf default behavior. + layers.append(nn.LayerNorm(dims[i + 1], eps=0.001)) + # Add the activation function. + if hidden_activation is not None: + layers.append(hidden_activation()) + + # Add output layer's (if any) activation. + output_activation = get_activation_fn(output_activation, framework="torch") + if output_dim is not None and output_activation is not None: + layers.append(output_activation()) + + self.mlp = nn.Sequential(*layers) + + def forward(self, x): + return self.mlp(x) + + +class TorchCNN(nn.Module): + """A model containing a CNN with N Conv2D layers. + + All layers share the same activation function, bias setup (use bias or not), + and LayerNorm setup (use layer normalization or not). + + Note that there is no flattening nor an additional dense layer at the end of the + stack. The output of the network is a 3D tensor of dimensions + [width x height x num output filters]. 
+ """ + + def __init__( + self, + *, + input_dims: Union[List[int], Tuple[int]], + cnn_filter_specifiers: List[List[Union[int, List]]], + cnn_use_bias: bool = True, + cnn_use_layernorm: bool = False, + cnn_activation: str = "relu", + cnn_kernel_initializer: Optional[Union[str, Callable]] = None, + cnn_kernel_initializer_config: Optional[Dict] = None, + cnn_bias_initializer: Optional[Union[str, Callable]] = None, + cnn_bias_initializer_config: Optional[Dict] = None, + ): + """Initializes a TorchCNN instance. + + Args: + input_dims: The 3D input dimensions of the network (incoming image). + cnn_filter_specifiers: A list in which each element is another (inner) list + of either the following forms: + `[number of channels/filters, kernel, stride]` + OR: + `[number of channels/filters, kernel, stride, padding]`, where `padding` + can either be "same" or "valid". + When using the first format w/o the `padding` specifier, `padding` is + "same" by default. Also, `kernel` and `stride` may be provided either as + single ints (square) or as a tuple/list of two ints (width- and height + dimensions) for non-squared kernel/stride shapes. + A good rule of thumb for constructing CNN stacks is: + When using padding="same", the input "image" will be reduced in size by + the factor `stride`, e.g. input=(84, 84, 3) stride=2 kernel=x + padding="same" filters=16 -> output=(42, 42, 16). + For example, if you would like to reduce an Atari image from its + original (84, 84, 3) dimensions down to (6, 6, F), you can construct the + following stack and reduce the w x h dimension of the image by 2 in each + layer: + [[16, 4, 2], [32, 4, 2], [64, 4, 2], [128, 4, 2]] -> output=(6, 6, 128) + cnn_use_bias: Whether to use bias on all Conv2D layers. + cnn_activation: The activation function to use after each Conv2D layer. + cnn_use_layernorm: Whether to insert a LayerNormalization functionality + in between each Conv2D layer's outputs and its activation. 
+ cnn_kernel_initializer: The initializer function or class to use for kernel + initialization in the CNN layers. If `None` the default initializer of + the respective CNN layer is used. Note, only the in-place + initializers, i.e. ending with an underscore "_" are allowed. + cnn_kernel_initializer_config: Configuration to pass into the initializer + defined in `cnn_kernel_initializer`. + cnn_bias_initializer: The initializer function or class to use for bias + initializationcin the CNN layers. If `None` the default initializer of + the respective CNN layer is used. Note, only the in-place initializers, + i.e. ending with an underscore "_" are allowed. + cnn_bias_initializer_config: Configuration to pass into the initializer + defined in `cnn_bias_initializer`. + """ + super().__init__() + + assert len(input_dims) == 3 + + cnn_activation = get_activation_fn(cnn_activation, framework="torch") + cnn_kernel_initializer = get_initializer_fn( + cnn_kernel_initializer, framework="torch" + ) + cnn_bias_initializer = get_initializer_fn( + cnn_bias_initializer, framework="torch" + ) + layers = [] + + # Add user-specified hidden convolutional layers first + width, height, in_depth = input_dims + in_size = [width, height] + for filter_specs in cnn_filter_specifiers: + # Padding information not provided -> Use "same" as default. + if len(filter_specs) == 3: + out_depth, kernel_size, strides = filter_specs + padding = "same" + # Padding information provided. + else: + out_depth, kernel_size, strides, padding = filter_specs + + # Pad like in tensorflow's SAME/VALID mode. + if padding == "same": + padding_size, out_size = same_padding(in_size, kernel_size, strides) + layers.append(nn.ZeroPad2d(padding_size)) + # No actual padding is performed for "valid" mode, but we will still + # compute the output size (input for the next layer). 
+ else: + out_size = valid_padding(in_size, kernel_size, strides) + + layer = nn.Conv2d( + in_depth, out_depth, kernel_size, strides, bias=cnn_use_bias + ) + + # Initialize CNN layer kernel if necessary. + if cnn_kernel_initializer: + cnn_kernel_initializer( + layer.weight, **cnn_kernel_initializer_config or {} + ) + # Initialize CNN layer bias if necessary. + if cnn_bias_initializer: + cnn_bias_initializer(layer.bias, **cnn_bias_initializer_config or {}) + + layers.append(layer) + + # Layernorm. + if cnn_use_layernorm: + # We use an epsilon of 0.001 here to mimick the Tf default behavior. + layers.append(LayerNorm1D(out_depth, eps=0.001)) + # Activation. + if cnn_activation is not None: + layers.append(cnn_activation()) + + in_size = out_size + in_depth = out_depth + + # Create the CNN. + self.cnn = nn.Sequential(*layers) + + def forward(self, inputs): + # Permute b/c data comes in as channels_last ([B, dim, dim, channels]) -> + # Convert to `channels_first` for torch: + inputs = inputs.permute(0, 3, 1, 2) + out = self.cnn(inputs) + # Permute back to `channels_last`. + return out.permute(0, 2, 3, 1) + + +class TorchCNNTranspose(nn.Module): + """A model containing a CNNTranspose with N Conv2DTranspose layers. + + All layers share the same activation function, bias setup (use bias or not), + and LayerNormalization setup (use layer normalization or not), except for the last + one, which is never activated and never layer norm'd. + + Note that there is no reshaping/flattening nor an additional dense layer at the + beginning or end of the stack. The input as well as output of the network are 3D + tensors of dimensions [width x height x num output filters]. 
+ """ + + def __init__( + self, + *, + input_dims: Union[List[int], Tuple[int]], + cnn_transpose_filter_specifiers: List[List[Union[int, List]]], + cnn_transpose_use_bias: bool = True, + cnn_transpose_activation: str = "relu", + cnn_transpose_use_layernorm: bool = False, + cnn_transpose_kernel_initializer: Optional[Union[str, Callable]] = None, + cnn_transpose_kernel_initializer_config: Optional[Dict] = None, + cnn_transpose_bias_initializer: Optional[Union[str, Callable]] = None, + cnn_transpose_bias_initializer_config: Optional[Dict] = None, + ): + """Initializes a TorchCNNTranspose instance. + + Args: + input_dims: The 3D input dimensions of the network (incoming image). + cnn_transpose_filter_specifiers: A list of lists, where each item represents + one Conv2DTranspose layer. Each such Conv2DTranspose layer is further + specified by the elements of the inner lists. The inner lists follow + the format: `[number of filters, kernel, stride]` to + specify a convolutional-transpose layer stacked in order of the + outer list. + `kernel` as well as `stride` might be provided as width x height tuples + OR as single ints representing both dimension (width and height) + in case of square shapes. + cnn_transpose_use_bias: Whether to use bias on all Conv2DTranspose layers. + cnn_transpose_use_layernorm: Whether to insert a LayerNormalization + functionality in between each Conv2DTranspose layer's outputs and its + activation. + The last Conv2DTranspose layer will not be normed, regardless. + cnn_transpose_activation: The activation function to use after each layer + (except for the last Conv2DTranspose layer, which is always + non-activated). + cnn_transpose_kernel_initializer: The initializer function or class to use + for kernel initialization in the CNN layers. If `None` the default + initializer of the respective CNN layer is used. Note, only the + in-place initializers, i.e. ending with an underscore "_" are allowed. 
+ cnn_transpose_kernel_initializer_config: Configuration to pass into the + initializer defined in `cnn_transpose_kernel_initializer`. + cnn_transpose_bias_initializer: The initializer function or class to use for + bias initialization in the CNN layers. If `None` the default initializer + of the respective CNN layer is used. Note, only the in-place + initializers, i.e. ending with an underscore "_" are allowed. + cnn_transpose_bias_initializer_config: Configuration to pass into the + initializer defined in `cnn_transpose_bias_initializer`. + """ + super().__init__() + + assert len(input_dims) == 3 + + cnn_transpose_activation = get_activation_fn( + cnn_transpose_activation, framework="torch" + ) + cnn_transpose_kernel_initializer = get_initializer_fn( + cnn_transpose_kernel_initializer, framework="torch" + ) + cnn_transpose_bias_initializer = get_initializer_fn( + cnn_transpose_bias_initializer, framework="torch" + ) + + layers = [] + + # Add user-specified hidden convolutional layers first + width, height, in_depth = input_dims + in_size = [width, height] + for i, (out_depth, kernel, stride) in enumerate( + cnn_transpose_filter_specifiers + ): + is_final_layer = i == len(cnn_transpose_filter_specifiers) - 1 + + # Resolve stride and kernel width/height values if only int given (squared). + s_w, s_h = (stride, stride) if isinstance(stride, int) else stride + k_w, k_h = (kernel, kernel) if isinstance(kernel, int) else kernel + + # Stride the incoming image first. + stride_layer = Stride2D(in_size[0], in_size[1], s_w, s_h) + layers.append(stride_layer) + # Then 0-pad (like in tensorflow's SAME mode). + # This will return the necessary padding such that for stride=1, the output + # image has the same size as the input image, for stride=2, the output image + # is 2x the input image, etc.. 
+ padding, out_size = same_padding_transpose_after_stride( + (stride_layer.out_width, stride_layer.out_height), kernel, stride + ) + layers.append(nn.ZeroPad2d(padding)) # left, right, top, bottom + # Then do the Conv2DTranspose operation + # (now that we have padded and strided manually, w/o any more padding using + # stride=1). + + layer = nn.ConvTranspose2d( + in_depth, + out_depth, + kernel, + # Force-set stride to 1 as we already took care of it. + 1, + # Disable torch auto-padding (torch interprets the padding setting + # as: dilation (==1.0) * [`kernel` - 1] - [`padding`]). + padding=(k_w - 1, k_h - 1), + # Last layer always uses bias (b/c has no LayerNorm, regardless of + # config). + bias=cnn_transpose_use_bias or is_final_layer, + ) + + # Initialize CNN Transpose layer kernel if necessary. + if cnn_transpose_kernel_initializer: + cnn_transpose_kernel_initializer( + layer.weight, **cnn_transpose_kernel_initializer_config or {} + ) + # Initialize CNN Transpose layer bias if necessary. + if cnn_transpose_bias_initializer: + cnn_transpose_bias_initializer( + layer.bias, **cnn_transpose_bias_initializer_config or {} + ) + + layers.append(layer) + # Layernorm (never for final layer). + if cnn_transpose_use_layernorm and not is_final_layer: + layers.append(LayerNorm1D(out_depth, eps=0.001)) + # Last layer is never activated (regardless of config). + if cnn_transpose_activation is not None and not is_final_layer: + layers.append(cnn_transpose_activation()) + + in_size = (out_size[0], out_size[1]) + in_depth = out_depth + + # Create the final CNNTranspose network. 
+ self.cnn_transpose = nn.Sequential(*layers) + + def forward(self, inputs): + # Permute b/c data comes in as [B, dim, dim, channels]: + out = inputs.permute(0, 3, 1, 2) + out = self.cnn_transpose(out) + return out.permute(0, 2, 3, 1) + + +class LayerNorm1D(nn.Module): + def __init__(self, num_features, **kwargs): + super().__init__() + self.layer_norm = nn.LayerNorm(num_features, **kwargs) + + def forward(self, x): + # x shape: (B, dim, dim, channels). + batch_size, channels, h, w = x.size() + # Reshape to (batch_size * height * width, channels) for LayerNorm + x = x.permute(0, 2, 3, 1).reshape(-1, channels) + # Apply LayerNorm + x = self.layer_norm(x) + # Reshape back to (batch_size, dim, dim, channels) + x = x.reshape(batch_size, h, w, channels).permute(0, 3, 1, 2) + return x diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/core/models/torch/utils.py b/.venv/lib/python3.11/site-packages/ray/rllib/core/models/torch/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..1bdbdef016f4c8a7261ff83f14d9c70810d6a90e --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/rllib/core/models/torch/utils.py @@ -0,0 +1,85 @@ +from ray.rllib.utils.framework import try_import_torch + +torch, nn = try_import_torch() + + +class Stride2D(nn.Module): + """A striding layer for doing torch Conv2DTranspose operations. + + Using this layer before the 0-padding (on a 3D input "image") and before + the actual ConvTranspose2d allows for a padding="same" behavior that matches + 100% that of a `tf.keras.layers.Conv2DTranspose` layer. + + Examples: + Input image (4x4): + A B C D + E F G H + I J K L + M N O P + + Stride with stride=2 -> output image=(7x7) + A 0 B 0 C 0 D + 0 0 0 0 0 0 0 + E 0 F 0 G 0 H + 0 0 0 0 0 0 0 + I 0 J 0 K 0 L + 0 0 0 0 0 0 0 + M 0 N 0 O 0 P + """ + + def __init__(self, width, height, stride_w, stride_h): + """Initializes a Stride2D instance. + + Args: + width: The width of the 3D input "image". 
class Stride2D(nn.Module):
    """A striding layer for doing torch Conv2DTranspose operations.

    Using this layer before the 0-padding (on a 3D input "image") and before
    the actual ConvTranspose2d allows for a padding="same" behavior that matches
    100% that of a `tf.keras.layers.Conv2DTranspose` layer.

    Examples:
        Input image (4x4):
        A B C D
        E F G H
        I J K L
        M N O P

        Stride with stride=2 -> output image=(7x7)
        A 0 B 0 C 0 D
        0 0 0 0 0 0 0
        E 0 F 0 G 0 H
        0 0 0 0 0 0 0
        I 0 J 0 K 0 L
        0 0 0 0 0 0 0
        M 0 N 0 O 0 P
    """

    def __init__(self, width, height, stride_w, stride_h):
        """Initializes a Stride2D instance.

        Args:
            width: The width of the 3D input "image".
            height: The height of the 3D input "image".
            stride_w: The stride in width direction, with which to stride the
                incoming image.
            stride_h: The stride in height direction, with which to stride the
                incoming image.
        """
        super().__init__()

        self.width = width
        self.height = height
        self.stride_w = stride_w
        self.stride_h = stride_h

        # Output ("strided") image size: each of the n input pixels along a
        # dimension gets (stride - 1) zeros inserted after it, except the last one.
        # Using explicit positive sizes (instead of negative slicing) also makes
        # stride=1 work: the original `[:-(stride - 1)]` slicing degenerated to
        # `[:-0]` (an empty tensor) for stride=1.
        self.out_width = self.width * self.stride_w - (self.stride_w - 1)
        self.out_height = self.height * self.stride_h - (self.stride_h - 1)

        # Zero filler values, broadcast against the (B, C, W, H) input.
        self.register_buffer(
            "zeros",
            torch.zeros(
                size=(self.out_width, self.out_height), dtype=torch.float32
            )
            .unsqueeze(0)
            .unsqueeze(0),
        )

        # Boolean mask: True at every "real" pixel position (upper/left corner of
        # each stride_w x stride_h tile), False at the inserted-zero positions.
        where_template = torch.zeros(
            (self.stride_w, self.stride_h), dtype=torch.float32
        )
        # Set upper/left corner to 1.0.
        where_template[0][0] = 1.0
        # Tile across the entire (strided) image. Note the (width, height) repeat
        # order: dim 0 is the width axis (matching the zeros buffer and the
        # dim=-2 repeat in `forward`), so non-square images tile correctly.
        where_template = where_template.repeat((self.width, self.height))[
            : self.out_width, : self.out_height
        ]
        # Squeeze in batch and channel dims and convert to bool.
        self.register_buffer(
            "where_template", where_template.unsqueeze(0).unsqueeze(0).bool()
        )

    def forward(self, x):
        """Returns `x` with (stride - 1) zeros interleaved between its pixels.

        Args:
            x: Input of shape (B, C, width, height) — TODO confirm w/h axis order
                with callers; the math is symmetric either way.
        """
        # Repeat incoming image stride(w/h) times to match the strided output
        # template, then crop to the exact output size (positive slice ends so
        # that stride=1 is the identity).
        repeated_x = x.repeat_interleave(self.stride_w, dim=-2).repeat_interleave(
            self.stride_h, dim=-1
        )[:, :, : self.out_width, : self.out_height]
        # Where `self.where_template` is True -> Use image pixel, otherwise use
        # zero filler value.
        return torch.where(self.where_template, repeated_x, self.zeros)
"""Contains example implementation of a custom algorithm.

Note: It doesn't include any real use-case functionality; it only serves as an example
to test the algorithm construction and customization.
"""

from ray.rllib.algorithms import Algorithm, AlgorithmConfig
from ray.rllib.policy.torch_policy_v2 import TorchPolicyV2
from ray.rllib.policy.eager_tf_policy_v2 import EagerTFPolicyV2
from ray.rllib.core.testing.torch.bc_module import DiscreteBCTorchModule
from ray.rllib.core.testing.torch.bc_learner import BCTorchLearner
from ray.rllib.core.testing.tf.bc_module import DiscreteBCTFModule
from ray.rllib.core.testing.tf.bc_learner import BCTfLearner
from ray.rllib.core.rl_module.rl_module import RLModuleSpec
from ray.rllib.utils.annotations import override
from ray.rllib.utils.typing import ResultDict


class BCConfigTest(AlgorithmConfig):
    """Minimal AlgorithmConfig used to test custom-algorithm construction."""

    def __init__(self, algo_class=None):
        super().__init__(algo_class=algo_class or BCAlgorithmTest)

    def get_default_rl_module_spec(self):
        """Returns the framework-matching BC RLModule spec.

        Note: Falls through (returns None) for frameworks other than
        "torch"/"tf2".
        """
        framework = self.framework_str
        if framework == "torch":
            return RLModuleSpec(module_class=DiscreteBCTorchModule)
        if framework == "tf2":
            return RLModuleSpec(module_class=DiscreteBCTFModule)

    def get_default_learner_class(self):
        """Returns the framework-matching BC test Learner class (or None)."""
        framework = self.framework_str
        if framework == "torch":
            return BCTorchLearner
        if framework == "tf2":
            return BCTfLearner


class BCAlgorithmTest(Algorithm):
    """Do-nothing Algorithm used to test construction and customization."""

    @classmethod
    def get_default_policy_class(cls, config: AlgorithmConfig):
        """Returns the framework-matching (old-stack) policy class."""
        framework = config.framework_str
        if framework == "torch":
            return TorchPolicyV2
        if framework == "tf2":
            return EagerTFPolicyV2
        raise ValueError("Unknown framework: {}".format(framework))

    @override(Algorithm)
    def training_step(self) -> ResultDict:
        # do nothing.
        return {}
class BaseTestingAlgorithmConfig(AlgorithmConfig):
    """Base AlgorithmConfig for the BC testing algorithm."""

    # A test setting to activate metrics on mean weights.
    report_mean_weights: bool = True

    @override(AlgorithmConfig)
    def get_default_learner_class(self) -> Type["Learner"]:
        """Returns the framework-specific BC test Learner class.

        Raises:
            ValueError: If `self.framework_str` is neither "tf2" nor "torch".
        """
        if self.framework_str == "tf2":
            from ray.rllib.core.testing.tf.bc_learner import BCTfLearner

            return BCTfLearner
        if self.framework_str == "torch":
            from ray.rllib.core.testing.torch.bc_learner import BCTorchLearner

            return BCTorchLearner
        raise ValueError(f"Unsupported framework: {self.framework_str}")

    @override(AlgorithmConfig)
    def get_default_rl_module_spec(self) -> "RLModuleSpecType":
        """Returns the framework-specific BC test RLModule spec.

        In the multi-agent case, the single-module spec is wrapped into a
        `MultiRLModuleSpec` under the default module ID.

        Raises:
            ValueError: If `self.framework_str` is neither "tf2" nor "torch".
        """
        if self.framework_str == "tf2":
            from ray.rllib.core.testing.tf.bc_module import DiscreteBCTFModule

            module_class = DiscreteBCTFModule
        elif self.framework_str == "torch":
            from ray.rllib.core.testing.torch.bc_module import DiscreteBCTorchModule

            module_class = DiscreteBCTorchModule
        else:
            raise ValueError(f"Unsupported framework: {self.framework_str}")

        single_spec = RLModuleSpec(
            module_class=module_class,
            model_config={"fcnet_hiddens": [32]},
        )

        if not self.is_multi_agent:
            return single_spec

        # TODO (Kourosh): Make this more multi-agent for example with policy ids
        # "1" and "2".
        return MultiRLModuleSpec(
            multi_rl_module_class=MultiRLModule,
            rl_module_specs={DEFAULT_MODULE_ID: single_spec},
        )


class BaseTestingLearner(Learner):
    """Learner mixin that logs per-module mean weights after each update."""

    @override(Learner)
    def after_gradient_based_update(self, *, timesteps):
        # This is to check if in the multi-gpu case, the weights across workers are
        # the same. It is really only needed during testing.
        if not self.config.report_mean_weights:
            return
        for module_id in self.module.keys():
            module_params = convert_to_numpy(
                self.get_parameters(self.module[module_id])
            )
            mean_ws = np.mean([w.mean() for w in module_params])
            self.metrics.log_value((module_id, "mean_weight"), mean_ws, window=1)
a/.venv/lib/python3.11/site-packages/ray/rllib/core/testing/tf/bc_learner.py b/.venv/lib/python3.11/site-packages/ray/rllib/core/testing/tf/bc_learner.py new file mode 100644 index 0000000000000000000000000000000000000000..3c23d3d9732e8e56daa5d5515ef18e868c6813ba --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/rllib/core/testing/tf/bc_learner.py @@ -0,0 +1,34 @@ +import tensorflow as tf +from typing import Dict, TYPE_CHECKING + +from ray.rllib.core.columns import Columns +from ray.rllib.core.learner.tf.tf_learner import TfLearner +from ray.rllib.core.testing.testing_learner import BaseTestingLearner +from ray.rllib.utils.typing import ModuleID, TensorType + +if TYPE_CHECKING: + from ray.rllib.algorithms.algorithm_config import AlgorithmConfig + + +class BCTfLearner(TfLearner, BaseTestingLearner): + def compute_loss_for_module( + self, + *, + module_id: ModuleID, + config: "AlgorithmConfig", + batch: Dict, + fwd_out: Dict[str, TensorType], + ) -> TensorType: + BaseTestingLearner.compute_loss_for_module( + self, + module_id=module_id, + config=config, + batch=batch, + fwd_out=fwd_out, + ) + action_dist_inputs = fwd_out[Columns.ACTION_DIST_INPUTS] + action_dist_class = self._module[module_id].get_train_action_dist_cls() + action_dist = action_dist_class.from_logits(action_dist_inputs) + loss = -tf.math.reduce_mean(action_dist.logp(batch[Columns.ACTIONS])) + + return loss diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/core/testing/tf/bc_module.py b/.venv/lib/python3.11/site-packages/ray/rllib/core/testing/tf/bc_module.py new file mode 100644 index 0000000000000000000000000000000000000000..ebe0cfe361be021eff3c027bd54e4cd7e3be6620 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/rllib/core/testing/tf/bc_module.py @@ -0,0 +1,101 @@ +import tensorflow as tf +from typing import Any, Dict + +from ray.rllib.core.columns import Columns +from ray.rllib.core.rl_module.rl_module import RLModule +from ray.rllib.core.rl_module.multi_rl_module import 
class DiscreteBCTFModule(TfRLModule):
    # Simple discrete-action behavior-cloning RLModule (tf/keras version).

    def setup(self):
        """Builds the policy MLP from the spaces and the model config.

        Assumes a 1D Box observation space and a Discrete action space —
        TODO confirm with callers.
        """
        input_dim = self.observation_space.shape[0]
        hidden_dim = self.model_config["fcnet_hiddens"][0]
        output_dim = self.action_space.n
        layers = []

        # NOTE(review): The first ReLU is applied directly to the raw
        # observations (before any Dense layer); the torch counterpart
        # (DiscreteBCTorchModule) applies ReLU only after the first Linear —
        # confirm this asymmetry is intended.
        layers.append(tf.keras.Input(shape=(input_dim,)))
        layers.append(tf.keras.layers.ReLU())
        layers.append(tf.keras.layers.Dense(hidden_dim))
        layers.append(tf.keras.layers.ReLU())
        layers.append(tf.keras.layers.Dense(output_dim))

        self.policy = tf.keras.Sequential(layers)
        self._input_dim = input_dim

    def _forward(self, batch: Dict[str, Any], **kwargs) -> Dict[str, Any]:
        """Computes action logits for the observations in `batch`."""
        action_logits = self.policy(batch["obs"])
        return {Columns.ACTION_DIST_INPUTS: action_logits}

    @override(RLModule)
    def get_state(self, *args, **kwargs) -> StateDict:
        """Returns the keras policy's weights as the module's state."""
        return {"policy": self.policy.get_weights()}

    @override(RLModule)
    def set_state(self, state: StateDict) -> None:
        """Restores the keras policy's weights from `state` (see `get_state`)."""
        self.policy.set_weights(state["policy"])


class BCTfRLModuleWithSharedGlobalEncoder(TfRLModule):
    # BC module whose "global" observation part is encoded by an externally
    # provided (usually shared) encoder; the "local" part is passed through.

    def __init__(self, encoder, local_dim, hidden_dim, action_dim):
        """Initializes the module.

        Args:
            encoder: A keras model mapping the "global" obs to a `hidden_dim`
                embedding. Typically shared across several modules.
            local_dim: Size of the "local" part of the observation.
            hidden_dim: Hidden layer size of the policy head (must match the
                encoder's output size).
            action_dim: Number of discrete actions (output logits).
        """
        super().__init__()

        self.encoder = encoder
        self.policy_head = tf.keras.Sequential(
            [
                tf.keras.layers.Dense(
                    hidden_dim + local_dim,
                    input_shape=(hidden_dim + local_dim,),
                    activation="relu",
                ),
                tf.keras.layers.Dense(hidden_dim, activation="relu"),
                tf.keras.layers.Dense(action_dim),
            ]
        )

    def _forward(self, batch, **kwargs):
        """Encodes the global obs, concats the local obs, and returns logits."""
        obs = batch["obs"]
        global_enc = self.encoder(obs["global"])
        policy_in = tf.concat([global_enc, obs["local"]], axis=-1)
        action_logits = self.policy_head(policy_in)

        return {Columns.ACTION_DIST_INPUTS: action_logits}

    @override(RLModule)
    def _default_input_specs(self):
        # Requires a dict obs with "global" and "local" sub-keys.
        return [("obs", "global"), ("obs", "local")]


class BCTfMultiAgentModuleWithSharedEncoder(MultiRLModule):
    # Multi-agent container that builds ONE shared global-obs encoder and passes
    # it into every sub-module (see BCTfRLModuleWithSharedGlobalEncoder).

    def setup(self):
        # constructing the global encoder based on the observation_space of the
        # first module.
        # NOTE(review): Uses `self.config.modules` / `model_config_dict` —
        # presumably the older MultiRLModuleConfig API; verify against the
        # installed RLlib version.
        module_specs = self.config.modules
        module_spec = next(iter(module_specs.values()))
        global_dim = module_spec.observation_space["global"].shape[0]
        hidden_dim = module_spec.model_config_dict["fcnet_hiddens"][0]
        shared_encoder = tf.keras.Sequential(
            [
                tf.keras.Input(shape=(global_dim,)),
                tf.keras.layers.ReLU(),
                tf.keras.layers.Dense(hidden_dim),
            ]
        )

        # Instantiate each sub-module with the one shared encoder instance.
        for module_id, module_spec in module_specs.items():
            self._rl_modules[module_id] = module_spec.module_class(
                encoder=shared_encoder,
                local_dim=module_spec.observation_space["local"].shape[0],
                hidden_dim=hidden_dim,
                action_dim=module_spec.action_space.n,
            )

    def serialize(self):
        # TODO (Kourosh): Implement when needed.
        raise NotImplementedError

    def deserialize(self, data):
        # TODO (Kourosh): Implement when needed.
        raise NotImplementedError
0000000000000000000000000000000000000000..f4fded47b834ee3ba128dafbd877231b1d0ca525 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/rllib/core/testing/torch/__pycache__/bc_learner.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/core/testing/torch/__pycache__/bc_module.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/rllib/core/testing/torch/__pycache__/bc_module.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..36b9fe7b47afd889f624fbeeec345417f02c87dd Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/rllib/core/testing/torch/__pycache__/bc_module.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/core/testing/torch/bc_learner.py b/.venv/lib/python3.11/site-packages/ray/rllib/core/testing/torch/bc_learner.py new file mode 100644 index 0000000000000000000000000000000000000000..1c12aee7a1ee84a0d6edf995cae8963b93527aa1 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/rllib/core/testing/torch/bc_learner.py @@ -0,0 +1,34 @@ +import torch +from typing import Any, Dict, TYPE_CHECKING + +from ray.rllib.core.columns import Columns +from ray.rllib.core.learner.torch.torch_learner import TorchLearner +from ray.rllib.core.testing.testing_learner import BaseTestingLearner +from ray.rllib.utils.typing import ModuleID, TensorType + +if TYPE_CHECKING: + from ray.rllib.algorithms.algorithm_config import AlgorithmConfig + + +class BCTorchLearner(TorchLearner, BaseTestingLearner): + def compute_loss_for_module( + self, + *, + module_id: ModuleID, + config: "AlgorithmConfig", + batch: Dict[str, Any], + fwd_out: Dict[str, TensorType], + ) -> TensorType: + BaseTestingLearner.compute_loss_for_module( + self, + module_id=module_id, + config=config, + batch=batch, + fwd_out=fwd_out, + ) + action_dist_inputs = fwd_out[Columns.ACTION_DIST_INPUTS] + action_dist_class = self._module[module_id].get_train_action_dist_cls() + action_dist = 
action_dist_class.from_logits(action_dist_inputs) + loss = -torch.mean(action_dist.logp(batch[Columns.ACTIONS])) + + return loss diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/core/testing/torch/bc_module.py b/.venv/lib/python3.11/site-packages/ray/rllib/core/testing/torch/bc_module.py new file mode 100644 index 0000000000000000000000000000000000000000..d2a5d71c5c160ff696d9552a26ec9d95d35aedd4 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/rllib/core/testing/torch/bc_module.py @@ -0,0 +1,162 @@ +from typing import Any, Dict + +from ray.rllib.core.columns import Columns +from ray.rllib.core.rl_module.rl_module import RLModule +from ray.rllib.models.torch.torch_distributions import TorchCategorical +from ray.rllib.core.rl_module.multi_rl_module import MultiRLModule +from ray.rllib.core.rl_module.torch.torch_rl_module import TorchRLModule +from ray.rllib.core.models.specs.typing import SpecType +from ray.rllib.utils.annotations import override +from ray.rllib.utils.framework import try_import_torch + +torch, nn = try_import_torch() + + +class DiscreteBCTorchModule(TorchRLModule): + def setup(self): + input_dim = self.observation_space.shape[0] + hidden_dim = self.model_config["fcnet_hiddens"][0] + output_dim = self.action_space.n + + self.policy = nn.Sequential( + nn.Linear(input_dim, hidden_dim), + nn.ReLU(), + nn.Linear(hidden_dim, output_dim), + ) + + self.input_dim = input_dim + + def get_train_action_dist_cls(self): + return TorchCategorical + + def get_exploration_action_dist_cls(self): + return TorchCategorical + + def get_inference_action_dist_cls(self): + return TorchCategorical + + @override(RLModule) + def output_specs_exploration(self) -> SpecType: + return [Columns.ACTION_DIST_INPUTS] + + @override(RLModule) + def output_specs_inference(self) -> SpecType: + return [Columns.ACTION_DIST_INPUTS] + + @override(RLModule) + def output_specs_train(self) -> SpecType: + return [Columns.ACTION_DIST_INPUTS] + + @override(RLModule) + def 
_forward_inference(self, batch: Dict[str, Any]) -> Dict[str, Any]: + with torch.no_grad(): + return self._forward_train(batch) + + @override(RLModule) + def _forward_exploration(self, batch: Dict[str, Any]) -> Dict[str, Any]: + with torch.no_grad(): + return self._forward_train(batch) + + @override(RLModule) + def _forward_train(self, batch: Dict[str, Any]) -> Dict[str, Any]: + action_logits = self.policy(batch["obs"]) + return {Columns.ACTION_DIST_INPUTS: action_logits} + + +class BCTorchRLModuleWithSharedGlobalEncoder(TorchRLModule): + """An example of an RLModule that uses an encoder shared with other things. + + For example, we could consider a multi-agent case where for inference each agent + needs to know the global state of the environment, as well as the local state of + itself. For better representation learning we would like to share the encoder + across all the modules. So this module simply accepts the encoder object as its + input argument and uses it to encode the global state. The local state is passed + through as is. The policy head is then a simple MLP that takes the concatenation of + the global and local state as input and outputs the action logits. 
+ + """ + + def __init__( + self, + encoder: nn.Module, + local_dim: int, + hidden_dim: int, + action_dim: int, + config=None, + ) -> None: + super().__init__(config=config) + + self.encoder = encoder + self.policy_head = nn.Sequential( + nn.Linear(hidden_dim + local_dim, hidden_dim), + nn.ReLU(), + nn.Linear(hidden_dim, action_dim), + ) + + def get_train_action_dist_cls(self): + return TorchCategorical + + def get_exploration_action_dist_cls(self): + return TorchCategorical + + def get_inference_action_dist_cls(self): + return TorchCategorical + + @override(RLModule) + def _default_input_specs(self): + return [("obs", "global"), ("obs", "local")] + + @override(RLModule) + def _forward_inference(self, batch): + with torch.no_grad(): + return self._common_forward(batch) + + @override(RLModule) + def _forward_exploration(self, batch): + with torch.no_grad(): + return self._common_forward(batch) + + @override(RLModule) + def _forward_train(self, batch): + return self._common_forward(batch) + + def _common_forward(self, batch): + obs = batch["obs"] + global_enc = self.encoder(obs["global"]) + policy_in = torch.cat([global_enc, obs["local"]], dim=-1) + action_logits = self.policy_head(policy_in) + + return {Columns.ACTION_DIST_INPUTS: action_logits} + + +class BCTorchMultiAgentModuleWithSharedEncoder(MultiRLModule): + def setup(self): + module_specs = self.config.modules + module_spec = next(iter(module_specs.values())) + global_dim = module_spec.observation_space["global"].shape[0] + hidden_dim = module_spec.model_config_dict["fcnet_hiddens"][0] + shared_encoder = nn.Sequential( + nn.Linear(global_dim, hidden_dim), + nn.ReLU(), + nn.Linear(hidden_dim, hidden_dim), + ) + + rl_modules = {} + for module_id, module_spec in module_specs.items(): + rl_modules[module_id] = module_spec.module_class( + config=self.config.modules[module_id].get_rl_module_config(), + encoder=shared_encoder, + local_dim=module_spec.observation_space["local"].shape[0], + hidden_dim=hidden_dim, + 
action_dim=module_spec.action_space.n, + ) + + self._rl_modules = rl_modules + + def serialize(self): + # TODO (Kourosh): Implement when needed. + raise NotImplementedError + + def deserialize(self, data): + # TODO (Kourosh): Implement when needed. + raise NotImplementedError diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/env/utils/__init__.py b/.venv/lib/python3.11/site-packages/ray/rllib/env/utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..09dfbe227e5a6c29bdfa7096758d529ddcb72d55 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/rllib/env/utils/__init__.py @@ -0,0 +1,124 @@ +import logging +from typing import Type, Union + +import gymnasium as gym + +from ray.rllib.env.env_context import EnvContext +from ray.rllib.utils.error import ( + ERR_MSG_INVALID_ENV_DESCRIPTOR, + EnvError, +) +from ray.util.annotations import PublicAPI + + +logger = logging.getLogger(__name__) + + +@PublicAPI +def try_import_pyspiel(error: bool = False): + """Tries importing pyspiel and returns the module (or None). + + Args: + error: Whether to raise an error if pyspiel cannot be imported. + + Returns: + The pyspiel module. + + Raises: + ImportError: If error=True and pyspiel is not installed. + """ + try: + import pyspiel + + return pyspiel + except ImportError: + if error: + raise ImportError( + "Could not import pyspiel! Pygame is not a dependency of RLlib " + "and RLlib requires you to install pygame separately: " + "`pip install pygame`." + ) + return None + + +@PublicAPI +def try_import_open_spiel(error: bool = False): + """Tries importing open_spiel and returns the module (or None). + + Args: + error: Whether to raise an error if open_spiel cannot be imported. + + Returns: + The open_spiel module. + + Raises: + ImportError: If error=True and open_spiel is not installed. + """ + try: + import open_spiel + + return open_spiel + except ImportError: + if error: + raise ImportError( + "Could not import open_spiel! 
open_spiel is not a dependency of RLlib " + "and RLlib requires you to install open_spiel separately: " + "`pip install open_spiel`." + ) + return None + + +def _gym_env_creator( + env_context: EnvContext, + env_descriptor: Union[str, Type[gym.Env]], +) -> gym.Env: + """Tries to create a gym env given an EnvContext object and descriptor. + + Note: This function tries to construct the env from a string descriptor + only using possibly installed RL env packages (such as gym, pybullet_envs, + etc). These packages are no installation requirements for RLlib. In case + you would like to support more such env packages, add the necessary imports + and construction logic below. + + Args: + env_context: The env context object to configure the env. + Note that this is a config dict, plus the properties: + `worker_index`, `vector_index`, and `remote`. + env_descriptor: The env descriptor as a gym-registered string, e.g. CartPole-v1, + ALE/MsPacman-v5, or CartPoleContinuousBulletEnv-v0. + Alternatively, the gym.Env subclass to use. + + Returns: + The actual gym environment object. + + Raises: + gym.error.Error: If the env cannot be constructed. + """ + # Allow for PyBullet or envs to be used as well (via string). This allows + # for doing things like `env=CartPoleContinuousBulletEnv-v0`. + try: + import pybullet_envs + + pybullet_envs.getList() + except (AttributeError, ModuleNotFoundError, ImportError): + pass + + # If env descriptor is a str, starting with "ale_py:ALE/", for now, register all ALE + # envs from ale_py. + if isinstance(env_descriptor, str) and env_descriptor.startswith("ale_py:ALE/"): + import ale_py + + gym.register_envs(ale_py) + + # Try creating a gym env. If this fails we can output a + # decent error message. + try: + # If class provided, call constructor directly. 
+ if isinstance(env_descriptor, type): + env = env_descriptor(env_context) + else: + env = gym.make(env_descriptor, **env_context) + except gym.error.Error: + raise EnvError(ERR_MSG_INVALID_ENV_DESCRIPTOR.format(env_descriptor)) + + return env diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/env/utils/__pycache__/__init__.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/rllib/env/utils/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..eb91dc3f844bba4105a15ba277e1d3b148f32d2d Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/rllib/env/utils/__pycache__/__init__.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/env/utils/__pycache__/infinite_lookback_buffer.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/rllib/env/utils/__pycache__/infinite_lookback_buffer.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f821a2d8561a57e8ccc8f9805f81164a17ebbfeb Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/rllib/env/utils/__pycache__/infinite_lookback_buffer.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/env/utils/external_env_protocol.py b/.venv/lib/python3.11/site-packages/ray/rllib/env/utils/external_env_protocol.py new file mode 100644 index 0000000000000000000000000000000000000000..0234d273470fcce32fbc43c391331b4eee6185e6 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/rllib/env/utils/external_env_protocol.py @@ -0,0 +1,45 @@ +from enum import Enum + +from ray.util.annotations import PublicAPI + + +@PublicAPI(stability="alpha") +class RLlink(Enum): + # Requests: Client (external env) -> Server (RLlib). + # ---- + # Ping command (initial handshake). + PING = "PING" + # List of episodes (similar to what an EnvRunner.sample() call would return). + EPISODES = "EPISODES" + # Request state (e.g. model weights). 
+ GET_STATE = "GET_STATE" + # Request (relevant) config. + GET_CONFIG = "GET_CONFIG" + # Send episodes and request the next state update right after that. + # Clients sending this message should wait for a SET_STATE message as an immediate + # response. Useful for external samplers that must collect on-policy data. + EPISODES_AND_GET_STATE = "EPISODES_AND_GET_STATE" + + # Responses: Server (RLlib) -> Client (external env). + # ---- + # Pong response (initial handshake). + PONG = "PONG" + # Set state (e.g. model weights). + SET_STATE = "SET_STATE" + # Set (relevant) config. + SET_CONFIG = "SET_CONFIG" + + # @OldAPIStack (to be deprecated soon). + ACTION_SPACE = "ACTION_SPACE" + OBSERVATION_SPACE = "OBSERVATION_SPACE" + GET_WORKER_ARGS = "GET_WORKER_ARGS" + GET_WEIGHTS = "GET_WEIGHTS" + REPORT_SAMPLES = "REPORT_SAMPLES" + START_EPISODE = "START_EPISODE" + GET_ACTION = "GET_ACTION" + LOG_ACTION = "LOG_ACTION" + LOG_RETURNS = "LOG_RETURNS" + END_EPISODE = "END_EPISODE" + + def __str__(self): + return self.name diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/env/utils/infinite_lookback_buffer.py b/.venv/lib/python3.11/site-packages/ray/rllib/env/utils/infinite_lookback_buffer.py new file mode 100644 index 0000000000000000000000000000000000000000..26f76fbc31aef80b6a9d09ec17b2046b0b13fc2e --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/rllib/env/utils/infinite_lookback_buffer.py @@ -0,0 +1,719 @@ +from typing import Any, Dict, List, Optional, Union + +import gymnasium as gym +import numpy as np +import tree # pip install dm_tree + +from ray.rllib.utils.numpy import LARGE_INTEGER, one_hot, one_hot_multidiscrete +from ray.rllib.utils.serialization import gym_space_from_dict, gym_space_to_dict +from ray.rllib.utils.spaces.space_utils import ( + batch, + from_jsonable_if_needed, + get_dummy_batch_for_space, + get_base_struct_from_space, + to_jsonable_if_needed, +) + + +class InfiniteLookbackBuffer: + @property + def space(self): + return self._space + + 
@space.setter + def space(self, value): + self._space = value + self.space_struct = get_base_struct_from_space(value) + + def __init__( + self, + data: Optional[Union[List, np.ndarray]] = None, + lookback: int = 0, + space: Optional[gym.Space] = None, + ): + self.data = data if data is not None else [] + self.lookback = min(lookback, len(self.data)) + self.finalized = not isinstance(self.data, list) + self.space_struct = None + self.space = space + + def __eq__( + self, + other: "InfiniteLookbackBuffer", + ) -> bool: + """Compares two `InfiniteLookbackBuffers. + + Args: + other: Another object. If another `LookbackBuffer` instance all + their attributes are compared. + + Returns: + `True`, if `other` is an `InfiniteLookbackBuffer` instance and all + attributes are identical. Otherwise, returns `False`. + """ + if isinstance(other, InfiniteLookbackBuffer): + if ( + self.data == other.data + and self.lookback == other.lookback + and self.finalized == other.finalized + and self.space_struct == other.space_struct + and self.space == other.space + ): + return True + return False + + def get_state(self) -> Dict[str, Any]: + """Returns the pickable state of a buffer. + + The data in the buffer is stored into a dictionary. Note that + buffers can also be generated from pickable states (see + `InfiniteLookbackBuffer.from_state`) + + Returns: + A dict containing all the data and metadata from the buffer. + """ + return { + "data": to_jsonable_if_needed(self.data, self.space) + if self.space + else self.data, + "lookback": self.lookback, + "finalized": self.finalized, + "space": gym_space_to_dict(self.space) if self.space else self.space, + } + + @staticmethod + def from_state(state: Dict[str, Any]) -> None: + """Creates a new `InfiniteLookbackBuffer` from a state dict. + + Args: + state: The state dict, as returned by `self.get_state`. + + Returns: + A new `InfiniteLookbackBuffer` instance with the data and metadata + from the state dict. 
+ """ + buffer = InfiniteLookbackBuffer() + buffer.lookback = state["lookback"] + buffer.finalized = state["finalized"] + buffer.space = gym_space_from_dict(state["space"]) if state["space"] else None + buffer.space_struct = ( + get_base_struct_from_space(buffer.space) if buffer.space else None + ) + buffer.data = ( + from_jsonable_if_needed(state["data"], buffer.space) + if buffer.space + else state["data"] + ) + + return buffer + + def append(self, item) -> None: + """Appends the given item to the end of this buffer.""" + if self.finalized: + self.data = tree.map_structure( + lambda d, i: np.concatenate([d, [i]], axis=0), self.data, item + ) + else: + self.data.append(item) + + def extend(self, items) -> None: + """Appends all items in `items` to the end of this buffer.""" + if self.finalized: + # TODO (sven): When extending with a list of structs, we should + # probably rather do: `tree.map_structure(..., self.data, + # tree.map_structure(lambda *s: np.array(*s), *items)`)?? + self.data = tree.map_structure( + lambda d, i: np.concatenate([d, i], axis=0), + self.data, + # Note, we could have dictionaries here. + np.array(items) if isinstance(items, list) else items, + ) + else: + for item in items: + self.append(item) + + def concat(self, other: "InfiniteLookbackBuffer") -> None: + """Concatenates the data of `other` (w/o its lookback) to `self`. + + Args: + other: The other InfiniteLookbackBuffer to be concatenated to self. + """ + self.data.extend(other.get()) + + def pop(self, index: int = -1) -> None: + """Removes the item at `index` from this buffer, but does NOT return it. + + Args: + index: The index to pop out of this buffer (w/o returning it from this + method). + """ + if self.finalized: + self.data = tree.map_structure( + lambda s: np.delete(s, index, axis=0), self.data + ) + else: + self.data.pop(index) + + def finalize(self) -> None: + """Finalizes this buffer by converting internal data lists into numpy arrays. 
+ + Thereby, if the individual items in the list are nested structures, the + resulting buffer content will be a nested struct of np.ndarrays (leafs). + """ + if not self.finalized: + self.data = batch(self.data) + self.finalized = True + + def get( + self, + indices: Optional[Union[int, slice, List[int]]] = None, + *, + neg_index_as_lookback: bool = False, + fill: Optional[Any] = None, + one_hot_discrete: bool = False, + _ignore_last_ts: bool = False, + _add_last_ts_value: Optional[Any] = None, + ) -> Any: + """Returns data, based on the given args, from this buffer. + + Args: + indices: A single int is interpreted as an index, from which to return the + individual data stored at this index. + A list of ints is interpreted as a list of indices from which to gather + individual data in a batch of size len(indices). + A slice object is interpreted as a range of data to be returned. + Thereby, negative indices by default are interpreted as "before the end" + unless the `neg_index_as_lookback=True` option is used, in which case + negative indices are interpreted as "before ts=0", meaning going back + into the lookback buffer. + neg_index_as_lookback: If True, negative values in `indices` are + interpreted as "before ts=0", meaning going back into the lookback + buffer. For example, a buffer with data [4, 5, 6, 7, 8, 9], + where [4, 5, 6] is the lookback buffer range (ts=0 item is 7), will + respond to `get(-1, neg_index_as_lookback=True)` with `6` and to + `get(slice(-2, 1), neg_index_as_lookback=True)` with `[5, 6, 7]`. + fill: An optional float value to use for filling up the returned results at + the boundaries. This filling only happens if the requested index range's + start/stop boundaries exceed the buffer's boundaries (including the + lookback buffer on the left side). This comes in very handy, if users + don't want to worry about reaching such boundaries and want to zero-pad. 
+ For example, a buffer with data [10, 11, 12, 13, 14] and lookback + buffer size of 2 (meaning `10` and `11` are part of the lookback buffer) + will respond to `get(slice(-7, -2), fill=0.0)` + with `[0.0, 0.0, 10, 11, 12]`. + one_hot_discrete: If True, will return one-hot vectors (instead of + int-values) for those sub-components of a (possibly complex) space + that are Discrete or MultiDiscrete. Note that if `fill=0` and the + requested `indices` are out of the range of our data, the returned + one-hot vectors will actually be zero-hot (all slots zero). + _ignore_last_ts: Whether to ignore the last record in our internal + `self.data` when getting the provided indices. + _add_last_ts_value: Whether to add the value of this arg to the end of + the internal `self.data` buffer (just for the duration of this get + operation, not permanently). + """ + if indices is None: + data = self._get_all_data( + one_hot_discrete=one_hot_discrete, + _ignore_last_ts=_ignore_last_ts, + ) + elif isinstance(indices, slice): + data = self._get_slice( + indices, + fill=fill, + neg_index_as_lookback=neg_index_as_lookback, + one_hot_discrete=one_hot_discrete, + _ignore_last_ts=_ignore_last_ts, + _add_last_ts_value=_add_last_ts_value, + ) + elif isinstance(indices, list): + data = [ + self._get_int_index( + idx, + fill=fill, + neg_index_as_lookback=neg_index_as_lookback, + one_hot_discrete=one_hot_discrete, + _ignore_last_ts=_ignore_last_ts, + _add_last_ts_value=_add_last_ts_value, + ) + for idx in indices + ] + if self.finalized: + data = batch(data) + else: + assert isinstance(indices, int) + data = self._get_int_index( + indices, + fill=fill, + neg_index_as_lookback=neg_index_as_lookback, + one_hot_discrete=one_hot_discrete, + _ignore_last_ts=_ignore_last_ts, + _add_last_ts_value=_add_last_ts_value, + ) + + return data + + def __add__( + self, other: Union[List, "InfiniteLookbackBuffer", int, float, complex] + ) -> "InfiniteLookbackBuffer": + """Adds another InfiniteLookbackBuffer 
object or list to the end of this one. + + Args: + other: Another `InfiniteLookbackBuffer` or a `list` or a number. + If a `InfiniteLookbackBuffer` its data (w/o its lookback buffer) gets + concatenated to self's data. If a `list`, we concat it to self's data. + If a number, we add this number to each element of self (if possible). + + Returns: + A new `InfiniteLookbackBuffer` instance `self.data` containing + concatenated data from `self` and `other` (or adding `other` to each element + in self's data). + """ + + if self.finalized: + raise RuntimeError(f"Cannot `add` to a finalized {type(self).__name__}.") + else: + # If `other` is an int, simply add it to all our values (if possible) and + # use the result as the underlying data for the returned buffer. + if isinstance(other, (int, float, complex)): + data = [ + (d + other) if isinstance(d, (int, float, complex)) else d + for d in self.data + ] + # If `other` is a InfiniteLookbackBuffer itself, do NOT include its + # lookback buffer anymore. We assume that `other`'s lookback buffer i + # already at the end of `self`. + elif isinstance(other, InfiniteLookbackBuffer): + data = self.data + other.data[other.lookback :] + # `other` is a list, simply concat the two lists and use the result as + # the underlying data for the returned buffer. + else: + data = self.data + other + + return InfiniteLookbackBuffer( + data=data, + lookback=self.lookback, + space=self.space, + ) + + def __getitem__(self, item): + """Support squared bracket syntax, e.g. buffer[:5].""" + return self.get(item) + + def __setitem__(self, key, value): + self.set(new_data=value, at_indices=key) + + def set( + self, + new_data, + *, + at_indices: Optional[Union[int, slice, List[int]]] = None, + neg_index_as_lookback: bool = False, + ) -> None: + """Overwrites all or some of the data in this buffer with the provided data. + + Args: + new_data: The new data to overwrite existing records with. 
+ at_indices: A single int is interpreted as an index, at which to overwrite + the individual record stored at this index with `new_data`. + A list of ints is interpreted as a list of indices, which to overwrite + with `new_data`, which must be a batch of size `len(at_indices)`. + A slice object is interpreted as a range, which to overwrite with + `new_data`. Thereby, negative indices by default are interpreted as + "before the end" unless the `neg_index_as_lookback=True` option is + used, in which case negative indices are interpreted as + "before ts=0", meaning going back into the lookback buffer. + neg_index_as_lookback: If True, negative values in `at_indices` are + interpreted as "before ts=0", meaning going back into the lookback + buffer. For example, a buffer with data [4, 5, 6, 7, 8, 9], + where [4, 5, 6] is the lookback buffer range (ts=0 item is 7), will + handle a call `set(99, at_indices=-1, neg_index_as_lookback=True)` + with `6` being replaced by 99 and to `set([98, 99, 100], + at_indices=slice(-2, 1), neg_index_as_lookback=True)` with + `[5, 6, 7]` being replaced by `[98, 99, 100]`. + """ + # `at_indices` is None -> Override all our data (excluding the lookback buffer). + if at_indices is None: + self._set_all_data(new_data) + + elif isinstance(at_indices, slice): + self._set_slice( + new_data, + slice_=at_indices, + neg_index_as_lookback=neg_index_as_lookback, + ) + elif isinstance(at_indices, list): + for i, idx in enumerate(at_indices): + self._set_int_index( + new_data[i], + idx=idx, + neg_index_as_lookback=neg_index_as_lookback, + ) + else: + assert isinstance(at_indices, int) + self._set_int_index( + new_data, + idx=at_indices, + neg_index_as_lookback=neg_index_as_lookback, + ) + + def __len__(self): + """Return the length of our data, excluding the lookback buffer.""" + len_ = self.len_incl_lookback() + # Only count the data after the lookback. 
+ return max(len_ - self.lookback, 0) + + def len_incl_lookback(self): + if self.finalized: + return len(tree.flatten(self.data)[0]) + else: + return len(self.data) + + def __repr__(self): + return ( + f"{type(self).__name__}({self.data[:self.lookback]} <- " + f"lookback({self.lookback}) | {self.data[self.lookback:]})" + ) + + def _get_all_data(self, one_hot_discrete=False, _ignore_last_ts=False): + data = self[: (None if not _ignore_last_ts else -1)] + if one_hot_discrete: + data = self._one_hot(data, space_struct=self.space_struct) + return data + + def _set_all_data(self, new_data): + self._set_slice(new_data, slice(0, None)) + + def _get_slice( + self, + slice_, + fill=None, + neg_index_as_lookback=False, + one_hot_discrete=False, + _ignore_last_ts=False, + _add_last_ts_value=None, + ): + data_to_use = self.data + if _ignore_last_ts: + if self.finalized: + data_to_use = tree.map_structure(lambda s: s[:-1], self.data) + else: + data_to_use = self.data[:-1] + if _add_last_ts_value is not None: + if self.finalized: + data_to_use = tree.map_structure( + lambda s, t: np.append(s, t), + data_to_use.copy(), + _add_last_ts_value, + ) + else: + data_to_use = np.append(data_to_use.copy(), _add_last_ts_value) + + slice_, slice_len, fill_left_count, fill_right_count = self._interpret_slice( + slice_, + neg_index_as_lookback, + len_self_plus_lookback=( + self.len_incl_lookback() + + int(_add_last_ts_value is not None) + - int(_ignore_last_ts) + ), + ) + + # Perform the actual slice. + data_slice = None + if slice_len > 0: + if self.finalized: + data_slice = tree.map_structure(lambda s: s[slice_], data_to_use) + else: + data_slice = data_to_use[slice_] + + if one_hot_discrete: + data_slice = self._one_hot(data_slice, space_struct=self.space_struct) + + # Data is shorter than the range requested -> Fill the rest with `fill` data. 
+ if fill is not None and (fill_right_count > 0 or fill_left_count > 0): + if self.finalized: + if fill_left_count: + if self.space is None: + fill_batch = np.array([fill] * fill_left_count) + else: + fill_batch = get_dummy_batch_for_space( + self.space, + fill_value=fill, + batch_size=fill_left_count, + one_hot_discrete=one_hot_discrete, + ) + if data_slice is not None: + data_slice = tree.map_structure( + lambda s0, s: np.concatenate([s0, s]), + fill_batch, + data_slice, + ) + else: + data_slice = fill_batch + if fill_right_count: + if self.space is None: + fill_batch = np.array([fill] * fill_right_count) + else: + fill_batch = get_dummy_batch_for_space( + self.space, + fill_value=fill, + batch_size=fill_right_count, + one_hot_discrete=one_hot_discrete, + ) + if data_slice is not None: + data_slice = tree.map_structure( + lambda s0, s: np.concatenate([s, s0]), + fill_batch, + data_slice, + ) + else: + data_slice = fill_batch + + else: + if self.space is None: + fill_batch = [fill] + else: + fill_batch = [ + get_dummy_batch_for_space( + self.space, + fill_value=fill, + batch_size=0, + one_hot_discrete=one_hot_discrete, + ) + ] + data_slice = ( + fill_batch * fill_left_count + + (data_slice if data_slice is not None else []) + + fill_batch * fill_right_count + ) + + if data_slice is None: + if self.finalized: + return tree.map_structure(lambda s: s[slice_], data_to_use) + else: + return data_to_use[slice_] + return data_slice + + def _set_slice( + self, + new_data, + slice_, + neg_index_as_lookback=False, + ): + slice_, _, _, _ = self._interpret_slice(slice_, neg_index_as_lookback) + + # Check, whether the setting to new_data changes the length of self + # (it shouldn't). If it does, raise an error. 
+ try: + if self.finalized: + + def __set(s, n): + if self.space: + assert self.space.contains(n[0]) + assert len(s[slice_]) == len(n) + s[slice_] = n + + tree.map_structure(__set, self.data, new_data) + else: + assert len(self.data[slice_]) == len(new_data) + self.data[slice_] = new_data + except AssertionError: + raise IndexError( + f"Cannot `set()` value via at_indices={slice_} (option " + f"neg_index_as_lookback={neg_index_as_lookback})! Slice of data " + "does NOT have the same size as `new_data`." + ) + + def _get_int_index( + self, + idx: int, + fill=None, + neg_index_as_lookback=False, + one_hot_discrete=False, + _ignore_last_ts=False, + _add_last_ts_value=None, + ): + data_to_use = self.data + if _ignore_last_ts: + if self.finalized: + data_to_use = tree.map_structure(lambda s: s[:-1], self.data) + else: + data_to_use = self.data[:-1] + if _add_last_ts_value is not None: + if self.finalized: + data_to_use = tree.map_structure( + lambda s, last: np.append(s, last), data_to_use, _add_last_ts_value + ) + else: + data_to_use = data_to_use.copy() + data_to_use.append(_add_last_ts_value) + + # If index >= 0 -> Ignore lookback buffer. + # Otherwise, include lookback buffer. + if idx >= 0 or neg_index_as_lookback: + idx = self.lookback + idx + # Negative indices mean: Go to left into lookback buffer starting from idx=0. + # But if we pass the lookback buffer, the index should be invalid and we will + # have to fill, if required. Invalidate the index by setting it to one larger + # than max. + if neg_index_as_lookback and idx < 0: + idx = len(self) + self.lookback - (_ignore_last_ts is True) + + try: + if self.finalized: + data = tree.map_structure(lambda s: s[idx], data_to_use) + else: + data = data_to_use[idx] + # Out of range index -> If `fill`, use a fill dummy (B=0), if not, error out. 
+ except IndexError as e: + if fill is not None: + if self.space is None: + return fill + return get_dummy_batch_for_space( + self.space, + fill_value=fill, + batch_size=0, + one_hot_discrete=one_hot_discrete, + ) + else: + raise e + + # Convert discrete/multi-discrete components to one-hot vectors, if required. + if one_hot_discrete: + data = self._one_hot(data, self.space_struct) + return data + + def _set_int_index(self, new_data, idx, neg_index_as_lookback): + actual_idx = idx + # If index >= 0 -> Ignore lookback buffer. + # Otherwise, include lookback buffer. + if actual_idx >= 0 or neg_index_as_lookback: + actual_idx = self.lookback + actual_idx + # Negative indices mean: Go to left into lookback buffer starting from idx=0. + # But if we pass the lookback buffer, the index should be invalid and we will + # have to fill, if required. Invalidate the index by setting it to one larger + # than max. + if neg_index_as_lookback and actual_idx < 0: + actual_idx = len(self) + self.lookback + + try: + if self.finalized: + + def __set(s, n): + if self.space: + assert self.space.contains(n), n + s[actual_idx] = n + + tree.map_structure(__set, self.data, new_data) + else: + self.data[actual_idx] = new_data + except IndexError: + raise IndexError( + f"Cannot `set()` value at index {idx} (option " + f"neg_index_as_lookback={neg_index_as_lookback})! Out of range " + f"of buffer data." + ) + + def _interpret_slice( + self, + slice_, + neg_index_as_lookback, + len_self_plus_lookback=None, + ): + if len_self_plus_lookback is None: + len_self_plus_lookback = len(self) + self.lookback + + # Re-interpret slice bounds as absolute positions (>=0) within our + # internal data. + start = slice_.start + stop = slice_.stop + + # Start is None -> Exclude lookback buffer. + if start is None: + start = self.lookback + # Start is negative. + elif start < 0: + # `neg_index_as_lookback=True` -> User wants to index into the lookback + # range. 
+ if neg_index_as_lookback: + start = self.lookback + start + # Interpret index as counting "from end". + else: + start = len_self_plus_lookback + start + # Start is 0 or positive -> timestep right after lookback is interpreted as 0. + else: + start = self.lookback + start + + # Stop is None -> Set stop to very last index + 1 of our internal data. + if stop is None: + stop = len_self_plus_lookback + # Stop is negative. + elif stop < 0: + # `neg_index_as_lookback=True` -> User wants to index into the lookback + # range. Set to 0 (beginning of lookback buffer) if result is a negative + # index. + if neg_index_as_lookback: + stop = self.lookback + stop + # Interpret index as counting "from end". Set to 0 (beginning of actual + # episode) if result is a negative index. + else: + stop = len_self_plus_lookback + stop + # Stop is positive -> Add lookback range to it. + else: + stop = self.lookback + stop + + fill_left_count = fill_right_count = 0 + # Both start and stop are on left side. + if start < 0 and stop < 0: + fill_left_count = abs(start - stop) + fill_right_count = 0 + start = stop = 0 + # Both start and stop are on right side. + elif start >= len_self_plus_lookback and stop >= len_self_plus_lookback: + fill_right_count = abs(start - stop) + fill_left_count = 0 + start = stop = len_self_plus_lookback + # Set to 0 (beginning of actual episode) if result is a negative index. + elif start < 0: + fill_left_count = -start + start = 0 + elif stop >= len_self_plus_lookback: + fill_right_count = stop - len_self_plus_lookback + stop = len_self_plus_lookback + # Only `stop` might be < 0, when slice has negative step and start is > 0. 
+ elif stop < 0: + if start >= len_self_plus_lookback: + fill_left_count = start - len_self_plus_lookback + 1 + start = len_self_plus_lookback - 1 + fill_right_count = -stop - 1 + stop = -LARGE_INTEGER + + assert start >= 0 and (stop >= 0 or stop == -LARGE_INTEGER), (start, stop) + + step = slice_.step if slice_.step is not None else 1 + slice_ = slice(start, stop, step) + slice_len = max(0, (stop - start + (step - (1 if step > 0 else -1))) // step) + return slice_, slice_len, fill_left_count, fill_right_count + + def _one_hot(self, data, space_struct): + if space_struct is None: + raise ValueError( + f"Cannot `one_hot` data in `{type(self).__name__}` if a " + "gym.Space was NOT provided during construction!" + ) + + def _convert(dat_, space): + if isinstance(space, gym.spaces.Discrete): + return one_hot(dat_, depth=space.n) + elif isinstance(space, gym.spaces.MultiDiscrete): + return one_hot_multidiscrete(dat_, depths=space.nvec) + return dat_ + + if isinstance(data, list): + data = [ + tree.map_structure(_convert, dslice, space_struct) for dslice in data + ] + else: + data = tree.map_structure(_convert, data, space_struct) + return data diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/env/wrappers/__pycache__/__init__.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/rllib/env/wrappers/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..51e7e5d56bbb060212f659660df667969fed3f0a Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/rllib/env/wrappers/__pycache__/__init__.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/env/wrappers/__pycache__/dm_control_wrapper.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/rllib/env/wrappers/__pycache__/dm_control_wrapper.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..92d535bf0e3121899c9e955e4296ca2b8fed9ee6 Binary files /dev/null and 
b/.venv/lib/python3.11/site-packages/ray/rllib/env/wrappers/__pycache__/dm_control_wrapper.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/env/wrappers/__pycache__/dm_env_wrapper.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/rllib/env/wrappers/__pycache__/dm_env_wrapper.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..668795697bb8d792544203f305738bbb926b4931 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/rllib/env/wrappers/__pycache__/dm_env_wrapper.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/env/wrappers/__pycache__/pettingzoo_env.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/rllib/env/wrappers/__pycache__/pettingzoo_env.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ec5164b2df113a1cc2d9ef41ee699848de9fba5a Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/rllib/env/wrappers/__pycache__/pettingzoo_env.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/examples/_old_api_stack/models/__pycache__/neural_computer.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/rllib/examples/_old_api_stack/models/__pycache__/neural_computer.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..90e3fc94d1ecbee2265481bd285f8cefccdce9ab Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/rllib/examples/_old_api_stack/models/__pycache__/neural_computer.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/examples/_old_api_stack/policy/__init__.py b/.venv/lib/python3.11/site-packages/ray/rllib/examples/_old_api_stack/policy/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/examples/_old_api_stack/policy/__pycache__/__init__.cpython-311.pyc 
b/.venv/lib/python3.11/site-packages/ray/rllib/examples/_old_api_stack/policy/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..994bdc3cc44b6ef9fd63d35700d6a25561eb91ea Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/rllib/examples/_old_api_stack/policy/__pycache__/__init__.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/examples/_old_api_stack/policy/__pycache__/random_policy.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/rllib/examples/_old_api_stack/policy/__pycache__/random_policy.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..610bce5c1ba51f43d7361fa7407e91b445ce4d7e Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/rllib/examples/_old_api_stack/policy/__pycache__/random_policy.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/examples/_old_api_stack/policy/cliff_walking_wall_policy.py b/.venv/lib/python3.11/site-packages/ray/rllib/examples/_old_api_stack/policy/cliff_walking_wall_policy.py new file mode 100644 index 0000000000000000000000000000000000000000..c9a4758f81ea427f577c6e8004e4ffeb8f3bef06 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/rllib/examples/_old_api_stack/policy/cliff_walking_wall_policy.py @@ -0,0 +1,100 @@ +# @OldAPIStack +import gymnasium as gym +from typing import Dict, Union, List, Tuple, Optional +import numpy as np + +from ray.rllib.policy.policy import Policy, ViewRequirement +from ray.rllib.policy.sample_batch import SampleBatch +from ray.rllib.models.torch.torch_action_dist import TorchCategorical +from ray.rllib.utils.typing import AlgorithmConfigDict, TensorStructType, TensorType +from ray.rllib.utils.annotations import override +from ray.rllib.utils.debug import update_global_seed_if_necessary + + +class CliffWalkingWallPolicy(Policy): + """Optimal RLlib policy for the CliffWalkingWallEnv environment, defined in + 
ray/rllib/examples/env/cliff_walking_wall_env.py, with epsilon-greedy exploration. + + The policy takes a random action with probability epsilon, specified + by `config["epsilon"]`, and the optimal action with probability 1 - epsilon. + """ + + @override(Policy) + def __init__( + self, + observation_space: gym.Space, + action_space: gym.Space, + config: AlgorithmConfigDict, + ): + update_global_seed_if_necessary(seed=config.get("seed")) + super().__init__(observation_space, action_space, config) + + # Known optimal action dist for each of the 48 states and 4 actions + self.action_dist = np.zeros((48, 4), dtype=float) + # Starting state: go up + self.action_dist[36] = (1, 0, 0, 0) + # Cliff + Goal: never actually used, set to random + self.action_dist[37:] = (0.25, 0.25, 0.25, 0.25) + # Row 2; always go right + self.action_dist[24:36] = (0, 1, 0, 0) + # Row 0 and Row 1; go down or go right + self.action_dist[0:24] = (0, 0.5, 0.5, 0) + # Col 11; always go down, supercedes previous values + self.action_dist[[11, 23, 35]] = (0, 0, 1, 0) + assert np.allclose(self.action_dist.sum(-1), 1) + + # Epsilon-Greedy action selection + epsilon = config.get("epsilon", 0.0) + self.action_dist = self.action_dist * (1 - epsilon) + epsilon / 4 + assert np.allclose(self.action_dist.sum(-1), 1) + + # Attributes required for RLlib; note that while CliffWalkingWallPolicy + # inherits from Policy, it actually implements TorchPolicyV2. 
+ self.view_requirements[SampleBatch.ACTION_PROB] = ViewRequirement() + self.device = "cpu" + self.model = None + self.dist_class = TorchCategorical + + @override(Policy) + def compute_actions( + self, + obs_batch: Union[List[TensorStructType], TensorStructType], + state_batches: Optional[List[TensorType]] = None, + **kwargs, + ) -> Tuple[TensorType, List[TensorType], Dict[str, TensorType]]: + obs = np.array(obs_batch, dtype=int) + action_probs = self.action_dist[obs] + actions = np.zeros(len(obs), dtype=int) + for i in range(len(obs)): + actions[i] = np.random.choice(4, p=action_probs[i]) + return ( + actions, + [], + {SampleBatch.ACTION_PROB: action_probs[np.arange(len(obs)), actions]}, + ) + + @override(Policy) + def compute_log_likelihoods( + self, + actions: Union[List[TensorType], TensorType], + obs_batch: Union[List[TensorType], TensorType], + **kwargs, + ) -> TensorType: + obs = np.array(obs_batch, dtype=int) + actions = np.array(actions, dtype=int) + # Compute action probs for all possible actions + action_probs = self.action_dist[obs] + # Take the action_probs corresponding to the specified actions + action_probs = action_probs[np.arange(len(obs)), actions] + # Ignore RuntimeWarning thrown by np.log(0) if action_probs is 0 + with np.errstate(divide="ignore"): + return np.log(action_probs) + + def action_distribution_fn( + self, model, obs_batch: TensorStructType, **kwargs + ) -> Tuple[TensorType, type, List[TensorType]]: + obs = np.array(obs_batch[SampleBatch.OBS], dtype=int) + action_probs = self.action_dist[obs] + # Ignore RuntimeWarning thrown by np.log(0) if action_probs is 0 + with np.errstate(divide="ignore"): + return np.log(action_probs), TorchCategorical, None diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/examples/_old_api_stack/policy/random_policy.py b/.venv/lib/python3.11/site-packages/ray/rllib/examples/_old_api_stack/policy/random_policy.py new file mode 100644 index 
0000000000000000000000000000000000000000..c410ba0ec464e73fe380741180e71b56c9967a5b --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/rllib/examples/_old_api_stack/policy/random_policy.py @@ -0,0 +1,101 @@ +# @OldAPIStack +from gymnasium.spaces import Box +import numpy as np +import random +import tree # pip install dm_tree +from typing import ( + List, + Optional, + Union, +) + +from ray.rllib.policy.policy import Policy +from ray.rllib.policy.sample_batch import SampleBatch +from ray.rllib.utils.annotations import override +from ray.rllib.utils.typing import ModelWeights, TensorStructType, TensorType + + +class RandomPolicy(Policy): + """Hand-coded policy that returns random actions.""" + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + # Whether for compute_actions, the bounds given in action_space + # should be ignored (default: False). This is to test action-clipping + # and any Env's reaction to bounds breaches. + if self.config.get("ignore_action_bounds", False) and isinstance( + self.action_space, Box + ): + self.action_space_for_sampling = Box( + -float("inf"), + float("inf"), + shape=self.action_space.shape, + dtype=self.action_space.dtype, + ) + else: + self.action_space_for_sampling = self.action_space + + @override(Policy) + def init_view_requirements(self): + super().init_view_requirements() + # Disable for_training and action attributes for SampleBatch.INFOS column + # since it can not be properly batched. + vr = self.view_requirements[SampleBatch.INFOS] + vr.used_for_training = False + vr.used_for_compute_actions = False + + @override(Policy) + def compute_actions( + self, + obs_batch: Union[List[TensorStructType], TensorStructType], + state_batches: Optional[List[TensorType]] = None, + prev_action_batch: Union[List[TensorStructType], TensorStructType] = None, + prev_reward_batch: Union[List[TensorStructType], TensorStructType] = None, + **kwargs, + ): + # Alternatively, a numpy array would work here as well. 
+ # e.g.: np.array([random.choice([0, 1])] * len(obs_batch)) + obs_batch_size = len(tree.flatten(obs_batch)[0]) + return ( + [self.action_space_for_sampling.sample() for _ in range(obs_batch_size)], + [], + {}, + ) + + @override(Policy) + def learn_on_batch(self, samples): + """No learning.""" + return {} + + @override(Policy) + def compute_log_likelihoods( + self, + actions, + obs_batch, + state_batches=None, + prev_action_batch=None, + prev_reward_batch=None, + **kwargs, + ): + return np.array([random.random()] * len(obs_batch)) + + @override(Policy) + def get_weights(self) -> ModelWeights: + """No weights to save.""" + return {} + + @override(Policy) + def set_weights(self, weights: ModelWeights) -> None: + """No weights to set.""" + pass + + @override(Policy) + def _get_dummy_batch_from_view_requirements(self, batch_size: int = 1): + return SampleBatch( + { + SampleBatch.OBS: tree.map_structure( + lambda s: s[None], self.observation_space.sample() + ), + } + ) diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/examples/offline_rl/cartpole_recording.py b/.venv/lib/python3.11/site-packages/ray/rllib/examples/offline_rl/cartpole_recording.py new file mode 100644 index 0000000000000000000000000000000000000000..42258ac46fe0b9236bf3008619ba83a5eee3a2ab --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/rllib/examples/offline_rl/cartpole_recording.py @@ -0,0 +1,163 @@ +"""Example showing how to record expert data from a trained policy. + +This example: + - demonstrates how you can train a single-agent expert PPO Policy (RLModule) + and checkpoint it. + - shows how you can then record expert data from the trained PPO Policy to + disk during evaluation. 
+ +How to run this script +---------------------- +`python [script file name].py --checkpoint-at-end` + +For debugging, use the following additional command line options +`--no-tune --num-env-runners=0` +which should allow you to set breakpoints anywhere in the RLlib code and +have the execution stop there for inspection and debugging. + +For logging to your WandB account, use: +`--wandb-key=[your WandB API key] --wandb-project=[some project name] +--wandb-run-name=[optional: WandB run name (within the defined project)]` + +Results to expect +----------------- +In the console output you can see that the episode return of 350.0 is reached +before the timestep stop criteria is touched. Afterwards evaluation starts and +runs 10 iterations while recording the data. The number of recorded experiences +might differ from evaluation run to evaluation run because evaluation +`EnvRunner`s sample episodes while recording timesteps and episodes contain +usually different numbers of timesteps. Note, this is different when recording +episodes - in this case each row is one episode. + ++-----------------------------+------------+----------------------+ +| Trial name | status | loc | +| | | | +|-----------------------------+------------+----------------------+ +| PPO_CartPole-v1_df83f_00000 | TERMINATED | 192.168.0.119:233661 | ++-----------------------------+------------+----------------------+ ++--------+------------------+------------------------+------------------------+ +| iter | total time (s) | num_training_step_ca | num_env_steps_sample | +| | | lls_per_iteration | d_lifetime | ++--------+------------------+------------------------+------------------------| +| 21 | 25.9162 | 1 | 84000 | ++--------+------------------+------------------------+------------------------+ + +... 
+ +Number of experiences recorded: 26644 +""" + +import ray + +from ray.rllib.algorithms.ppo import PPOConfig +from ray.rllib.core import COMPONENT_RL_MODULE +from ray.rllib.core.columns import Columns +from ray.rllib.core.rl_module.default_model_config import DefaultModelConfig +from ray.rllib.utils.metrics import ( + ENV_RUNNER_RESULTS, + EPISODE_RETURN_MEAN, + EVALUATION_RESULTS, + NUM_ENV_STEPS_SAMPLED_LIFETIME, +) +from ray.rllib.utils.test_utils import add_rllib_example_script_args + +parser = add_rllib_example_script_args( + default_timesteps=200000, + default_reward=350.0, +) +parser.set_defaults(checkpoint_at_end=True, max_concurrent_trials=1) +# Use `parser` to add your own custom command line options to this script +# and (if needed) use their values to set up `config` below. +args = parser.parse_args() + +config = ( + PPOConfig() + .env_runners( + num_env_runners=5, + ) + .environment("CartPole-v1") + .rl_module( + model_config=DefaultModelConfig( + fcnet_hiddens=[32], + fcnet_activation="linear", + vf_share_layers=True, + ), + ) + .training( + lr=0.0003, + num_epochs=6, + vf_loss_coeff=0.01, + ) + .evaluation( + evaluation_num_env_runners=1, + evaluation_interval=1, + evaluation_parallel_to_training=True, + evaluation_config=PPOConfig.overrides(explore=False), + ) +) + +stop = { + f"{NUM_ENV_STEPS_SAMPLED_LIFETIME}": args.stop_timesteps, + f"{EVALUATION_RESULTS}/{ENV_RUNNER_RESULTS}/{EPISODE_RETURN_MEAN}": ( + args.stop_reward + ), +} + + +if __name__ == "__main__": + from ray.rllib.utils.test_utils import run_rllib_example_script_experiment + + results = run_rllib_example_script_experiment(config, args, stop=stop) + + # Store the best checkpoint for recording. + best_checkpoint = results.get_best_result( + metric=f"{EVALUATION_RESULTS}/{ENV_RUNNER_RESULTS}/{EPISODE_RETURN_MEAN}", + mode="max", + ).checkpoint.path + + # Configure the algorithm for offline recording. 
+ config.offline_data( + output="local:///tmp/cartpole/", + # Store columnar (tabular) data. + output_write_episodes=False, + # Each file should hold 1,000 rows. + output_max_rows_per_file=1000, + output_write_remaining_data=True, + # LZ4-compress columns 'obs', 'new_obs', and 'actions' to + # save disk space and increase performance. Note, this means + # that you have to use `input_compress_columns` in the same + # way when using the data for training in `RLlib`. + output_compress_columns=[Columns.OBS, Columns.ACTIONS], + ) + # Change the evaluation settings to sample exactly 50 episodes + # per evaluation iteration and increase the number of evaluation + # env-runners to 5. + config.evaluation( + evaluation_num_env_runners=5, + evaluation_duration=50, + evaluation_duration_unit="episodes", + evaluation_interval=1, + evaluation_parallel_to_training=False, + evaluation_config=PPOConfig.overrides(explore=False), + ) + + # Build the algorithm for evaluation. + algo = config.build() + # Load the checkpoint stored above. + algo.restore_from_path( + best_checkpoint, + component=COMPONENT_RL_MODULE, + ) + + # Evaluate over 10 iterations and record the data. + for i in range(10): + print(f"Iteration: {i + 1}:\n") + res = algo.evaluate() + print(res) + + # Stop the algorithm. + algo.stop() + + # Check the number of rows in the dataset. + ds = ray.data.read_parquet("local:///tmp/cartpole") + print(f"Number of experiences recorded: {ds.count()}") diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/examples/offline_rl/offline_rl_with_image_data.py b/.venv/lib/python3.11/site-packages/ray/rllib/examples/offline_rl/offline_rl_with_image_data.py new file mode 100644 index 0000000000000000000000000000000000000000..1a88aeeb323869353cc811f40e161f516aa49d44 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/rllib/examples/offline_rl/offline_rl_with_image_data.py @@ -0,0 +1,112 @@ +"""Example showing how to customize an offline data pipeline. 
+ +This example: + - demonstrates how you can customized your offline data pipeline. + - shows how you can override the `OfflineData` to read raw image + data and transform it into `numpy ` arrays. + - explains how you can override the `OfflinePreLearner` to + transform data further into `SingleAgentEpisode` instances that + can be processes by the learner connector pipeline. + +How to run this script +---------------------- +`python [script file name].py --checkpoint-at-end` + +For debugging, use the following additional command line options +`--no-tune --num-env-runners=0` +which should allow you to set breakpoints anywhere in the RLlib code and +have the execution stop there for inspection and debugging. + +For logging to your WandB account, use: +`--wandb-key=[your WandB API key] --wandb-project=[some project name] +--wandb-run-name=[optional: WandB run name (within the defined project)]` + +Results to expect +----------------- +2024-12-03 19:59:23,043 INFO streaming_executor.py:109 -- Execution plan +of Dataset: InputDataBuffer[Input] -> TaskPoolMapOperator[ReadBinary] -> +TaskPoolMapOperator[Map(map_to_numpy)] -> LimitOperator[limit=128] +✔️ Dataset execution finished in 10.01 seconds: 100%|███████████████████ +███████████████████████████████████████████████████████████████████████| +3.00/3.00 [00:10<00:00, 3.34s/ row] +- ReadBinary->SplitBlocks(11): Tasks: 0; Queued blocks: 0; Resources: 0.0 +CPU, 0.0B object store: 100%|█████████████████████████████████████████| +3.00/3.00 [00:10<00:00, 3.34s/ row] +- Map(map_to_numpy): Tasks: 0; Queued blocks: 0; Resources: 0.0 CPU, +0.0B object store: 100%|███████████████████████████████████████████████████| +3.00/3.00 [00:10<00:00, 3.34s/ row] +- limit=128: Tasks: 0; Queued blocks: 0; Resources: 0.0 CPU, 3.0KB object +store: 100%|██████████████████████████████████████████████████████████| +3.00/3.00 [00:10<00:00, 3.34s/ row] +Batch: {'batch': [MultiAgentBatch({}, env_steps=3)]} +""" + +import gymnasium as gym +import 
numpy as np + +from ray.rllib.algorithms.bc import BCConfig +from ray.rllib.algorithms.bc.bc_catalog import BCCatalog +from ray.rllib.algorithms.bc.torch.bc_torch_rl_module import BCTorchRLModule +from ray.rllib.core.rl_module.rl_module import RLModuleSpec, DefaultModelConfig +from ray.rllib.core.rl_module.multi_rl_module import MultiRLModuleSpec +from ray.rllib.examples.offline_rl.classes.image_offline_data import ImageOfflineData +from ray.rllib.examples.offline_rl.classes.image_offline_prelearner import ( + ImageOfflinePreLearner, +) + +# Create an Algorithm configuration. +# TODO: Make this an actually running/learning example with RLunplugged +# data from S3 and add this to the CI. +config = ( + BCConfig() + .environment( + action_space=gym.spaces.Discrete(2), + observation_space=gym.spaces.Box(0, 255, (32, 32, 3), np.float32), + ) + .offline_data( + input_=["s3://anonymous@ray-example-data/batoidea/JPEGImages/"], + prelearner_class=ImageOfflinePreLearner, + ) +) + +# Specify an `RLModule` and wrap it with a `MultiRLModuleSpec`. Note, +# on `Learner`` side any `RLModule` is an `MultiRLModule`. +module_spec = MultiRLModuleSpec( + rl_module_specs={ + "default_policy": RLModuleSpec( + model_config=DefaultModelConfig( + conv_filters=[[16, 4, 2], [32, 4, 2], [64, 4, 2], [128, 4, 2]], + conv_activation="relu", + ), + inference_only=False, + module_class=BCTorchRLModule, + catalog_class=BCCatalog, + action_space=gym.spaces.Discrete(2), + observation_space=gym.spaces.Box(0, 255, (32, 32, 3), np.float32), + ), + }, +) + +# Construct your `OfflineData` class instance. +offline_data = ImageOfflineData(config) + +# Check, how the data is transformed. Note, the +# example dataset has only 3 such images. +batch = offline_data.data.take_batch(3) + +# Construct your `OfflinePreLearner`. 
+offline_prelearner = ImageOfflinePreLearner( + config=config, + learner=None, + spaces=( + config.observation_space, + config.action_space, + ), + module_spec=module_spec, +) + +# Transform the raw data to `MultiAgentBatch` data. +batch = offline_prelearner(batch) + +# Show the transformed batch. +print(f"Batch: {batch}") diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/examples/offline_rl/pretrain_bc_single_agent_evaluate_as_multi_agent.py b/.venv/lib/python3.11/site-packages/ray/rllib/examples/offline_rl/pretrain_bc_single_agent_evaluate_as_multi_agent.py new file mode 100644 index 0000000000000000000000000000000000000000..d965add3355279dee9ad8301560dc091ed5c65e4 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/rllib/examples/offline_rl/pretrain_bc_single_agent_evaluate_as_multi_agent.py @@ -0,0 +1,176 @@ +# @HybridAPIStack + +"""Example showing how to train a (SA) BC RLModule while evaluating in a MA setup. + +Here, SA=single-agent and MA=multi-agent. + +Note that the BC Algorithm - by default - runs on the hybrid API stack, using RLModules, +but not `ConnectorV2` and `SingleAgentEpisode` yet. + +This example: + - demonstrates how you can train a single-agent BC Policy (RLModule) from a JSON + file, which contains SampleBatch (expert or non-expert) data. + - shows how you can run evaluation in a multi-agent setup (for example vs one + or more heuristic policies), while training the BC Policy. + + +How to run this script +---------------------- +`python [script file name].py --checkpoint-at-end` + +For debugging, use the following additional command line options +`--no-tune --num-env-runners=0` +which should allow you to set breakpoints anywhere in the RLlib code and +have the execution stop there for inspection and debugging. 
+ +For logging to your WandB account, use: +`--wandb-key=[your WandB API key] --wandb-project=[some project name] +--wandb-run-name=[optional: WandB run name (within the defined project)]` + + +Results to expect +----------------- +In the console output, you can see that the episode returns of the "main" policy on +the evaluation track keep increasing as BC manages to more and more clone the behavior +found in our (expert) JSON file. + +After 50-100 iterations, you should see the episode reward reach 450.0. +Note that the opponent (random) policy does not learn as it's a) not a trainable +RLModule and b) not being trained via the BCConfig. It's only used for evaluation +purposes here. + ++---------------------+------------+-----------------+--------+--------+ +| Trial name | status | loc | iter | ts | +|---------------------+------------+-----------------+--------+--------+ +| BC_None_ee65e_00000 | TERMINATED | 127.0.0.1:35031 | 93 | 203754 | ++---------------------+------------+-----------------+--------+--------+ ++----------------------+------------------------+ +| eps. return (main) | eps. 
return (random) | +|----------------------+------------------------| +| 452.4 | 28.3 | ++----------------------+------------------------+ +""" +import os +from pathlib import Path + +import gymnasium as gym + +from ray import tune +from ray.air.constants import TRAINING_ITERATION +from ray.rllib.algorithms.bc import BCConfig +from ray.rllib.examples.envs.classes.multi_agent import MultiAgentCartPole +from ray.rllib.examples._old_api_stack.policy.random_policy import RandomPolicy +from ray.rllib.policy.policy import PolicySpec +from ray.rllib.utils.metrics import ( + ENV_RUNNER_RESULTS, + EVALUATION_RESULTS, + NUM_ENV_STEPS_TRAINED, +) +from ray.rllib.utils.test_utils import ( + add_rllib_example_script_args, + run_rllib_example_script_experiment, +) +from ray.train.constants import TIME_TOTAL_S +from ray.tune.registry import register_env + +parser = add_rllib_example_script_args( + default_reward=450.0, + default_timesteps=300000, +) +parser.set_defaults(num_agents=2) + + +if __name__ == "__main__": + args = parser.parse_args() + + register_env("multi_cart", lambda cfg: MultiAgentCartPole(cfg)) + dummy_env = gym.make("CartPole-v1") + + rllib_dir = Path(__file__).parent.parent.parent + print(f"rllib dir={rllib_dir}") + offline_file = os.path.join(rllib_dir, "tests/data/cartpole/large.json") + + base_config = ( + BCConfig() + # For offline RL, we do not specify an env here (b/c we don't want any env + # instances created on the EnvRunners). Instead, we'll provide observation- + # and action-spaces here for the RLModule to know its input- and output types. + .environment( + observation_space=dummy_env.observation_space, + action_space=dummy_env.action_space, + ) + .offline_data( + input_=offline_file, + # The number of iterations to be run per learner when in multi-learner + # mode in a single RLlib training iteration. Leave this to `None` to + # run an entire epoch on the dataset during a single RLlib training + # iteration. 
For single-learner mode, 1 is the only option. + dataset_num_iters_per_learner=1 if not args.num_learners else None, + ) + .multi_agent( + policies={"main"}, + policy_mapping_fn=lambda *a, **kw: "main", + ) + .evaluation( + evaluation_interval=1, + evaluation_num_env_runners=0, + evaluation_config=BCConfig.overrides( + # Evaluate on an actual env -> switch input back to "sampler". + input_="sampler", + # Do not explore during evaluation, but act greedily. + explore=False, + # Use a multi-agent setup for evaluation. + env="multi_cart", + env_config={"num_agents": args.num_agents}, + policies={ + "main": PolicySpec(), + "random": PolicySpec(policy_class=RandomPolicy), + }, + # Only control agent 0 with the main (trained) policy. + policy_mapping_fn=( + lambda aid, *a, **kw: "main" if aid == 0 else "random" + ), + # Note that we do NOT have to specify the `policies_to_train` here, + # b/c we are inside the evaluation config (no policy is trained during + # evaluation). The fact that the BCConfig above is "only" setup + # as single-agent makes it automatically only train the policy found in + # the BCConfig's `policies` field (which is "main"). + # policies_to_train=["main"], + ), + ) + ) + + policy_eval_returns = ( + f"{EVALUATION_RESULTS}/{ENV_RUNNER_RESULTS}/policy_reward_mean/" + ) + + stop = { + # Check for the "main" policy's episode return, not the combined one. + # The combined one is the sum of the "main" policy + the "random" one. + policy_eval_returns + "main": args.stop_reward, + NUM_ENV_STEPS_TRAINED: args.stop_timesteps, + TRAINING_ITERATION: args.stop_iters, + } + + run_rllib_example_script_experiment( + base_config, + args, + stop=stop, + success_metric={policy_eval_returns + "main": args.stop_reward}, + # We use a special progress reporter here to show the evaluation results (of the + # "main" policy). 
+ # In the following dict, the keys are the (possibly nested) keys that can be + # found in RLlib's (BC's) result dict, produced at every training iteration, and + # the values are the column names you would like to see in your console reports. + # Note that for nested result dict keys, you need to use slashes "/" to define + # the exact path. + progress_reporter=tune.CLIReporter( + metric_columns={ + TRAINING_ITERATION: "iter", + TIME_TOTAL_S: "total time (s)", + NUM_ENV_STEPS_TRAINED: "ts", + policy_eval_returns + "main": "eps. return (main)", + policy_eval_returns + "random": "eps. return (random)", + } + ), + ) diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/examples/offline_rl/saving_experiences.py b/.venv/lib/python3.11/site-packages/ray/rllib/examples/offline_rl/saving_experiences.py new file mode 100644 index 0000000000000000000000000000000000000000..27c76c264da98c74ff9ebd26408de8fc85dfedc8 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/rllib/examples/offline_rl/saving_experiences.py @@ -0,0 +1,62 @@ +# @OldAPIStack + +"""Simple example of writing experiences to a file using JsonWriter.""" + +# __sphinx_doc_begin__ +import gymnasium as gym +import numpy as np +import os + +import ray._private.utils + +from ray.rllib.models.preprocessors import get_preprocessor +from ray.rllib.evaluation.sample_batch_builder import SampleBatchBuilder +from ray.rllib.offline.json_writer import JsonWriter + +if __name__ == "__main__": + batch_builder = SampleBatchBuilder() # or MultiAgentSampleBatchBuilder + writer = JsonWriter( + os.path.join(ray._private.utils.get_user_temp_dir(), "demo-out") + ) + + # You normally wouldn't want to manually create sample batches if a + # simulator is available, but let's do it anyways for example purposes: + env = gym.make("CartPole-v1") + + # RLlib uses preprocessors to implement transforms such as one-hot encoding + # and flattening of tuple and dict observations. 
For CartPole a no-op + # preprocessor is used, but this may be relevant for more complex envs. + prep = get_preprocessor(env.observation_space)(env.observation_space) + print("The preprocessor is", prep) + + for eps_id in range(100): + obs, info = env.reset() + prev_action = np.zeros_like(env.action_space.sample()) + prev_reward = 0 + terminated = truncated = False + t = 0 + while not terminated and not truncated: + action = env.action_space.sample() + new_obs, rew, terminated, truncated, info = env.step(action) + batch_builder.add_values( + t=t, + eps_id=eps_id, + agent_index=0, + obs=prep.transform(obs), + actions=action, + action_prob=1.0, # put the true action probability here + action_logp=0.0, + rewards=rew, + prev_actions=prev_action, + prev_rewards=prev_reward, + terminateds=terminated, + truncateds=truncated, + infos=info, + new_obs=prep.transform(new_obs), + ) + obs = new_obs + prev_action = action + prev_reward = rew + t += 1 + writer.write(batch_builder.build_and_reset()) +# __sphinx_doc_end__