koichi12 commited on
Commit
1d3adaf
·
verified ·
1 Parent(s): f710598

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .venv/lib/python3.11/site-packages/ray/rllib/core/__pycache__/__init__.cpython-311.pyc +0 -0
  2. .venv/lib/python3.11/site-packages/ray/rllib/core/__pycache__/columns.cpython-311.pyc +0 -0
  3. .venv/lib/python3.11/site-packages/ray/rllib/core/columns.py +73 -0
  4. .venv/lib/python3.11/site-packages/ray/rllib/core/models/__init__.py +0 -0
  5. .venv/lib/python3.11/site-packages/ray/rllib/core/models/__pycache__/__init__.cpython-311.pyc +0 -0
  6. .venv/lib/python3.11/site-packages/ray/rllib/core/models/__pycache__/base.cpython-311.pyc +0 -0
  7. .venv/lib/python3.11/site-packages/ray/rllib/core/models/__pycache__/catalog.cpython-311.pyc +0 -0
  8. .venv/lib/python3.11/site-packages/ray/rllib/core/models/__pycache__/configs.cpython-311.pyc +0 -0
  9. .venv/lib/python3.11/site-packages/ray/rllib/core/models/base.py +444 -0
  10. .venv/lib/python3.11/site-packages/ray/rllib/core/models/catalog.py +667 -0
  11. .venv/lib/python3.11/site-packages/ray/rllib/core/models/configs.py +1095 -0
  12. .venv/lib/python3.11/site-packages/ray/rllib/core/models/specs/__init__.py +0 -0
  13. .venv/lib/python3.11/site-packages/ray/rllib/core/models/specs/__pycache__/__init__.cpython-311.pyc +0 -0
  14. .venv/lib/python3.11/site-packages/ray/rllib/core/models/specs/__pycache__/specs_base.cpython-311.pyc +0 -0
  15. .venv/lib/python3.11/site-packages/ray/rllib/core/models/specs/__pycache__/specs_dict.cpython-311.pyc +0 -0
  16. .venv/lib/python3.11/site-packages/ray/rllib/core/models/specs/__pycache__/typing.cpython-311.pyc +0 -0
  17. .venv/lib/python3.11/site-packages/ray/rllib/core/models/specs/specs_base.py +226 -0
  18. .venv/lib/python3.11/site-packages/ray/rllib/core/models/specs/specs_dict.py +84 -0
  19. .venv/lib/python3.11/site-packages/ray/rllib/core/models/specs/typing.py +10 -0
  20. .venv/lib/python3.11/site-packages/ray/rllib/core/models/tf/__init__.py +0 -0
  21. .venv/lib/python3.11/site-packages/ray/rllib/core/models/tf/__pycache__/__init__.cpython-311.pyc +0 -0
  22. .venv/lib/python3.11/site-packages/ray/rllib/core/models/tf/__pycache__/base.cpython-311.pyc +0 -0
  23. .venv/lib/python3.11/site-packages/ray/rllib/core/models/tf/__pycache__/encoder.cpython-311.pyc +0 -0
  24. .venv/lib/python3.11/site-packages/ray/rllib/core/models/tf/__pycache__/heads.cpython-311.pyc +0 -0
  25. .venv/lib/python3.11/site-packages/ray/rllib/core/models/tf/__pycache__/primitives.cpython-311.pyc +0 -0
  26. .venv/lib/python3.11/site-packages/ray/rllib/core/models/tf/base.py +53 -0
  27. .venv/lib/python3.11/site-packages/ray/rllib/core/models/tf/encoder.py +315 -0
  28. .venv/lib/python3.11/site-packages/ray/rllib/core/models/tf/heads.py +198 -0
  29. .venv/lib/python3.11/site-packages/ray/rllib/core/models/tf/primitives.py +429 -0
  30. .venv/lib/python3.11/site-packages/ray/rllib/core/models/torch/__init__.py +0 -0
  31. .venv/lib/python3.11/site-packages/ray/rllib/core/models/torch/__pycache__/base.cpython-311.pyc +0 -0
  32. .venv/lib/python3.11/site-packages/ray/rllib/core/models/torch/__pycache__/encoder.cpython-311.pyc +0 -0
  33. .venv/lib/python3.11/site-packages/ray/rllib/core/models/torch/__pycache__/primitives.cpython-311.pyc +0 -0
  34. .venv/lib/python3.11/site-packages/ray/rllib/core/models/torch/base.py +98 -0
  35. .venv/lib/python3.11/site-packages/ray/rllib/core/models/torch/encoder.py +284 -0
  36. .venv/lib/python3.11/site-packages/ray/rllib/core/models/torch/heads.py +197 -0
  37. .venv/lib/python3.11/site-packages/ray/rllib/core/models/torch/primitives.py +479 -0
  38. .venv/lib/python3.11/site-packages/ray/rllib/core/models/torch/utils.py +85 -0
  39. .venv/lib/python3.11/site-packages/ray/rllib/core/testing/__init__.py +0 -0
  40. .venv/lib/python3.11/site-packages/ray/rllib/core/testing/__pycache__/__init__.cpython-311.pyc +0 -0
  41. .venv/lib/python3.11/site-packages/ray/rllib/core/testing/__pycache__/bc_algorithm.cpython-311.pyc +0 -0
  42. .venv/lib/python3.11/site-packages/ray/rllib/core/testing/__pycache__/testing_learner.cpython-311.pyc +0 -0
  43. .venv/lib/python3.11/site-packages/ray/rllib/core/testing/bc_algorithm.py +49 -0
  44. .venv/lib/python3.11/site-packages/ray/rllib/core/testing/testing_learner.py +75 -0
  45. .venv/lib/python3.11/site-packages/ray/rllib/core/testing/tf/__init__.py +0 -0
  46. .venv/lib/python3.11/site-packages/ray/rllib/core/testing/tf/__pycache__/__init__.cpython-311.pyc +0 -0
  47. .venv/lib/python3.11/site-packages/ray/rllib/core/testing/tf/__pycache__/bc_learner.cpython-311.pyc +0 -0
  48. .venv/lib/python3.11/site-packages/ray/rllib/core/testing/tf/__pycache__/bc_module.cpython-311.pyc +0 -0
  49. .venv/lib/python3.11/site-packages/ray/rllib/core/testing/tf/bc_learner.py +34 -0
  50. .venv/lib/python3.11/site-packages/ray/rllib/core/testing/tf/bc_module.py +101 -0
.venv/lib/python3.11/site-packages/ray/rllib/core/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (974 Bytes). View file
 
.venv/lib/python3.11/site-packages/ray/rllib/core/__pycache__/columns.cpython-311.pyc ADDED
Binary file (1.68 kB). View file
 
.venv/lib/python3.11/site-packages/ray/rllib/core/columns.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from ray.util.annotations import DeveloperAPI
2
+
3
+
4
@DeveloperAPI
class Columns:
    """Definitions of common column names for RL data, e.g. 'obs', 'rewards', etc..

    Note that this replaces the `SampleBatch` and `Postprocessing` columns (of the same
    name).
    """

    # Observation received from an environment after `reset()` or `step()`.
    OBS = "obs"
    # Infos received from an environment after `reset()` or `step()`.
    INFOS = "infos"

    # Action computed/sampled by an RLModule.
    ACTIONS = "actions"
    # Action actually sent to the (gymnasium) `Env.step()` method (may differ from
    # `ACTIONS`, e.g. after clipping/unsquashing by a connector).
    ACTIONS_FOR_ENV = "actions_for_env"
    # Reward returned by `env.step()`.
    REWARDS = "rewards"
    # Termination signal received from an environment after `step()`.
    TERMINATEDS = "terminateds"
    # Truncation signal received from an environment after `step()` (e.g. because
    # of a reached time limit).
    TRUNCATEDS = "truncateds"

    # Next observation: Only used by algorithms that need to look at TD-data for
    # training, such as off-policy/DQN algos.
    NEXT_OBS = "new_obs"

    # Uniquely identifies an episode.
    EPS_ID = "eps_id"
    # Identifies the agent (multi-agent) and module that produced/consumed the data.
    AGENT_ID = "agent_id"
    MODULE_ID = "module_id"

    # The size of non-zero-padded data within a (e.g. LSTM) zero-padded
    # (B, T, ...)-style train batch.
    SEQ_LENS = "seq_lens"
    # Episode timestep counter.
    T = "t"

    # Common extra RLModule output keys.
    STATE_IN = "state_in"
    NEXT_STATE_IN = "next_state_in"
    STATE_OUT = "state_out"
    NEXT_STATE_OUT = "next_state_out"
    EMBEDDINGS = "embeddings"
    ACTION_DIST_INPUTS = "action_dist_inputs"
    ACTION_PROB = "action_prob"
    ACTION_LOGP = "action_logp"

    # Value function predictions.
    VF_PREDS = "vf_preds"
    # Values, predicted at one timestep beyond the last timestep taken.
    # These are usually calculated via the value function network using the final
    # observation (and in case of an RNN: the last returned internal state).
    VALUES_BOOTSTRAPPED = "values_bootstrapped"

    # Postprocessing columns.
    ADVANTAGES = "advantages"
    VALUE_TARGETS = "value_targets"

    # Intrinsic rewards (learning with curiosity).
    INTRINSIC_REWARDS = "intrinsic_rewards"
    # Discounted sum of rewards till the end of the episode (or chunk).
    RETURNS_TO_GO = "returns_to_go"

    # Loss mask. If provided in a train batch, a Learner's compute_loss_for_module
    # method should respect the False-set value in here and mask out the respective
    # items from the loss.
    LOSS_MASK = "loss_mask"
.venv/lib/python3.11/site-packages/ray/rllib/core/models/__init__.py ADDED
File without changes
.venv/lib/python3.11/site-packages/ray/rllib/core/models/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (194 Bytes). View file
 
.venv/lib/python3.11/site-packages/ray/rllib/core/models/__pycache__/base.cpython-311.pyc ADDED
Binary file (20.2 kB). View file
 
.venv/lib/python3.11/site-packages/ray/rllib/core/models/__pycache__/catalog.cpython-311.pyc ADDED
Binary file (25.3 kB). View file
 
.venv/lib/python3.11/site-packages/ray/rllib/core/models/__pycache__/configs.cpython-311.pyc ADDED
Binary file (52.1 kB). View file
 
.venv/lib/python3.11/site-packages/ray/rllib/core/models/base.py ADDED
@@ -0,0 +1,444 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import abc
2
+ from typing import List, Optional, Tuple, Union
3
+
4
+
5
+ from ray.rllib.core.columns import Columns
6
+ from ray.rllib.core.models.configs import ModelConfig
7
+ from ray.rllib.core.models.specs.specs_base import Spec
8
+ from ray.rllib.policy.rnn_sequencing import get_fold_unfold_fns
9
+ from ray.rllib.utils.annotations import ExperimentalAPI, override
10
+ from ray.rllib.utils.typing import TensorType
11
+ from ray.util.annotations import DeveloperAPI
12
+
13
+ # Top level keys that unify model i/o.
14
+ ENCODER_OUT: str = "encoder_out"
15
+ # For Actor-Critic algorithms, these signify data related to the actor and critic
16
+ ACTOR: str = "actor"
17
+ CRITIC: str = "critic"
18
+
19
+
20
@ExperimentalAPI
class Model(abc.ABC):
    """Framework-agnostic base class for RLlib models.

    Models are low-level neural network components that offer input- and
    output-specification, a forward method, and a get_initial_state method. Models
    are composed in RLModules.

    Usage Example together with ModelConfig:

    .. testcode::

        from ray.rllib.core.models.base import Model
        from ray.rllib.core.models.configs import ModelConfig
        from dataclasses import dataclass

        class MyModel(Model):
            def __init__(self, config):
                super().__init__(config)
                self.my_param = config.my_param * 2

            def _forward(self, input_dict):
                return input_dict["obs"] * self.my_param


        @dataclass
        class MyModelConfig(ModelConfig):
            my_param: int = 42

            def build(self, framework: str):
                if framework == "bork":
                    return MyModel(self)


        config = MyModelConfig(my_param=3)
        model = config.build(framework="bork")
        print(model._forward({"obs": 1}))

    .. testoutput::

        6

    """

    def __init__(self, config: ModelConfig):
        # The ModelConfig this Model was built from; kept for later reference
        # (e.g. `self.config.shared` checks in subclasses).
        self.config = config

    def __init_subclass__(cls, **kwargs):
        # Automatically add a __post_init__ method to all subclasses of Model.
        # This method is called after the __init__ method of the subclass.
        def init_decorator(previous_init):
            def new_init(self, *args, **kwargs):
                previous_init(self, *args, **kwargs)
                # Only fire __post_init__ for the most-derived class: every
                # subclass in the MRO gets wrapped, but the `type(self) is cls`
                # guard ensures a single invocation after the outermost
                # __init__ has completed.
                if type(self) is cls:
                    self.__post_init__()

            return new_init

        cls.__init__ = init_decorator(cls.__init__)

    def __post_init__(self):
        """Called automatically after the __init__ method of the subclasses.

        Subclass `__init__` methods run first (each should call
        `super().__init__()`); once the most-derived `__init__` has returned,
        this method is invoked exactly once (see `__init_subclass__`).

        This is a good place to do any initialization that requires access to the
        subclass's attributes.
        """
        # Cache the specs once; they are exposed read-only via the
        # `input_specs`/`output_specs` properties below.
        self._input_specs = self.get_input_specs()
        self._output_specs = self.get_output_specs()

    def get_input_specs(self) -> Optional[Spec]:
        """Returns the input specs of this model.

        Override `get_input_specs` to define your own input specs.
        This method should not be called often, e.g. every forward pass.
        Instead, it should be called once at instantiation to define Model.input_specs.

        Returns:
            Spec: The input specs.
        """
        return None

    def get_output_specs(self) -> Optional[Spec]:
        """Returns the output specs of this model.

        Override `get_output_specs` to define your own output specs.
        This method should not be called often, e.g. every forward pass.
        Instead, it should be called once at instantiation to define Model.output_specs.

        Returns:
            Spec: The output specs.
        """
        return None

    @property
    def input_specs(self) -> Spec:
        """Returns the input spec of this model."""
        return self._input_specs

    @input_specs.setter
    def input_specs(self, spec: Spec) -> None:
        # Deliberately read-only: specs are derived from get_input_specs() in
        # __post_init__; direct assignment would silently desynchronize them.
        raise ValueError(
            "`input_specs` cannot be set directly. Override "
            "Model.get_input_specs() instead. Set Model._input_specs if "
            "you want to override this behavior."
        )

    @property
    def output_specs(self) -> Spec:
        """Returns the output specs of this model."""
        return self._output_specs

    @output_specs.setter
    def output_specs(self, spec: Spec) -> None:
        # Deliberately read-only; see `input_specs` setter above.
        raise ValueError(
            "`output_specs` cannot be set directly. Override "
            "Model.get_output_specs() instead. Set Model._output_specs if "
            "you want to override this behavior."
        )

    def get_initial_state(self) -> Union[dict, List[TensorType]]:
        """Returns the initial state of the Model.

        It can be left empty if this Model is not stateful.
        """
        return dict()

    @abc.abstractmethod
    def _forward(self, input_dict: dict, **kwargs) -> dict:
        """Returns the output of this model for the given input.

        This method is called by the forwarding method of the respective framework
        that is itself wrapped by RLlib in order to check model inputs and outputs.

        Args:
            input_dict: The input tensors.
            **kwargs: Forward compatibility kwargs.

        Returns:
            dict: The output tensors.
        """

    @abc.abstractmethod
    def get_num_parameters(self) -> Tuple[int, int]:
        """Returns a tuple of (num trainable params, num non-trainable params)."""

    @abc.abstractmethod
    def _set_to_dummy_weights(self, value_sequence=(-0.02, -0.01, 0.01, 0.02)) -> None:
        """Helper method to set all weights to deterministic dummy values.

        Calling this method on two `Models` that have the same architecture using
        the exact same `value_sequence` arg should make both models output the exact
        same values on arbitrary inputs. This will work, even if the two `Models`
        are of different DL frameworks.

        Args:
            value_sequence: Looping through the list of all parameters (weight matrices,
                bias tensors, etc..) of this model, in each iteration i, we set all
                values in this parameter to `value_sequence[i % len(value_sequence)]`
                (round robin).

        Example:
            TODO:
        """
187
+
188
+
189
@ExperimentalAPI
class Encoder(Model, abc.ABC):
    """The framework-agnostic base class for all RLlib encoders.

    Encoders are used to transform observations to a latent space.
    Therefore, their `input_specs` contains the observation space dimensions.
    Similarly, their `output_specs` contains the latent space dimensions.
    Encoders can be recurrent, in which case the state should be part of input- and
    output_specs. The latent vectors produced by an encoder are fed into subsequent
    "heads". Any implementation of Encoder should also be callable. This should be done
    by also inheriting from a framework-specific model base-class, s.a. TorchModel or
    TfModel.

    Abstract illustration of typical flow of tensors:

    Inputs
    |
    Encoder
    |      \\
    Head1  Head2
    |      /
    Outputs

    Outputs of encoders are generally of shape (B, latent_dim) or (B, T, latent_dim).
    That is, for time-series data, we encode into the latent space for each time step.
    This should be reflected in the `output_specs`.

    Usage example together with a ModelConfig:

    .. testcode::

        from dataclasses import dataclass
        import numpy as np

        from ray.rllib.core.columns import Columns
        from ray.rllib.core.models.base import Encoder, ENCODER_OUT
        from ray.rllib.core.models.configs import ModelConfig
        from ray.rllib.policy.sample_batch import SampleBatch

        class NumpyEncoder(Encoder):
            def __init__(self, config):
                super().__init__(config)
                self.factor = config.factor

            def __call__(self, *args, **kwargs):
                # This is a dummy method to do checked forward passes.
                return self._forward(*args, **kwargs)

            def _forward(self, input_dict, **kwargs):
                obs = input_dict[Columns.OBS]
                return {
                    ENCODER_OUT: np.array(obs) * self.factor,
                    Columns.STATE_OUT: (
                        np.array(input_dict[Columns.STATE_IN])
                        * self.factor
                    ),
                }

        @dataclass
        class NumpyEncoderConfig(ModelConfig):
            factor: int = None

            def build(self, framework: str):
                return NumpyEncoder(self)

        config = NumpyEncoderConfig(factor=2)
        encoder = NumpyEncoder(config)
        print(encoder({Columns.OBS: 1, Columns.STATE_IN: 2}))

    .. testoutput::

        {'encoder_out': 2, 'state_out': 4}

    """

    @abc.abstractmethod
    def _forward(self, input_dict: dict, **kwargs) -> dict:
        """Returns the latent of the encoder for the given inputs.

        This method is called by the forwarding method of the respective framework
        that is itself wrapped by RLlib in order to check model inputs and outputs.

        The input dict contains at minimum the observation and the state of the encoder
        (None for stateless encoders).
        The output dict contains at minimum the latent and the state of the encoder
        (None for stateless encoders).
        To establish an agreement between the encoder and RLModules, these values
        have the fixed keys `Columns.OBS` (and `Columns.STATE_IN`) for the
        `input_dict`, and `ENCODER_OUT` for the returned dict.

        Args:
            input_dict: The input tensors. Must contain at a minimum the keys
                Columns.OBS and Columns.STATE_IN (which might be None for stateless
                encoders).
            **kwargs: Forward compatibility kwargs.

        Returns:
            The output tensors. Must contain at a minimum the key ENCODER_OUT.
        """
288
+
289
+
290
@ExperimentalAPI
class ActorCriticEncoder(Encoder):
    """An encoder that potentially holds two stateless encoders.

    This is a special case of Encoder that can either enclose a single,
    shared encoder or two separate encoders: One for the actor and one for the
    critic. The two encoders are of the same type, and we can therefore make the
    assumption that they have the same input and output specs.
    """

    # Set by framework-specific subclasses (e.g. "torch" or "tf2").
    framework = None

    def __init__(self, config: ModelConfig) -> None:
        super().__init__(config)

        build = config.base_encoder_config.build
        if config.shared:
            # A single encoder serves both actor and critic.
            self.encoder = build(framework=self.framework)
        else:
            # Separate networks for actor and critic. The critic network is
            # only materialized when it may be needed for training, i.e. not
            # in inference-only mode.
            self.actor_encoder = build(framework=self.framework)
            self.critic_encoder = (
                None if config.inference_only else build(framework=self.framework)
            )

    @override(Model)
    def _forward(self, inputs: dict, **kwargs) -> dict:
        if self.config.shared:
            # One forward pass; actor and critic see the same latent.
            shared_out = self.encoder(inputs, **kwargs)
            latents = {ACTOR: shared_out[ENCODER_OUT]}
            if not self.config.inference_only:
                latents[CRITIC] = shared_out[ENCODER_OUT]
            return {ENCODER_OUT: latents}

        # Encoders should not modify inputs, so both sub-encoders may consume
        # the very same input dict.
        actor_out = self.actor_encoder(inputs, **kwargs)
        if self.critic_encoder:
            critic_out = self.critic_encoder(inputs, **kwargs)

        latents = {ACTOR: actor_out[ENCODER_OUT]}
        if not self.config.inference_only:
            latents[CRITIC] = critic_out[ENCODER_OUT]
        return {ENCODER_OUT: latents}
347
+
348
+
349
@ExperimentalAPI
class StatefulActorCriticEncoder(Encoder):
    """An encoder that potentially holds two potentially stateful encoders.

    This is a special case of Encoder that can either enclose a single,
    shared encoder or two separate encoders: One for the actor and one for the
    critic. The two encoders are of the same type, and we can therefore make the
    assumption that they have the same input and output specs.

    If this encoder wraps a single encoder, state in input- and output dicts
    is simply stored under the key `STATE_IN` and `STATE_OUT`, respectively.
    If this encoder wraps two encoders, state in input- and output dicts is
    stored under the keys `(STATE_IN, ACTOR)` and `(STATE_IN, CRITIC)` and
    `(STATE_OUT, ACTOR)` and `(STATE_OUT, CRITIC)`, respectively.
    """

    # Set by framework-specific subclasses (e.g. "torch" or "tf2").
    framework = None

    def __init__(self, config: ModelConfig) -> None:
        super().__init__(config)

        build = config.base_encoder_config.build
        if config.shared:
            self.encoder = build(framework=self.framework)
        else:
            # Unlike ActorCriticEncoder, the critic encoder is always built.
            self.actor_encoder = build(framework=self.framework)
            self.critic_encoder = build(framework=self.framework)

    @override(Model)
    def get_initial_state(self):
        if self.config.shared:
            return self.encoder.get_initial_state()
        # Separate encoders -> one initial state per role.
        return {
            ACTOR: self.actor_encoder.get_initial_state(),
            CRITIC: self.critic_encoder.get_initial_state(),
        }

    @override(Model)
    def _forward(self, inputs: dict, **kwargs) -> dict:
        if self.config.shared:
            outs = self.encoder(inputs, **kwargs)
            latent = outs.pop(ENCODER_OUT)
            return {
                ENCODER_OUT: {ACTOR: latent, CRITIC: latent},
                Columns.STATE_OUT: outs[Columns.STATE_OUT],
            }

        # Hand each sub-encoder its own state slice. Shallow-copy the inputs
        # so the caller's dict stays untouched.
        states = inputs[Columns.STATE_IN]
        actor_inputs = inputs.copy()
        actor_inputs[Columns.STATE_IN] = states[ACTOR]
        critic_inputs = inputs.copy()
        critic_inputs[Columns.STATE_IN] = states[CRITIC]

        actor_out = self.actor_encoder(actor_inputs, **kwargs)
        critic_out = self.critic_encoder(critic_inputs, **kwargs)

        return {
            ENCODER_OUT: {
                ACTOR: actor_out[ENCODER_OUT],
                CRITIC: critic_out[ENCODER_OUT],
            },
            Columns.STATE_OUT: {
                ACTOR: actor_out[Columns.STATE_OUT],
                CRITIC: critic_out[Columns.STATE_OUT],
            },
        }
421
+
422
+
423
@DeveloperAPI
def tokenize(tokenizer: Encoder, inputs: dict, framework: str) -> dict:
    """Tokenizes the observations from the input dict.

    Args:
        tokenizer: The tokenizer to use.
        inputs: The input dict.
        framework: The DL framework in use; selects the shape accessor
            (`.size()` for torch, `.shape` otherwise) and the fold/unfold
            implementations.

    Returns:
        The output dict.
    """
    # Tokenizer may depend solely on observations.
    obs = inputs[Columns.OBS]
    dims = list(obs.size() if framework == "torch" else obs.shape)
    batch_dim, time_dim = dims[:2]
    # fold: (B, T, ...) -> (B*T, ...); unfold reverses this.
    fold, unfold = get_fold_unfold_fns(batch_dim, time_dim, framework=framework)
    # Push the folded observations through the tokenizer encoder, then restore
    # the batch- and time-dimensions on its latent output.
    folded_out = tokenizer(fold({Columns.OBS: obs}))
    return unfold(folded_out[ENCODER_OUT])
.venv/lib/python3.11/site-packages/ray/rllib/core/models/catalog.py ADDED
@@ -0,0 +1,667 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import dataclasses
2
+ import enum
3
+ import functools
4
+ from typing import Optional
5
+
6
+ import gymnasium as gym
7
+ import numpy as np
8
+ import tree
9
+ from gymnasium.spaces import Box, Dict, Discrete, MultiDiscrete, Tuple
10
+
11
+ from ray.rllib.core.models.base import Encoder
12
+ from ray.rllib.core.models.configs import (
13
+ CNNEncoderConfig,
14
+ MLPEncoderConfig,
15
+ RecurrentEncoderConfig,
16
+ )
17
+ from ray.rllib.core.models.configs import ModelConfig
18
+ from ray.rllib.core.rl_module.default_model_config import DefaultModelConfig
19
+ from ray.rllib.models.distributions import Distribution
20
+ from ray.rllib.models.preprocessors import get_preprocessor, Preprocessor
21
+ from ray.rllib.models.utils import get_filter_config
22
+ from ray.rllib.utils.deprecation import deprecation_warning, DEPRECATED_VALUE
23
+ from ray.rllib.utils.error import UnsupportedSpaceException
24
+ from ray.rllib.utils.spaces.simplex import Simplex
25
+ from ray.rllib.utils.spaces.space_utils import flatten_space
26
+ from ray.rllib.utils.spaces.space_utils import get_base_struct_from_space
27
+ from ray.rllib.utils.annotations import (
28
+ OverrideToImplementCustomLogic,
29
+ OverrideToImplementCustomLogic_CallToSuperRecommended,
30
+ )
31
+
32
+
33
+ class Catalog:
34
+ """Describes the sub-module-architectures to be used in RLModules.
35
+
36
+ RLlib's native RLModules get their Models from a Catalog object.
37
+ By default, that Catalog builds the configs it has as attributes.
38
+ This component was built to be hackable and extensible. You can inject custom
39
+ components into RL Modules by overriding the `build_xxx` methods of this class.
40
+ Note that it is recommended to write a custom RL Module for a single use-case.
41
+ Modifications to Catalogs mostly make sense if you want to reuse the same
42
+ Catalog for different RL Modules. For example if you have written a custom
43
+ encoder and want to inject it into different RL Modules (e.g. for PPO, DQN, etc.).
44
+ You can influence the decision tree that determines the sub-components by modifying
45
+ `Catalog._determine_components_hook`.
46
+
47
+ Usage example:
48
+
49
+ # Define a custom catalog
50
+
51
+ .. testcode::
52
+
53
+ import torch
54
+ import gymnasium as gym
55
+ from ray.rllib.core.models.configs import MLPHeadConfig
56
+ from ray.rllib.core.models.catalog import Catalog
57
+
58
+ class MyCatalog(Catalog):
59
+ def __init__(
60
+ self,
61
+ observation_space: gym.Space,
62
+ action_space: gym.Space,
63
+ model_config_dict: dict,
64
+ ):
65
+ super().__init__(observation_space, action_space, model_config_dict)
66
+ self.my_model_config = MLPHeadConfig(
67
+ hidden_layer_dims=[64, 32],
68
+ input_dims=[self.observation_space.shape[0]],
69
+ )
70
+
71
+ def build_my_head(self, framework: str):
72
+ return self.my_model_config.build(framework=framework)
73
+
74
+ # With that, RLlib can build and use models from this catalog like this:
75
+ catalog = MyCatalog(gym.spaces.Box(0, 1), gym.spaces.Box(0, 1), {})
76
+ my_head = catalog.build_my_head(framework="torch")
77
+
78
+ # Make a call to the built model.
79
+ out = my_head(torch.Tensor([[1]]))
80
+ """
81
+
82
+ # TODO (Sven): Add `framework` arg to c'tor and remove this arg from `build`
83
+ # methods. This way, we can already know in the c'tor of Catalog, what the exact
84
+ action distribution objects are and thus what the output dims for e.g. a pi-head
85
+ # will be.
86
    def __init__(
        self,
        observation_space: gym.Space,
        action_space: gym.Space,
        model_config_dict: dict,
        # deprecated args.
        view_requirements=DEPRECATED_VALUE,
    ):
        """Initializes a Catalog with a default encoder config.

        Args:
            observation_space: The observation space of the environment.
            action_space: The action space of the environment.
            model_config_dict: The model config that specifies things like hidden
                dimensions and activations functions to use in this Catalog.

        Raises:
            Exception (via `deprecation_warning(error=True)`): If the deprecated
                `view_requirements` arg is passed in.
        """
        if view_requirements != DEPRECATED_VALUE:
            deprecation_warning(old="Catalog(view_requirements=..)", error=True)

        # TODO (sven): The following logic won't be needed anymore, once we get rid of
        # Catalogs entirely. We will assert directly inside the algo's DefaultRLModule
        # class that the `model_config` is a DefaultModelConfig. Thus users won't be
        # able to pass in partial config dicts into a default model (alternatively, we
        # could automatically augment the user provided dict by the default config
        # dataclass object only(!) for default modules).
        if dataclasses.is_dataclass(model_config_dict):
            # Normalize a DefaultModelConfig (or similar dataclass) to a plain dict.
            model_config_dict = dataclasses.asdict(model_config_dict)
        default_config = dataclasses.asdict(DefaultModelConfig())
        # end: TODO

        self.observation_space = observation_space
        self.action_space = action_space

        # Dict union (`|`, py3.9+): user-provided keys override the defaults.
        self._model_config_dict = default_config | model_config_dict
        # Set by the components hook below (via the `latent_dims` setter).
        self._latent_dims = None

        self._determine_components_hook()
123
+
124
    @OverrideToImplementCustomLogic_CallToSuperRecommended
    def _determine_components_hook(self):
        """Decision tree hook for subclasses to override.

        By default, this method executes the decision tree that determines the
        components that a Catalog builds. You can extend the components by overriding
        this or by adding to the constructor of your subclass.

        Override this method if you don't want to use the default components
        determined here. If you want to use them but add additional components, you
        should call `super()._determine_components_hook()` at the beginning of your
        implementation.

        This makes it so that subclasses are not forced to create an encoder config
        if the rest of their catalog is not dependent on it or if it breaks.
        At the end of this method, an attribute `Catalog.latent_dims`
        should be set so that heads can be built using that information.
        """
        # Default encoder config, derived from the spaces and the model config.
        self._encoder_config = self._get_encoder_config(
            observation_space=self.observation_space,
            action_space=self.action_space,
            model_config_dict=self._model_config_dict,
        )

        # Create a function that can be called when framework is known to retrieve the
        # class type for action distributions.
        self._action_dist_class_fn = functools.partial(
            self._get_dist_cls_from_action_space, action_space=self.action_space
        )

        # The dimensions of the latent vector that is output by the encoder and fed
        # to the heads.
        self.latent_dims = self._encoder_config.output_dims
157
+
158
    @property
    def latent_dims(self):
        """Returns the latent dimensions of the encoder.

        This establishes an agreement between encoder and heads about the latent
        dimensions. Encoders can be built to output a latent tensor with
        `latent_dims` dimensions, and heads can be built with tensors of
        `latent_dims` dimensions as inputs. This can be safely ignored if this
        agreement is not needed in case of modifications to the Catalog.

        By default, this is set at the end of `_determine_components_hook` from
        the encoder config's `output_dims`.

        Returns:
            The latent dimensions of the encoder.
        """
        return self._latent_dims

173
    @latent_dims.setter
    def latent_dims(self, value):
        # Plain write-through setter; subclasses may overwrite `latent_dims` in
        # their own `_determine_components_hook` implementations.
        self._latent_dims = value

177
+ @OverrideToImplementCustomLogic
178
+ def build_encoder(self, framework: str) -> Encoder:
179
+ """Builds the encoder.
180
+
181
+ By default, this method builds an encoder instance from Catalog._encoder_config.
182
+
183
+ You should override this if you want to use RLlib's default RL Modules but
184
+ only want to change the encoder. For example, if you want to use a custom
185
+ encoder, but want to use RLlib's default heads, action distribution and how
186
+ tensors are routed between them. If you want to have full control over the
187
+ RL Module, we recommend writing your own RL Module by inheriting from one of
188
+ RLlib's RL Modules instead.
189
+
190
+ Args:
191
+ framework: The framework to use. Either "torch" or "tf2".
192
+
193
+ Returns:
194
+ The encoder.
195
+ """
196
+ assert hasattr(self, "_encoder_config"), (
197
+ "You must define a `Catalog._encoder_config` attribute in your Catalog "
198
+ "subclass or override the `Catalog.build_encoder` method. By default, "
199
+ "an encoder_config is created in the __post_init__ method."
200
+ )
201
+ return self._encoder_config.build(framework=framework)
202
+
203
+ @OverrideToImplementCustomLogic
204
+ def get_action_dist_cls(self, framework: str):
205
+ """Get the action distribution class.
206
+
207
+ The default behavior is to get the action distribution from the
208
+ `Catalog._action_dist_class_fn`.
209
+
210
+ You should override this to have RLlib build your custom action
211
+ distribution instead of the default one. For example, if you don't want to
212
+ use RLlib's default RLModules with their default models, but only want to
213
+ change the distribution that Catalog returns.
214
+
215
+ Args:
216
+ framework: The framework to use. Either "torch" or "tf2".
217
+
218
+ Returns:
219
+ The action distribution.
220
+ """
221
+ assert hasattr(self, "_action_dist_class_fn"), (
222
+ "You must define a `Catalog._action_dist_class_fn` attribute in your "
223
+ "Catalog subclass or override the `Catalog.action_dist_class_fn` method. "
224
+ "By default, an action_dist_class_fn is created in the __post_init__ "
225
+ "method."
226
+ )
227
+ return self._action_dist_class_fn(framework=framework)
228
+
229
    @classmethod
    def _get_encoder_config(
        cls,
        observation_space: gym.Space,
        model_config_dict: dict,
        action_space: gym.Space = None,
    ) -> ModelConfig:
        """Returns an EncoderConfig for the given input_space and model_config_dict.

        Encoders are usually used in RLModules to transform the input space into a
        latent space that is then fed to the heads. The returned EncoderConfig
        objects correspond to the built-in Encoder classes in RLlib.
        For example, for a simple 1D-Box input_space, RLlib offers an
        MLPEncoder, hence this method returns the MLPEncoderConfig. You can overwrite
        this method to produce specific EncoderConfigs for your custom Models.

        The following input spaces lead to the following configs:
        - 1D-Box: MLPEncoderConfig
        - 3D-Box: CNNEncoderConfig
        # TODO (Artur): Support more spaces here
        # ...

        Args:
            observation_space: The observation space to use.
            model_config_dict: The model config to use.
            action_space: The action space to use if actions are to be encoded. This
                is commonly the case for LSTM models.

        Returns:
            The encoder config.

        Raises:
            ValueError: If the observation space is a 2D Box or any space for which
                no default encoder config exists.
        """
        # NOTE: Hidden AND output layers both use `fcnet_activation` here.
        activation = model_config_dict["fcnet_activation"]
        output_activation = model_config_dict["fcnet_activation"]
        use_lstm = model_config_dict["use_lstm"]

        if use_lstm:
            # Recurrent case: a single-layer LSTM on top of a "tokenizer", which is
            # itself a plain (non-recurrent) default encoder for the obs space.
            encoder_config = RecurrentEncoderConfig(
                input_dims=observation_space.shape,
                recurrent_layer_type="lstm",
                hidden_dim=model_config_dict["lstm_cell_size"],
                hidden_weights_initializer=model_config_dict["lstm_kernel_initializer"],
                hidden_weights_initializer_config=model_config_dict[
                    "lstm_kernel_initializer_kwargs"
                ],
                hidden_bias_initializer=model_config_dict["lstm_bias_initializer"],
                hidden_bias_initializer_config=model_config_dict[
                    "lstm_bias_initializer_kwargs"
                ],
                batch_major=True,
                num_layers=1,
                tokenizer_config=cls.get_tokenizer_config(
                    observation_space,
                    model_config_dict,
                ),
            )
        else:
            # TODO (Artur): Maybe check for original spaces here
            # input_space is a 1D Box
            if isinstance(observation_space, Box) and len(observation_space.shape) == 1:
                # In order to guarantee backward compatability with old configs,
                # we need to check if no latent dim was set and simply reuse the last
                # fcnet hidden dim for that purpose.
                hidden_layer_dims = model_config_dict["fcnet_hiddens"][:-1]
                encoder_latent_dim = model_config_dict["fcnet_hiddens"][-1]
                encoder_config = MLPEncoderConfig(
                    input_dims=observation_space.shape,
                    hidden_layer_dims=hidden_layer_dims,
                    hidden_layer_activation=activation,
                    hidden_layer_weights_initializer=model_config_dict[
                        "fcnet_kernel_initializer"
                    ],
                    hidden_layer_weights_initializer_config=model_config_dict[
                        "fcnet_kernel_initializer_kwargs"
                    ],
                    hidden_layer_bias_initializer=model_config_dict[
                        "fcnet_bias_initializer"
                    ],
                    hidden_layer_bias_initializer_config=model_config_dict[
                        "fcnet_bias_initializer_kwargs"
                    ],
                    output_layer_dim=encoder_latent_dim,
                    output_layer_activation=output_activation,
                    output_layer_weights_initializer=model_config_dict[
                        "fcnet_kernel_initializer"
                    ],
                    output_layer_weights_initializer_config=model_config_dict[
                        "fcnet_kernel_initializer_kwargs"
                    ],
                    output_layer_bias_initializer=model_config_dict[
                        "fcnet_bias_initializer"
                    ],
                    output_layer_bias_initializer_config=model_config_dict[
                        "fcnet_bias_initializer_kwargs"
                    ],
                )

            # input_space is a 3D Box
            elif (
                isinstance(observation_space, Box) and len(observation_space.shape) == 3
            ):
                # Fall back to RLlib's default filters if the user did not provide
                # any `conv_filters` (mutates the passed-in dict).
                if not model_config_dict.get("conv_filters"):
                    model_config_dict["conv_filters"] = get_filter_config(
                        observation_space.shape
                    )

                encoder_config = CNNEncoderConfig(
                    input_dims=observation_space.shape,
                    cnn_filter_specifiers=model_config_dict["conv_filters"],
                    cnn_activation=model_config_dict["conv_activation"],
                    cnn_kernel_initializer=model_config_dict["conv_kernel_initializer"],
                    cnn_kernel_initializer_config=model_config_dict[
                        "conv_kernel_initializer_kwargs"
                    ],
                    cnn_bias_initializer=model_config_dict["conv_bias_initializer"],
                    cnn_bias_initializer_config=model_config_dict[
                        "conv_bias_initializer_kwargs"
                    ],
                )
            # input_space is a 2D Box
            elif (
                isinstance(observation_space, Box) and len(observation_space.shape) == 2
            ):
                # RLlib used to support 2D Box spaces by silently flattening them
                raise ValueError(
                    f"No default encoder config for obs space={observation_space},"
                    f" lstm={use_lstm} found. 2D Box "
                    f"spaces are not supported. They should be either flattened to a "
                    f"1D Box space or enhanced to be a 3D box space."
                )
            # input_space is a possibly nested structure of spaces.
            else:
                # NestedModelConfig
                raise ValueError(
                    f"No default encoder config for obs space={observation_space},"
                    f" lstm={use_lstm} found."
                )

        return encoder_config

368
+ @classmethod
369
+ @OverrideToImplementCustomLogic
370
+ def get_tokenizer_config(
371
+ cls,
372
+ observation_space: gym.Space,
373
+ model_config_dict: dict,
374
+ # deprecated args.
375
+ view_requirements=DEPRECATED_VALUE,
376
+ ) -> ModelConfig:
377
+ """Returns a tokenizer config for the given space.
378
+
379
+ This is useful for recurrent / transformer models that need to tokenize their
380
+ inputs. By default, RLlib uses the models supported by Catalog out of the box to
381
+ tokenize.
382
+
383
+ You should override this method if you want to change the custom tokenizer
384
+ inside current encoders that Catalog returns without providing the recurrent
385
+ network as a whole. For example, if you want to define some custom CNN layers
386
+ as a tokenizer for a recurrent encoder that already includes the recurrent
387
+ layers and handles the state.
388
+
389
+ Args:
390
+ observation_space: The observation space to use.
391
+ model_config_dict: The model config to use.
392
+ """
393
+ if view_requirements != DEPRECATED_VALUE:
394
+ deprecation_warning(old="Catalog(view_requirements=..)", error=True)
395
+
396
+ return cls._get_encoder_config(
397
+ observation_space=observation_space,
398
+ # Use model_config_dict without flags that would end up in complex models
399
+ model_config_dict={
400
+ **model_config_dict,
401
+ **{"use_lstm": False, "use_attention": False},
402
+ },
403
+ )
404
+
405
    @classmethod
    def _get_dist_cls_from_action_space(
        cls,
        action_space: gym.Space,
        *,
        framework: Optional[str] = None,
    ) -> Distribution:
        """Returns a distribution class for the given action space.

        You can get the required input dimension for the distribution by calling
        `action_dict_cls.required_input_dim(action_space)`
        on the retrieved class. This is useful, because the Catalog needs to find out
        about the required input dimension for the distribution before the model that
        outputs these inputs is configured.

        Args:
            action_space: Action space of the target gym env.
            framework: The framework to use.

        Returns:
            The distribution class for the given action space, or None if no
            framework was given.

        Raises:
            ValueError: If the framework is unknown or the action space is an
                integer Box.
            UnsupportedSpaceException: If the Box action space has more than one
                dimension.
            NotImplementedError: For Simplex and other unsupported action spaces.
        """
        # If no framework provided, return no action distribution class (None).
        if framework is None:
            return None
        # This method is structured in two steps:
        # Firstly, construct a dictionary containing the available distribution classes.
        # Secondly, return the correct distribution class for the given action space.

        # Step 1: Construct the dictionary.

        class DistEnum(enum.Enum):
            Categorical = "Categorical"
            DiagGaussian = "Gaussian"
            Deterministic = "Deterministic"
            MultiDistribution = "MultiDistribution"
            MultiCategorical = "MultiCategorical"

        # Framework-specific distribution classes are imported lazily so that
        # only the chosen framework needs to be installed.
        if framework == "torch":
            from ray.rllib.models.torch.torch_distributions import (
                TorchCategorical,
                TorchDeterministic,
                TorchDiagGaussian,
            )

            distribution_dicts = {
                DistEnum.Deterministic: TorchDeterministic,
                DistEnum.DiagGaussian: TorchDiagGaussian,
                DistEnum.Categorical: TorchCategorical,
            }
        elif framework == "tf2":
            from ray.rllib.models.tf.tf_distributions import (
                TfCategorical,
                TfDeterministic,
                TfDiagGaussian,
            )

            distribution_dicts = {
                DistEnum.Deterministic: TfDeterministic,
                DistEnum.DiagGaussian: TfDiagGaussian,
                DistEnum.Categorical: TfCategorical,
            }
        else:
            raise ValueError(
                f"Unknown framework: {framework}. Only 'torch' and 'tf2' are "
                "supported for RLModule Catalogs."
            )

        # Only add a MultiAction distribution class to the dict if we can compute its
        # components (we need a Tuple/Dict space for this).
        if isinstance(action_space, (Tuple, Dict)):
            partial_multi_action_distribution_cls = _multi_action_dist_partial_helper(
                catalog_cls=cls,
                action_space=action_space,
                framework=framework,
            )

            distribution_dicts[
                DistEnum.MultiDistribution
            ] = partial_multi_action_distribution_cls

        # Only add a MultiCategorical distribution class to the dict if we can compute
        # its components (we need a MultiDiscrete space for this).
        if isinstance(action_space, MultiDiscrete):
            partial_multi_categorical_distribution_cls = (
                _multi_categorical_dist_partial_helper(
                    action_space=action_space,
                    framework=framework,
                )
            )

            distribution_dicts[
                DistEnum.MultiCategorical
            ] = partial_multi_categorical_distribution_cls

        # Step 2: Return the correct distribution class for the given action space.

        # Box space -> DiagGaussian OR Deterministic.
        if isinstance(action_space, Box):
            if action_space.dtype.char in np.typecodes["AllInteger"]:
                raise ValueError(
                    "Box(..., `int`) action spaces are not supported. "
                    "Use MultiDiscrete or Box(..., `float`)."
                )
            else:
                if len(action_space.shape) > 1:
                    raise UnsupportedSpaceException(
                        f"Action space has multiple dimensions {action_space.shape}. "
                        f"Consider reshaping this into a single dimension, using a "
                        f"custom action distribution, using a Tuple action space, "
                        f"or the multi-agent API."
                    )
                return distribution_dicts[DistEnum.DiagGaussian]

        # Discrete Space -> Categorical.
        elif isinstance(action_space, Discrete):
            return distribution_dicts[DistEnum.Categorical]

        # Tuple/Dict Spaces -> MultiAction.
        elif isinstance(action_space, (Tuple, Dict)):
            return distribution_dicts[DistEnum.MultiDistribution]

        # Simplex -> Dirichlet.
        elif isinstance(action_space, Simplex):
            # TODO(Artur): Supported Simplex (in torch).
            raise NotImplementedError("Simplex action space not yet supported.")

        # MultiDiscrete -> MultiCategorical.
        elif isinstance(action_space, MultiDiscrete):
            return distribution_dicts[DistEnum.MultiCategorical]

        # Unknown type -> Error.
        else:
            raise NotImplementedError(f"Unsupported action space: `{action_space}`")

540
    @staticmethod
    def get_preprocessor(observation_space: gym.Space, **kwargs) -> Preprocessor:
        """Returns a suitable preprocessor for the given observation space.

        Args:
            observation_space: The input observation space.
            **kwargs: Forward-compatible kwargs.

        Returns:
            preprocessor: Preprocessor for the observations.
        """
        # TODO(Artur): Since preprocessors have long been @PublicAPI with the options
        #  kwarg as part of their constructor, we fade out support for this,
        #  beginning with this entrypoint.
        #  Next, we should deprecate the `options` kwarg from the Preprocessor itself,
        #  after deprecating the old catalog and other components that still pass this.
        options = kwargs.get("options", {})
        if options:
            # Passing options is still tolerated (warning only, no error).
            deprecation_warning(
                old="get_preprocessor_for_space(..., options={...})",
                help="Override `Catalog.get_preprocessor()` "
                "in order to implement custom behaviour.",
                error=False,
            )

        if options.get("custom_preprocessor"):
            # Hard error: custom preprocessors are no longer supported, so this
            # branch raises and never returns.
            deprecation_warning(
                old="model_config['custom_preprocessor']",
                help="Custom preprocessors are deprecated, "
                "since they sometimes conflict with the built-in "
                "preprocessors for handling complex observation spaces. "
                "Please use wrapper classes around your environment "
                "instead.",
                error=True,
            )
        else:
            # TODO(Artur): Inline the get_preprocessor() call here once we have
            #  deprecated the old model catalog.
            # NOTE(review): the bare name `get_preprocessor` resolves to module
            # scope here (not this staticmethod) — presumably the legacy helper
            # imported at the top of this file; confirm against the imports.
            cls = get_preprocessor(observation_space)
            prep = cls(observation_space, options)
            return prep

582
+
583
def _multi_action_dist_partial_helper(
    catalog_cls: "Catalog", action_space: gym.Space, framework: str
) -> Distribution:
    """Helper method to get a partial of a MultiActionDistribution.

    This is useful for when we want to create MultiActionDistributions from
    logits only (!) later, but know the action space now already.

    Args:
        catalog_cls: The ModelCatalog class to use.
        action_space: The action space to get the child distribution classes for.
        framework: The framework to use.

    Returns:
        A partial of the TorchMultiDistribution class.
    """
    # Mirror the (possibly nested) action space structure with one child
    # distribution class per leaf space.
    action_space_struct = get_base_struct_from_space(action_space)
    flat_action_space = flatten_space(action_space)
    child_distribution_cls_struct = tree.map_structure(
        lambda s: catalog_cls._get_dist_cls_from_action_space(
            action_space=s,
            framework=framework,
        ),
        action_space_struct,
    )
    flat_distribution_clses = tree.flatten(child_distribution_cls_struct)

    # Number of input logits each child distribution consumes; used later to
    # split a single flat logits tensor among the children.
    logit_lens = [
        int(dist_cls.required_input_dim(space))
        for dist_cls, space in zip(flat_distribution_clses, flat_action_space)
    ]

    if framework == "torch":
        from ray.rllib.models.torch.torch_distributions import (
            TorchMultiDistribution,
        )

        multi_action_dist_cls = TorchMultiDistribution
    elif framework == "tf2":
        from ray.rllib.models.tf.tf_distributions import TfMultiDistribution

        multi_action_dist_cls = TfMultiDistribution
    else:
        raise ValueError(f"Unsupported framework: {framework}")

    partial_dist_cls = multi_action_dist_cls.get_partial_dist_cls(
        space=action_space,
        child_distribution_cls_struct=child_distribution_cls_struct,
        input_lens=logit_lens,
    )
    return partial_dist_cls

635
+
636
def _multi_categorical_dist_partial_helper(
    action_space: gym.Space, framework: str
) -> Distribution:
    """Helper method to get a partial of a MultiCategorical Distribution.

    This is useful for when we want to create MultiCategorical Distribution from
    logits only (!) later, but know the action space now already.

    Args:
        action_space: The action space to get the child distribution classes for.
        framework: The framework to use.

    Returns:
        A partial of the MultiCategorical class.
    """
    # Resolve the framework-specific MultiCategorical class (lazy imports so
    # only the chosen framework must be installed).
    if framework == "torch":
        from ray.rllib.models.torch.torch_distributions import TorchMultiCategorical

        dist_cls = TorchMultiCategorical
    elif framework == "tf2":
        from ray.rllib.models.tf.tf_distributions import TfMultiCategorical

        dist_cls = TfMultiCategorical
    else:
        raise ValueError(f"Unsupported framework: {framework}")

    # Each sub-space of the MultiDiscrete contributes `nvec[i]` logits.
    return dist_cls.get_partial_dist_cls(
        space=action_space, input_lens=list(action_space.nvec)
    )

.venv/lib/python3.11/site-packages/ray/rllib/core/models/configs.py ADDED
@@ -0,0 +1,1095 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import abc
2
+ from dataclasses import dataclass, field
3
+ import functools
4
+ from typing import Callable, Dict, List, Optional, Tuple, TYPE_CHECKING, Union
5
+
6
+ import numpy as np
7
+
8
+ from ray.rllib.models.torch.misc import (
9
+ same_padding,
10
+ same_padding_transpose_after_stride,
11
+ valid_padding,
12
+ )
13
+ from ray.rllib.models.utils import get_activation_fn, get_initializer_fn
14
+ from ray.rllib.utils.annotations import ExperimentalAPI
15
+
16
+ if TYPE_CHECKING:
17
+ from ray.rllib.core.models.base import Model, Encoder
18
+
19
+
20
@ExperimentalAPI
def _framework_implemented(torch: bool = True, tf2: bool = True):
    """Decorator to check if a model was implemented in a framework.

    Args:
        torch: Whether we can build this model with torch.
        tf2: Whether we can build this model with tf2.

    Returns:
        The decorated function.

    Raises:
        ValueError: If the framework is not available to build.
    """
    # Collect the frameworks this config supports.
    accepted = [fw for fw, available in (("torch", torch), ("tf2", tf2)) if available]

    def decorator(fn: Callable) -> Callable:
        @functools.wraps(fn)
        def checked_build(self, framework, **kwargs):
            # Guard: refuse to build for a framework not in the accepted list.
            if framework not in accepted:
                raise ValueError(
                    f"This config does not support framework "
                    f"{framework}. Only frameworks in {accepted} are "
                    f"supported."
                )
            return fn(self, framework, **kwargs)

        return checked_build

    return decorator

55
+
56
@ExperimentalAPI
@dataclass
class ModelConfig(abc.ABC):
    """Base class for configuring a `Model` instance.

    ModelConfigs are DL framework-agnostic.
    A `Model` (as a sub-component of an `RLModule`) is built via calling the
    respective ModelConfig's `build()` method.
    RLModules build their sub-components this way after receiving one or more
    `ModelConfig` instances from a Catalog object.

    However, `ModelConfig` is not restricted to be used only with Catalog or RLModules.
    Usage examples can be found in the individual Model classes', e.g.
    see `ray.rllib.core.models.configs::MLPHeadConfig`.

    Attributes:
        input_dims: The input dimensions of the network
        always_check_shapes: Whether to always check the inputs and outputs of the
            model for the specifications. Input specifications are checked on failed
            forward passes of the model regardless of this flag. If this flag is set
            to `True`, inputs and outputs are checked on every call. This leads to
            a slow-down and should only be used for debugging.
    """

    # Input dimensions of the network, e.g. `(32,)` for a flat 1D input.
    input_dims: Union[List[int], Tuple[int]] = None
    # Debugging flag; enabling it checks specs on every call (slow).
    always_check_shapes: bool = False

    @abc.abstractmethod
    def build(self, framework: str):
        """Builds the model.

        Args:
            framework: The framework to use for building the model.
        """
        raise NotImplementedError

    @property
    def output_dims(self) -> Optional[Tuple[int]]:
        """Read-only `output_dims` are inferred automatically from other settings."""
        # The base class cannot infer anything; concrete configs override this.
        return None

97
+
98
@ExperimentalAPI
@dataclass
class _MLPConfig(ModelConfig):
    """Generic configuration class for multi-layer-perceptron based Model classes.

    `output_dims` is reached by either the provided `output_layer_dim` setting (int) OR
    by the last entry of `hidden_layer_dims`. In the latter case, no special output
    layer is added and all layers in the stack behave exactly the same. If
    `output_layer_dim` is provided, users might also change this last layer's
    activation (`output_layer_activation`) and its bias setting
    (`output_layer_use_bias`).

    This is a private class as users should not configure their models directly
    through this class, but use one of the sub-classes, e.g. `MLPHeadConfig` or
    `MLPEncoderConfig`.

    Attributes:
        input_dims: A 1D tensor indicating the input dimension, e.g. `[32]`.
        hidden_layer_dims: The sizes of the hidden layers. If an empty list,
            `output_layer_dim` must be provided (int) and only a single layer will be
            built.
        hidden_layer_use_bias: Whether to use bias on all dense layers in the network
            (excluding a possible separate output layer defined by `output_layer_dim`).
        hidden_layer_activation: The activation function to use after each layer (
            except for the output). The default activation for hidden layers is "relu".
        hidden_layer_use_layernorm: Whether to insert a LayerNorm functionality
            in between each hidden layer's output and its activation.
        hidden_layer_weights_initializer: The initializer function or class to use for
            weight initialization in the hidden layers. If `None` the default
            initializer of the respective dense layer of a framework (`"torch"` or
            `"tf2"`) is used. Note, all initializers defined in the framework `"tf2`)
            are allowed. For `"torch"` only the in-place initializers, i.e. ending with
            an underscore "_" are allowed.
        hidden_layer_weights_initializer_config: Configuration to pass into the
            initializer defined in `hidden_layer_weights_initializer`.
        hidden_layer_bias_initializer: The initializer function or class to use for
            bias initialization in the hidden layers. If `None` the default initializer
            of the respective dense layer of a framework (`"torch"` or `"tf2"`) is used.
            Note, all initializers defined in the framework `"tf2`) are allowed. For
            `"torch"` only the in-place initializers, i.e. ending with an underscore "_"
            are allowed.
        hidden_layer_bias_initializer_config: Configuration to pass into the
            initializer defined in `hidden_layer_bias_initializer`.
        output_layer_dim: An int indicating the size of the output layer. This may be
            set to `None` in case no extra output layer should be built and only the
            layers specified by `hidden_layer_dims` will be part of the network.
        output_layer_use_bias: Whether to use bias on the separate output layer, if any.
        output_layer_activation: The activation function to use for the output layer,
            if any. The default activation for the output layer, if any, is "linear",
            meaning no activation.
        output_layer_weights_initializer: The initializer function or class to use for
            weight initialization in the output layers. If `None` the default
            initializer of the respective dense layer of a framework (`"torch"` or `
            "tf2"`) is used. Note, all initializers defined in the framework `"tf2`) are
            allowed. For `"torch"` only the in-place initializers, i.e. ending with an
            underscore "_" are allowed.
        output_layer_weights_initializer_config: Configuration to pass into the
            initializer defined in `output_layer_weights_initializer`.
        output_layer_bias_initializer: The initializer function or class to use for
            bias initialization in the output layers. If `None` the default initializer
            of the respective dense layer of a framework (`"torch"` or `"tf2"`) is used.
            For `"torch"` only the in-place initializers, i.e. ending with an underscore
            "_" are allowed.
        output_layer_bias_initializer_config: Configuration to pass into the
            initializer defined in `output_layer_bias_initializer`.
        clip_log_std: If log std should be clipped by `log_std_clip_param`. This applies
            only to the action distribution parameters that encode the log standard
            deviation of a `DiagGaussian` distribution.
        log_std_clip_param: The clipping parameter for the log std, if clipping should
            be applied - i.e. `clip_log_std=True`. The default value is 20, i.e. log
            stds are clipped in between -20 and 20.
    """

    hidden_layer_dims: Union[List[int], Tuple[int]] = (256, 256)
    hidden_layer_use_bias: bool = True
    hidden_layer_activation: str = "relu"
    hidden_layer_use_layernorm: bool = False
    hidden_layer_weights_initializer: Optional[Union[str, Callable]] = None
    hidden_layer_weights_initializer_config: Optional[Dict] = None
    hidden_layer_bias_initializer: Optional[Union[str, Callable]] = None
    hidden_layer_bias_initializer_config: Optional[Dict] = None

    # Optional last output layer with - possibly - different activation and use_bias
    # settings.
    output_layer_dim: Optional[int] = None
    output_layer_use_bias: bool = True
    output_layer_activation: str = "linear"
    output_layer_weights_initializer: Optional[Union[str, Callable]] = None
    output_layer_weights_initializer_config: Optional[Dict] = None
    output_layer_bias_initializer: Optional[Union[str, Callable]] = None
    output_layer_bias_initializer_config: Optional[Dict] = None

    # Optional clipping of log standard deviation.
    clip_log_std: bool = False
    # Optional clip parameter for the log standard deviation.
    log_std_clip_param: float = 20.0

    @property
    def output_dims(self):
        # Without an explicit output layer, the last hidden layer determines the
        # output size, so at least one of the two settings must be present.
        if self.output_layer_dim is None and not self.hidden_layer_dims:
            raise ValueError(
                "If `output_layer_dim` is None, you must specify at least one hidden "
                "layer dim, e.g. `hidden_layer_dims=[32]`!"
            )

        # Infer `output_dims` automatically.
        return (int(self.output_layer_dim or self.hidden_layer_dims[-1]),)

    def _validate(self, framework: str = "torch"):
        """Makes sure that settings are valid.

        Args:
            framework: The framework whose activation/initializer registries are
                used to validate the configured names.

        Raises:
            ValueError: If `input_dims`/`output_dims` are not 1D, or if
                `log_std_clip_param` is None.
        """
        if self.input_dims is not None and len(self.input_dims) != 1:
            raise ValueError(
                f"`input_dims` ({self.input_dims}) of MLPConfig must be 1D, "
                "e.g. `[32]`!"
            )
        if len(self.output_dims) != 1:
            raise ValueError(
                f"`output_dims` ({self.output_dims}) of _MLPConfig must be "
                "1D, e.g. `[32]`! This is an inferred value, hence other settings might"
                " be wrong."
            )
        if self.log_std_clip_param is None:
            raise ValueError(
                "`log_std_clip_param` of _MLPConfig must be a float value, but is "
                "`None`."
            )

        # Call these already here to catch errors early on.
        get_activation_fn(self.hidden_layer_activation, framework=framework)
        get_activation_fn(self.output_layer_activation, framework=framework)
        get_initializer_fn(self.hidden_layer_weights_initializer, framework=framework)
        get_initializer_fn(self.hidden_layer_bias_initializer, framework=framework)
        get_initializer_fn(self.output_layer_weights_initializer, framework=framework)
        get_initializer_fn(self.output_layer_bias_initializer, framework=framework)

233
+
234
@ExperimentalAPI
@dataclass
class MLPHeadConfig(_MLPConfig):
    """Configuration for an MLP head.

    See `_MLPConfig` for the available settings and usage details.

    Example:

    .. testcode::

        # Configuration:
        config = MLPHeadConfig(
            input_dims=[4],  # must be 1D tensor
            hidden_layer_dims=[8, 8],
            hidden_layer_activation="relu",
            hidden_layer_use_layernorm=False,
            # final output layer with no activation (linear)
            output_layer_dim=2,
            output_layer_activation="linear",
        )
        model = config.build(framework="tf2")

        # Resulting stack in pseudocode:
        # Linear(4, 8, bias=True)
        # ReLU()
        # Linear(8, 8, bias=True)
        # ReLU()
        # Linear(8, 2, bias=True)

    Example:

    .. testcode::

        # Configuration:
        config = MLPHeadConfig(
            input_dims=[2],
            hidden_layer_dims=[10, 4],
            hidden_layer_activation="silu",
            hidden_layer_use_layernorm=True,
            hidden_layer_use_bias=False,
            # Initializer for `framework="torch"`.
            hidden_layer_weights_initializer="xavier_normal_",
            hidden_layer_weights_initializer_config={"gain": 0.8},
            # No final output layer (use last dim in `hidden_layer_dims`
            # as the size of the last layer in the stack).
            output_layer_dim=None,
        )
        model = config.build(framework="torch")

        # Resulting stack in pseudocode:
        # Linear(2, 10, bias=False)
        # LayerNorm((10,))  # layer norm always before activation
        # SiLU()
        # Linear(10, 4, bias=False)
        # LayerNorm((4,))  # layer norm always before activation
        # SiLU()
    """

    @_framework_implemented()
    def build(self, framework: str = "torch") -> "Model":
        # Catch invalid settings before constructing any layers.
        self._validate(framework=framework)

        if framework == "torch":
            from ray.rllib.core.models.torch.heads import TorchMLPHead as head_cls
        else:
            from ray.rllib.core.models.tf.heads import TfMLPHead as head_cls

        return head_cls(self)
305
+
306
+
307
@ExperimentalAPI
@dataclass
class FreeLogStdMLPHeadConfig(_MLPConfig):
    """Configuration for an MLPHead with a floating second half of outputs.

    This model can be useful together with Gaussian Distributions.
    This gaussian distribution would be conditioned as follows:
    - The first half of outputs from this model can be used as
    state-dependent means when conditioning a gaussian distribution.
    - The second half are floating free biases that can be used as
    state-independent standard deviations to condition a gaussian distribution.
    The mean values are produced by an MLPHead, while the standard
    deviations are added as floating free biases from a single 1D trainable variable
    (not dependent on the net's inputs).

    The output dimensions of the configured MLPHeadConfig must be even and are
    divided by two to gain the output dimensions of each the mean-net and the
    free std-variable.

    Example:
    .. testcode::
        :skipif: True

        # Configuration:
        config = FreeLogStdMLPHeadConfig(
            input_dims=[2],
            hidden_layer_dims=[16],
            hidden_layer_activation=None,
            hidden_layer_use_layernorm=False,
            hidden_layer_use_bias=True,
            output_layer_dim=8,  # <- this must be an even size
            output_layer_use_bias=True,
        )
        model = config.build(framework="tf2")

        # Resulting stack in pseudocode:
        # Linear(2, 16, bias=True)
        # Linear(16, 8, bias=True)  # 8 nodes for the mean
        # Extra variable:
        # Tensor((8,), float32)  # for the free (observation independent) std outputs

    Example:
    .. testcode::
        :skipif: True

        # Configuration:
        config = FreeLogStdMLPHeadConfig(
            input_dims=[2],
            hidden_layer_dims=[31, 100],  # <- last idx must be an even size
            hidden_layer_activation="relu",
            hidden_layer_use_layernorm=False,
            hidden_layer_use_bias=False,
            output_layer_dim=None,  # use the last hidden layer as output layer
        )
        model = config.build(framework="torch")

        # Resulting stack in pseudocode:
        # Linear(2, 31, bias=False)
        # ReLu()
        # Linear(31, 50, bias=False)  # 100 / 2 = 50 -> 50 nodes for the mean
        # ReLu()
        # Extra variable:
        # Tensor((50,), float32)  # for the free (observation independent) std outputs
    """

    def _validate(self, framework: str = "torch"):
        """Makes sure that settings are valid.

        Raises:
            ValueError: If the base `_MLPConfig` settings are invalid, or if the
                (inferred) output dims are not a single, even int.
        """
        # Also run the base checks (input/output dims, activations, etc.), as
        # all sibling head configs do.
        super()._validate(framework=framework)

        if len(self.output_dims) > 1 or self.output_dims[0] % 2 == 1:
            # Bug fix: this previously read `self.ouput_layer_dim` (typo), which
            # raised an AttributeError instead of the intended ValueError.
            raise ValueError(
                f"`output_layer_dim` ({self.output_layer_dim}) or the last value in "
                f"`hidden_layer_dims` ({self.hidden_layer_dims}) of a "
                "FreeLogStdMLPHeadConfig must be an even int (dividable by 2), "
                "e.g. `output_layer_dim=8` or `hidden_layer_dims=[133, 128]`!"
            )

    @_framework_implemented()
    def build(self, framework: str = "torch") -> "Model":
        # Catch invalid settings before constructing any layers.
        self._validate(framework=framework)

        if framework == "torch":
            from ray.rllib.core.models.torch.heads import TorchFreeLogStdMLPHead

            return TorchFreeLogStdMLPHead(self)
        else:
            from ray.rllib.core.models.tf.heads import TfFreeLogStdMLPHead

            return TfFreeLogStdMLPHead(self)
393
+
394
+
395
@ExperimentalAPI
@dataclass
class CNNTransposeHeadConfig(ModelConfig):
    """Configuration for a convolutional transpose head (decoder) network.

    The configured Model transforms 1D-observations into an image space.
    The stack of layers is composed of an initial Dense layer, followed by a
    sequence of Conv2DTranspose layers. `input_dims` describes the shape of the
    (1D) input tensor, while `initial_image_dims` describes the input into the
    first Conv2DTranspose layer. The translation from `input_dims` to
    `initial_image_dims` is done via the initial Dense layer (w/o activation,
    w/o layer-norm, and w/ bias), followed by a reshape.

    Beyond that, each layer specified by `cnn_transpose_filter_specifiers`
    is followed by an activation function according to `cnn_transpose_activation`.

    `output_dims` is reached after the final Conv2DTranspose layer.
    Note that the last Conv2DTranspose layer is never activated and never
    layer-norm'd, regardless of the other settings.

    An example for a single conv transpose operation: Input "image" is
    (4, 4, 24) (not yet strided), padding is "same", stride=2, kernel=5.
    First, the input is strided (each pixel separated by stride-1 zeros),
    yielding a 7x7 (x24) "image". Then this strided image is padded (the exact
    padding values are computed by the model; it may be asymmetrical, e.g.
    left=3/right=2, top=3/bottom=2), and the deconvolution with kernel=5
    finally yields an output "image" of 8x8 (x num output filters).

    Attributes:
        input_dims: The input dimensions of the network. This must be a 1D tensor.
        initial_image_dims: The shape of the input to the first
            Conv2DTranspose layer. We will make sure the input is transformed to
            these dims via a preceding initial Dense layer, followed by a reshape,
            before entering the Conv2DTranspose stack.
        initial_dense_weights_initializer: The initializer function or class to use
            for weight initialization in the initial dense layer. If `None` the
            default initializer of the respective dense layer of a framework
            (`"torch"` or `"tf2"`) is used. Note, all initializers defined in the
            framework (`"tf2"`) are allowed. For `"torch"` only the in-place
            initializers, i.e. ending with an underscore "_", are allowed.
        initial_dense_weights_initializer_config: Configuration to pass into the
            initializer defined in `initial_dense_weights_initializer`.
        initial_dense_bias_initializer: The initializer function or class to use for
            bias initialization in the initial dense layer. If `None` the default
            initializer of the respective layer of a framework (`"torch"` or
            `"tf2"`) is used. For `"torch"` only the in-place initializers, i.e.
            ending with an underscore "_", are allowed.
        initial_dense_bias_initializer_config: Configuration to pass into the
            initializer defined in `initial_dense_bias_initializer`.
        cnn_transpose_filter_specifiers: A list of lists, where each element of an
            inner list contains elements of the form
            `[number of channels/filters, [kernel width, kernel height], stride]` to
            specify a convolutional layer stacked in order of the outer list.
        cnn_transpose_use_bias: Whether to use bias on all Conv2DTranspose layers.
        cnn_transpose_activation: The activation function to use after each layer
            (except for the output).
        cnn_transpose_use_layernorm: Whether to insert a LayerNorm functionality
            in between each Conv2DTranspose layer's output and its activation.
        cnn_transpose_kernel_initializer: The initializer function or class to use
            for kernel initialization in the Conv2DTranspose layers. If `None` the
            default initializer of the respective layer of a framework (`"torch"`
            or `"tf2"`) is used. For `"torch"` only the in-place initializers, i.e.
            ending with an underscore "_", are allowed.
        cnn_transpose_kernel_initializer_config: Configuration to pass into the
            initializer defined in `cnn_transpose_kernel_initializer`.
        cnn_transpose_bias_initializer: The initializer function or class to use for
            bias initialization in the Conv2DTranspose layers. If `None` the
            default initializer of the respective layer of a framework (`"torch"`
            or `"tf2"`) is used. For `"torch"` only the in-place initializers, i.e.
            ending with an underscore "_", are allowed.
        cnn_transpose_bias_initializer_config: Configuration to pass into the
            initializer defined in `cnn_transpose_bias_initializer`.

    Example:
    .. testcode::
        :skipif: True

        # Configuration:
        config = CNNTransposeHeadConfig(
            input_dims=[10],  # 1D input vector (possibly coming from another NN)
            initial_image_dims=[4, 4, 96],  # first image input to deconv stack
            # Initializer for TensorFlow.
            initial_dense_weights_initializer="HeNormal",
            initial_dense_weights_initializer_config={"seed": 334},
            cnn_transpose_filter_specifiers=[
                [48, [4, 4], 2],
                [24, [4, 4], 2],
                [3, [4, 4], 2],
            ],
            cnn_transpose_activation="silu",  # or "swish", which is the same
            cnn_transpose_use_layernorm=False,
            cnn_transpose_use_bias=True,
        )
        model = config.build(framework="torch")

        # Resulting stack in pseudocode:
        # Linear(10, 4*4*96)
        # Conv2DTranspose(
        #     in_channels=96, out_channels=48,
        #     kernel_size=[4, 4], stride=2, bias=True,
        # )
        # Swish()
        # Conv2DTranspose(
        #     in_channels=48, out_channels=24,
        #     kernel_size=[4, 4], stride=2, bias=True,
        # )
        # Swish()
        # Conv2DTranspose(
        #     in_channels=24, out_channels=3,
        #     kernel_size=[4, 4], stride=2, bias=True,
        # )

    Example:
    .. testcode::
        :skipif: True

        # Configuration:
        config = CNNTransposeHeadConfig(
            input_dims=[128],  # 1D input vector (possibly coming from another NN)
            initial_image_dims=[4, 4, 32],  # first image input to deconv stack
            cnn_transpose_filter_specifiers=[
                [16, 4, 2],
                [3, 4, 2],
            ],
            cnn_transpose_activation="relu",
            cnn_transpose_use_layernorm=True,
            cnn_transpose_use_bias=False,
            # Initializer for `framework="torch"`.
            # Note, for Torch only in-place initializers are allowed.
            cnn_transpose_kernel_initializer="xavier_normal_",
            cnn_transpose_kernel_initializer_config={"gain": 0.8},
        )
        model = config.build(framework="torch")

        # Resulting stack in pseudocode:
        # Linear(128, 4*4*32, bias=True)  # bias always True for initial dense layer
        # Conv2DTranspose(
        #     in_channels=32, out_channels=16,
        #     kernel_size=[4, 4], stride=2, bias=False,
        # )
        # LayerNorm((-3, -2, -1))  # layer normalize over last 3 axes
        # ReLU()
        # Conv2DTranspose(
        #     in_channels=16, out_channels=3,
        #     kernel_size=[4, 4], stride=2, bias=False,
        # )
    """

    input_dims: Union[List[int], Tuple[int]] = None
    initial_image_dims: Union[List[int], Tuple[int]] = field(
        default_factory=lambda: [4, 4, 96]
    )
    initial_dense_weights_initializer: Optional[Union[str, Callable]] = None
    initial_dense_weights_initializer_config: Optional[Dict] = None
    initial_dense_bias_initializer: Optional[Union[str, Callable]] = None
    initial_dense_bias_initializer_config: Optional[Dict] = None
    cnn_transpose_filter_specifiers: List[List[Union[int, List[int]]]] = field(
        default_factory=lambda: [[48, [4, 4], 2], [24, [4, 4], 2], [3, [4, 4], 2]]
    )
    cnn_transpose_use_bias: bool = True
    cnn_transpose_activation: str = "relu"
    cnn_transpose_use_layernorm: bool = False
    cnn_transpose_kernel_initializer: Optional[Union[str, Callable]] = None
    cnn_transpose_kernel_initializer_config: Optional[Dict] = None
    cnn_transpose_bias_initializer: Optional[Union[str, Callable]] = None
    cnn_transpose_bias_initializer_config: Optional[Dict] = None

    @property
    def output_dims(self):
        """Infers the output (image) dims, layer by layer ("same" padding)."""
        dims = self.initial_image_dims
        for filter_spec in self.cnn_transpose_filter_specifiers:
            num_filters, kernel, stride = filter_spec
            # Compute stride output size first (striding is performed first in a
            # conv transpose layer).
            stride_w, stride_h = (stride, stride) if isinstance(stride, int) else stride
            dims = [
                dims[0] * stride_w - (stride_w - 1),
                dims[1] * stride_h - (stride_h - 1),
                num_filters,
            ]
            # TODO (Sven): Support "valid" padding for Conv2DTranspose layers, too.
            #  Analogous to Conv2D Layers in a CNNEncoder.
            # Apply the correct padding. Note that this might be asymmetrical,
            # meaning left padding might be != right padding, same for top/bottom.
            _, padding_out_size = same_padding_transpose_after_stride(
                (dims[0], dims[1]), kernel, stride
            )
            # Perform conv transpose operation with the kernel.
            kernel_w, kernel_h = (kernel, kernel) if isinstance(kernel, int) else kernel
            dims = [
                padding_out_size[0] - (kernel_w - 1),
                padding_out_size[1] - (kernel_h - 1),
                num_filters,
            ]
        return tuple(dims)

    def _validate(self, framework: str = "torch"):
        """Makes sure that settings are valid."""
        if len(self.input_dims) != 1:
            # Bug fix: the old message wrongly demanded a 3D (image-like) tensor;
            # `input_dims` of this head is the 1D input vector (the image shape
            # is given via `initial_image_dims` instead).
            raise ValueError(
                f"`input_dims` ({self.input_dims}) of CNNTransposeHeadConfig must "
                "be 1D, e.g. `[128]`! The 3D (image-like) dims of the deconv "
                "stack's input are specified via `initial_image_dims`."
            )

    @_framework_implemented()
    def build(self, framework: str = "torch") -> "Model":
        # Catch invalid settings before constructing any layers.
        self._validate(framework)

        if framework == "torch":
            from ray.rllib.core.models.torch.heads import TorchCNNTransposeHead

            return TorchCNNTransposeHead(self)

        elif framework == "tf2":
            from ray.rllib.core.models.tf.heads import TfCNNTransposeHead

            return TfCNNTransposeHead(self)
649
+
650
+
651
@ExperimentalAPI
@dataclass
class CNNEncoderConfig(ModelConfig):
    """Configuration for a convolutional (encoder) network.

    The configured CNN encodes 3D-observations into a latent space via a stack
    of Conv2D layers. `input_dims` describes the shape of the input tensor
    (width x height x channels). Each layer specified by
    `cnn_filter_specifiers` is followed by an activation function according to
    `cnn_activation`. `output_dims` is either the final Conv2D layer's output
    directly OR the flattened version of that output (see `flatten_at_end`).

    See ModelConfig for usage details.

    Example:

    .. testcode::

        # Configuration:
        config = CNNEncoderConfig(
            input_dims=[84, 84, 3],  # must be 3D tensor (image: w x h x C)
            cnn_filter_specifiers=[
                [16, [8, 8], 4],
                [32, [4, 4], 2],
            ],
            cnn_activation="relu",
            cnn_use_layernorm=False,
            cnn_use_bias=True,
        )
        model = config.build(framework="torch")

    Attributes:
        input_dims: The input dimension of the network, given in the form of
            `(width, height, channels)`.
        cnn_filter_specifiers: A list in which each element is another (inner)
            list of either the form `[number of channels/filters, kernel, stride]`
            OR `[number of channels/filters, kernel, stride, padding]`, where
            `padding` can either be "same" or "valid" ("same" is the default when
            omitted). `kernel` and `stride` may each be a single int (square) or
            a tuple/list of two ints (width- and height dimensions).
            Rule of thumb: with padding="same", each layer shrinks the input
            "image" by the factor `stride`, e.g. to reduce an Atari image from
            (84, 84, 3) down to (6, 6, F), halve w x h in each of four layers:
            [[16, 4, 2], [32, 4, 2], [64, 4, 2], [128, 4, 2]] -> (6, 6, 128).
        cnn_use_bias: Whether to use bias on all Conv2D layers.
        cnn_activation: The activation function to use after each layer (except
            for the output). The default activation for Conv2D layers is "relu".
        cnn_use_layernorm: Whether to insert a LayerNorm in between each CNN
            layer's output and its activation.
        cnn_kernel_initializer: The initializer function or class to use for
            kernel initialization in the CNN layers. If `None` the default
            initializer of the respective CNN layer of a framework (`"torch"` or
            `"tf2"`) is used. For `"torch"` only the in-place initializers, i.e.
            ending with an underscore "_", are allowed.
        cnn_kernel_initializer_config: Configuration to pass into the initializer
            defined in `cnn_kernel_initializer`.
        cnn_bias_initializer: The initializer function or class to use for bias
            initialization in the CNN layers. If `None` the default initializer
            of the respective CNN layer of a framework (`"torch"` or `"tf2"`) is
            used. For `"torch"` only the in-place initializers, i.e. ending with
            an underscore "_", are allowed.
        cnn_bias_initializer_config: Configuration to pass into the initializer
            defined in `cnn_bias_initializer`.
        flatten_at_end: Whether to flatten the output of the last Conv2D layer
            into a 1D tensor (default True). If False, another CNNEncoder could
            simply be stacked on top of this one (maybe with different activation
            and bias settings).
    """

    input_dims: Union[List[int], Tuple[int]] = None
    cnn_filter_specifiers: List[List[Union[int, List[int]]]] = field(
        default_factory=lambda: [[16, [4, 4], 2], [32, [4, 4], 2], [64, [8, 8], 2]]
    )
    cnn_use_bias: bool = True
    cnn_activation: str = "relu"
    cnn_use_layernorm: bool = False
    cnn_kernel_initializer: Optional[Union[str, Callable]] = None
    cnn_kernel_initializer_config: Optional[Dict] = None
    cnn_bias_initializer: Optional[Union[str, Callable]] = None
    cnn_bias_initializer_config: Optional[Dict] = None
    flatten_at_end: bool = True

    @property
    def output_dims(self):
        """Infers the output dims by pushing `input_dims` through all layers."""
        if not self.input_dims:
            return None

        w_h_c = self.input_dims
        for spec in self.cnn_filter_specifiers:
            if len(spec) == 3:
                # No padding given in the spec -> "same" by default.
                num_filters, kernel, stride = spec
                padding = "same"
            else:
                num_filters, kernel, stride, padding = spec

            if padding == "same":
                w_h = same_padding(w_h_c[:2], kernel, stride)[1]
            else:
                w_h = valid_padding(w_h_c[:2], kernel, stride)

            # The padding utilities only return width/height -> re-attach the
            # depth (num_filters) at the end.
            w_h_c = [w_h[0], w_h[1], num_filters]

        # Flatten everything, if requested.
        if self.flatten_at_end:
            return (int(np.prod(w_h_c)),)
        return tuple(w_h_c)

    def _validate(self, framework: str = "torch"):
        """Makes sure that settings are valid."""
        if len(self.input_dims) != 3:
            raise ValueError(
                f"`input_dims` ({self.input_dims}) of CNNEncoderConfig must be a 3D "
                "tensor (image) with the dimensions meaning: width x height x "
                "channels, e.g. `[64, 64, 3]`!"
            )
        out_rank = len(self.output_dims)
        if not self.flatten_at_end and out_rank != 3:
            raise ValueError(
                f"`output_dims` ({self.output_dims}) of CNNEncoderConfig must be "
                "3D, e.g. `[4, 4, 128]`, b/c your `flatten_at_end` setting is False! "
                "`output_dims` is an inferred value, hence other settings might be "
                "wrong."
            )
        elif self.flatten_at_end and out_rank != 1:
            raise ValueError(
                f"`output_dims` ({self.output_dims}) of CNNEncoderConfig must be "
                "1D, e.g. `[32]`, b/c your `flatten_at_end` setting is True! "
                "`output_dims` is an inferred value, hence other settings might be "
                "wrong."
            )

    @_framework_implemented()
    def build(self, framework: str = "torch") -> "Model":
        # Catch invalid settings before constructing any layers.
        self._validate(framework)

        if framework == "torch":
            from ray.rllib.core.models.torch.encoder import TorchCNNEncoder

            return TorchCNNEncoder(self)

        elif framework == "tf2":
            from ray.rllib.core.models.tf.encoder import TfCNNEncoder

            return TfCNNEncoder(self)
830
+
831
+
832
@ExperimentalAPI
@dataclass
class MLPEncoderConfig(_MLPConfig):
    """Configuration for an MLP that acts as an encoder.

    See `_MLPConfig` for the available settings and usage details.

    Example:
    .. testcode::

        # Configuration:
        config = MLPEncoderConfig(
            input_dims=[4],  # must be 1D tensor
            hidden_layer_dims=[16],
            hidden_layer_activation="relu",
            hidden_layer_use_layernorm=False,
            output_layer_dim=None,  # maybe None or an int
        )
        model = config.build(framework="torch")

        # Resulting stack in pseudocode:
        # Linear(4, 16, bias=True)
        # ReLU()

    Example:
    .. testcode::

        # Configuration:
        config = MLPEncoderConfig(
            input_dims=[2],
            hidden_layer_dims=[8, 8],
            hidden_layer_activation="silu",
            hidden_layer_use_layernorm=True,
            hidden_layer_use_bias=False,
            output_layer_dim=4,
            output_layer_activation="tanh",
            output_layer_use_bias=False,
        )
        model = config.build(framework="tf2")

        # Resulting stack in pseudocode:
        # Linear(2, 8, bias=False)
        # LayerNorm((8,))  # layernorm always before activation
        # SiLU()
        # Linear(8, 8, bias=False)
        # LayerNorm((8,))  # layernorm always before activation
        # SiLU()
        # Linear(8, 4, bias=False)
        # Tanh()
    """

    @_framework_implemented()
    def build(self, framework: str = "torch") -> "Encoder":
        # Catch invalid settings before constructing any layers.
        self._validate(framework)

        if framework == "torch":
            from ray.rllib.core.models.torch.encoder import TorchMLPEncoder as enc_cls
        else:
            from ray.rllib.core.models.tf.encoder import TfMLPEncoder as enc_cls

        return enc_cls(self)
895
+
896
+
897
@ExperimentalAPI
@dataclass
class RecurrentEncoderConfig(ModelConfig):
    """Configuration for an LSTM-based or a GRU-based encoder.

    The encoder consists of...
    - Zero or one tokenizers.
    - N LSTM/GRU layers stacked on top of each other and feeding
    their outputs as inputs to the respective next layer.

    This makes for the following flow of tensors:

    Inputs
    |
    [Tokenizer if present]
    |
    LSTM layer 1
    |
    (...)
    |
    LSTM layer n
    |
    Outputs

    The internal state is structured as (num_layers, B, hidden-size) for all
    hidden state components, e.g. h- and c-states of the LSTM layer(s) or
    h-state of the GRU layer(s). For example, the hidden states of an
    LSTMEncoder with num_layers=2 and hidden_dim=8 would be:
    {"h": (2, B, 8), "c": (2, B, 8)}.

    `output_dims` is reached by the last recurrent layer's dimension, which is
    always the `hidden_dim` value.

    Example:
    .. testcode::

        # Configuration:
        config = RecurrentEncoderConfig(
            recurrent_layer_type="lstm",
            input_dims=[16],  # must be 1D tensor
            hidden_dim=128,
            num_layers=2,
            use_bias=True,
        )
        model = config.build(framework="torch")

        # Resulting stack in pseudocode:
        # LSTM(16, 128, bias=True)
        # LSTM(128, 128, bias=True)

        # Resulting shape of the internal states (c- and h-states):
        # (2, B, 128) for each c- and h-states.

    Example:
    .. testcode::

        # Configuration:
        config = RecurrentEncoderConfig(
            recurrent_layer_type="gru",
            input_dims=[32],  # must be 1D tensor
            hidden_dim=64,
            num_layers=1,
            use_bias=False,
        )
        model = config.build(framework="torch")

        # Resulting stack in pseudocode:
        # GRU(32, 64, bias=False)

        # Resulting shape of the internal state:
        # (1, B, 64)

    Attributes:
        input_dims: The input dimensions. Must be 1D. This is the 1D shape of the
            tensor that goes into the first recurrent layer.
        recurrent_layer_type: The type of the recurrent layer(s).
            Either "lstm" or "gru".
        hidden_dim: The size of the hidden internal state(s) of the recurrent
            layer(s). For example, for an LSTM, this would be the size of the
            c- and h-tensors.
        num_layers: The number of recurrent (LSTM or GRU) layers to stack.
        batch_major: Whether the input is batch major (B, T, ..) or
            time major (T, B, ..).
        hidden_weights_initializer: The initializer function or class to use for
            weights initialization in the recurrent layers. If `None` the default
            initializer of the respective recurrent layer of a framework
            (`"torch"` or `"tf2"`) is used. For `"torch"` only the in-place
            initializers, i.e. ending with an underscore "_", are allowed.
        hidden_weights_initializer_config: Configuration to pass into the
            initializer defined in `hidden_weights_initializer`.
        use_bias: Whether to use bias on the recurrent layers in the network.
        hidden_bias_initializer: The initializer function or class to use for bias
            initialization in the recurrent layers. If `None` the default
            initializer of the respective recurrent layer of a framework
            (`"torch"` or `"tf2"`) is used. For `"torch"` only the in-place
            initializers, i.e. ending with an underscore "_", are allowed.
        hidden_bias_initializer_config: Configuration to pass into the initializer
            defined in `hidden_bias_initializer`.
        tokenizer_config: A ModelConfig to build tokenizers for observations,
            actions and other spaces.
    """

    recurrent_layer_type: str = "lstm"
    hidden_dim: int = None
    num_layers: int = None
    batch_major: bool = True
    hidden_weights_initializer: Optional[Union[str, Callable]] = None
    hidden_weights_initializer_config: Optional[Dict] = None
    use_bias: bool = True
    hidden_bias_initializer: Optional[Union[str, Callable]] = None
    hidden_bias_initializer_config: Optional[Dict] = None
    tokenizer_config: ModelConfig = None

    @property
    def output_dims(self):
        # The last recurrent layer's output size is always `hidden_dim`.
        return (self.hidden_dim,)

    def _validate(self, framework: str = "torch"):
        """Makes sure that settings are valid."""
        if self.recurrent_layer_type not in ["gru", "lstm"]:
            raise ValueError(
                f"`recurrent_layer_type` ({self.recurrent_layer_type}) of "
                "RecurrentEncoderConfig must be 'gru' or 'lstm'!"
            )
        if self.input_dims is not None and len(self.input_dims) != 1:
            raise ValueError(
                f"`input_dims` ({self.input_dims}) of RecurrentEncoderConfig must be "
                "1D, e.g. `[32]`!"
            )
        if len(self.output_dims) != 1:
            raise ValueError(
                f"`output_dims` ({self.output_dims}) of RecurrentEncoderConfig must be "
                "1D, e.g. `[32]`! This is an inferred value, hence other settings might"
                " be wrong."
            )

    @_framework_implemented()
    def build(self, framework: str = "torch") -> "Encoder":
        # Bug fix: `_validate()` was defined but never called here (all sibling
        # configs validate inside `build()`); without this call, an invalid
        # `recurrent_layer_type` would silently fall through to the GRU branch.
        self._validate(framework)

        if framework == "torch":
            from ray.rllib.core.models.torch.encoder import (
                TorchGRUEncoder as GRU,
                TorchLSTMEncoder as LSTM,
            )
        else:
            from ray.rllib.core.models.tf.encoder import (
                TfGRUEncoder as GRU,
                TfLSTMEncoder as LSTM,
            )

        if self.recurrent_layer_type == "lstm":
            return LSTM(self)
        else:
            return GRU(self)
1050
+
1051
+
1052
@ExperimentalAPI
@dataclass
class ActorCriticEncoderConfig(ModelConfig):
    """Configuration for an ActorCriticEncoder.

    The base encoder functions like other encoders in RLlib. The
    ActorCriticEncoder wraps it to provide a shared encoder Model to use in
    RLModules that provides twofold outputs: one for the actor and one for the
    critic. See ModelConfig for usage details.

    Attributes:
        base_encoder_config: The configuration for the wrapped encoder(s).
        shared: Whether the base encoder is shared between the actor and critic.
        inference_only: Whether the configured encoder will only ever be used as
            an actor-encoder, never as a value-function encoder. Thus, if True
            and `shared` is False, only the actor-related components are built.
    """

    base_encoder_config: ModelConfig = None
    shared: bool = True
    inference_only: bool = False

    @_framework_implemented()
    def build(self, framework: str = "torch") -> "Encoder":
        # A recurrent base encoder requires the stateful wrapper variant.
        stateful = isinstance(self.base_encoder_config, RecurrentEncoderConfig)

        if framework == "torch":
            from ray.rllib.core.models.torch.encoder import (
                TorchActorCriticEncoder,
                TorchStatefulActorCriticEncoder,
            )

            encoder_cls = (
                TorchStatefulActorCriticEncoder if stateful else TorchActorCriticEncoder
            )
        else:
            from ray.rllib.core.models.tf.encoder import (
                TfActorCriticEncoder,
                TfStatefulActorCriticEncoder,
            )

            encoder_cls = (
                TfStatefulActorCriticEncoder if stateful else TfActorCriticEncoder
            )

        return encoder_cls(self)
.venv/lib/python3.11/site-packages/ray/rllib/core/models/specs/__init__.py ADDED
File without changes
.venv/lib/python3.11/site-packages/ray/rllib/core/models/specs/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (200 Bytes). View file
 
.venv/lib/python3.11/site-packages/ray/rllib/core/models/specs/__pycache__/specs_base.cpython-311.pyc ADDED
Binary file (14.1 kB). View file
 
.venv/lib/python3.11/site-packages/ray/rllib/core/models/specs/__pycache__/specs_dict.cpython-311.pyc ADDED
Binary file (4.05 kB). View file
 
.venv/lib/python3.11/site-packages/ray/rllib/core/models/specs/__pycache__/typing.cpython-311.pyc ADDED
Binary file (648 Bytes). View file
 
.venv/lib/python3.11/site-packages/ray/rllib/core/models/specs/specs_base.py ADDED
@@ -0,0 +1,226 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import abc
2
+ from copy import deepcopy
3
+ import numpy as np
4
+ from typing import Any, Optional, Dict, List, Tuple, Union, Type
5
+ from ray.rllib.utils import try_import_jax, try_import_tf, try_import_torch
6
+ from ray.rllib.utils.deprecation import Deprecated
7
+ from ray.rllib.utils.typing import TensorType
8
+
9
+ torch, _ = try_import_torch()
10
+ _, tf, _ = try_import_tf()
11
+ jax, _ = try_import_jax()
12
+
13
+ _INVALID_INPUT_DUP_DIM = "Duplicate dimension names in shape ({})"
14
+ _INVALID_INPUT_UNKNOWN_DIM = "Unknown dimension name {} in shape ({})"
15
+ _INVALID_INPUT_POSITIVE = "Dimension {} in ({}) must be positive, got {}"
16
+ _INVALID_INPUT_INT_DIM = "Dimension {} in ({}) must be integer, got {}"
17
+ _INVALID_SHAPE = "Expected shape {} but found {}"
18
+ _INVALID_TYPE = "Expected data type {} but found {}"
19
+
20
+
21
+ @Deprecated(
22
+ help="The Spec checking APIs have been deprecated and cancelled without "
23
+ "replacement.",
24
+ error=False,
25
+ )
26
+ class Spec(abc.ABC):
27
+ @staticmethod
28
+ @abc.abstractmethod
29
+ def validate(self, data: Any) -> None:
30
+ pass
31
+
32
+
33
+ @Deprecated(
34
+ help="The Spec checking APIs have been deprecated and cancelled without "
35
+ "replacement.",
36
+ error=False,
37
+ )
38
+ class TypeSpec(Spec):
39
+ def __init__(self, dtype: Type) -> None:
40
+ self.dtype = dtype
41
+
42
+ def __repr__(self):
43
+ return f"TypeSpec({str(self.dtype)})"
44
+
45
+ def validate(self, data: Any) -> None:
46
+ if not isinstance(data, self.dtype):
47
+ raise ValueError(_INVALID_TYPE.format(self.dtype, type(data)))
48
+
49
+ def __eq__(self, other: "TypeSpec") -> bool:
50
+ if not isinstance(other, TypeSpec):
51
+ return False
52
+ return self.dtype == other.dtype
53
+
54
+ def __ne__(self, other: "TypeSpec") -> bool:
55
+ return not self == other
56
+
57
+
58
+ @Deprecated(
59
+ help="The Spec checking APIs have been deprecated and cancelled without "
60
+ "replacement.",
61
+ error=False,
62
+ )
63
+ class TensorSpec(Spec):
64
+ def __init__(
65
+ self,
66
+ shape: str,
67
+ *,
68
+ dtype: Optional[Any] = None,
69
+ framework: Optional[str] = None,
70
+ **shape_vals: int,
71
+ ) -> None:
72
+ self._expected_shape = self._parse_expected_shape(shape, shape_vals)
73
+ self._full_shape = self._get_full_shape()
74
+ self._dtype = dtype
75
+ self._framework = framework
76
+
77
+ if framework not in ("tf2", "torch", "np", "jax", None):
78
+ raise ValueError(f"Unknown framework {self._framework}")
79
+
80
+ self._type = self._get_expected_type()
81
+
82
+ def _get_expected_type(self) -> Type:
83
+ if self._framework == "torch":
84
+ return torch.Tensor
85
+ elif self._framework == "tf2":
86
+ return tf.Tensor
87
+ elif self._framework == "np":
88
+ return np.ndarray
89
+ elif self._framework == "jax":
90
+ jax, _ = try_import_jax()
91
+ return jax.numpy.ndarray
92
+ elif self._framework is None:
93
+ # Don't restrict the type of the tensor if no framework is specified.
94
+ return object
95
+
96
+ def get_shape(self, tensor: TensorType) -> Tuple[int]:
97
+ if self._framework == "tf2":
98
+ return tuple(
99
+ int(i) if i is not None else None for i in tensor.shape.as_list()
100
+ )
101
+ return tuple(tensor.shape)
102
+
103
+ def get_dtype(self, tensor: TensorType) -> Any:
104
+ return tensor.dtype
105
+
106
+ @property
107
+ def dtype(self) -> Any:
108
+ return self._dtype
109
+
110
+ @property
111
+ def shape(self) -> Tuple[Union[int, str]]:
112
+ return self._expected_shape
113
+
114
+ @property
115
+ def type(self) -> Type:
116
+ return self._type
117
+
118
+ @property
119
+ def full_shape(self) -> Tuple[int]:
120
+ return self._full_shape
121
+
122
+ def rdrop(self, n: int) -> "TensorSpec":
123
+ assert isinstance(n, int) and n >= 0, "n must be a positive integer or zero"
124
+ copy_ = deepcopy(self)
125
+ copy_._expected_shape = copy_.shape[:-n]
126
+ copy_._full_shape = self._get_full_shape()
127
+ return copy_
128
+
129
+ def append(self, spec: "TensorSpec") -> "TensorSpec":
130
+ copy_ = deepcopy(self)
131
+ copy_._expected_shape = (*copy_.shape, *spec.shape)
132
+ copy_._full_shape = self._get_full_shape()
133
+ return copy_
134
+
135
+ def validate(self, tensor: TensorType) -> None:
136
+ if not isinstance(tensor, self.type):
137
+ raise ValueError(_INVALID_TYPE.format(self.type, type(tensor).__name__))
138
+
139
+ shape = self.get_shape(tensor)
140
+ if len(shape) != len(self._expected_shape):
141
+ raise ValueError(_INVALID_SHAPE.format(self._expected_shape, shape))
142
+
143
+ for expected_d, actual_d in zip(self._expected_shape, shape):
144
+ if isinstance(expected_d, int) and expected_d != actual_d:
145
+ raise ValueError(_INVALID_SHAPE.format(self._expected_shape, shape))
146
+
147
+ dtype = tensor.dtype
148
+ if self.dtype and dtype != self.dtype:
149
+ raise ValueError(_INVALID_TYPE.format(self.dtype, tensor.dtype))
150
+
151
+ def fill(self, fill_value: Union[float, int] = 0) -> TensorType:
152
+ if self._framework == "torch":
153
+ return torch.full(self.full_shape, fill_value, dtype=self.dtype)
154
+
155
+ elif self._framework == "tf2":
156
+ if self.dtype:
157
+ return tf.ones(self.full_shape, dtype=self.dtype) * fill_value
158
+ return tf.fill(self.full_shape, fill_value)
159
+
160
+ elif self._framework == "np":
161
+ return np.full(self.full_shape, fill_value, dtype=self.dtype)
162
+
163
+ elif self._framework == "jax":
164
+ return jax.numpy.full(self.full_shape, fill_value, dtype=self.dtype)
165
+
166
+ elif self._framework is None:
167
+ raise ValueError(
168
+ "Cannot fill tensor without providing `framework` to TensorSpec. "
169
+ "This TensorSpec was instantiated without `framework`."
170
+ )
171
+
172
+ def _get_full_shape(self) -> Tuple[int]:
173
+ sampled_shape = tuple()
174
+ for d in self._expected_shape:
175
+ if isinstance(d, int):
176
+ sampled_shape += (d,)
177
+ else:
178
+ sampled_shape += (1,)
179
+ return sampled_shape
180
+
181
+ def _parse_expected_shape(self, shape: str, shape_vals: Dict[str, int]) -> tuple:
182
+ d_names = shape.replace(" ", "").split(",")
183
+ self._validate_shape_vals(d_names, shape_vals)
184
+
185
+ expected_shape = tuple(shape_vals.get(d, d) for d in d_names)
186
+
187
+ return expected_shape
188
+
189
+ def _validate_shape_vals(
190
+ self, d_names: List[str], shape_vals: Dict[str, int]
191
+ ) -> None:
192
+ d_names_set = set(d_names)
193
+ if len(d_names_set) != len(d_names):
194
+ raise ValueError(_INVALID_INPUT_DUP_DIM.format(",".join(d_names)))
195
+
196
+ for d_name in shape_vals:
197
+ if d_name not in d_names_set:
198
+ raise ValueError(
199
+ _INVALID_INPUT_UNKNOWN_DIM.format(d_name, ",".join(d_names))
200
+ )
201
+
202
+ d_value = shape_vals.get(d_name, None)
203
+ if d_value is not None:
204
+ if not isinstance(d_value, int):
205
+ raise ValueError(
206
+ _INVALID_INPUT_INT_DIM.format(
207
+ d_name, ",".join(d_names), type(d_value)
208
+ )
209
+ )
210
+ if d_value <= 0:
211
+ raise ValueError(
212
+ _INVALID_INPUT_POSITIVE.format(
213
+ d_name, ",".join(d_names), d_value
214
+ )
215
+ )
216
+
217
+ def __repr__(self) -> str:
218
+ return f"TensorSpec(shape={tuple(self.shape)}, dtype={self.dtype})"
219
+
220
+ def __eq__(self, other: "TensorSpec") -> bool:
221
+ if not isinstance(other, TensorSpec):
222
+ return False
223
+ return self.shape == other.shape and self.dtype == other.dtype
224
+
225
+ def __ne__(self, other: "TensorSpec") -> bool:
226
+ return not self == other
.venv/lib/python3.11/site-packages/ray/rllib/core/models/specs/specs_dict.py ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Any, Dict
2
+
3
+ import tree
4
+ from ray.rllib.core.models.specs.specs_base import Spec
5
+ from ray.rllib.utils import force_tuple
6
+
7
+
8
+ _MISSING_KEYS_FROM_DATA = (
9
+ "The data dict does not match the model specs. Keys {} are "
10
+ "in the spec dict but not on the data dict. Data keys are {}"
11
+ )
12
+ _TYPE_MISMATCH = (
13
+ "The data does not match the spec. The data element "
14
+ "{} has type {} (expected type {})."
15
+ )
16
+
17
+ DATA_TYPE = Dict[str, Any]
18
+
19
+ IS_NOT_PROPERTY = "Spec {} must be a property of the class {}."
20
+
21
+
22
+ class SpecDict(dict, Spec):
23
+ def validate(
24
+ self,
25
+ data: DATA_TYPE,
26
+ exact_match: bool = False,
27
+ ) -> None:
28
+ check = self.is_subset(self, data, exact_match)
29
+ if not check[0]:
30
+ data_keys_set = set()
31
+
32
+ def _map(path, s):
33
+ data_keys_set.add(force_tuple(path))
34
+
35
+ tree.map_structure_with_path(_map, data)
36
+
37
+ raise ValueError(_MISSING_KEYS_FROM_DATA.format(check[1], data_keys_set))
38
+
39
+ @staticmethod
40
+ def is_subset(spec_dict, data_dict, exact_match=False):
41
+ if exact_match:
42
+ tree.assert_same_structure(data_dict, spec_dict, check_types=False)
43
+
44
+ for key in spec_dict:
45
+ if key not in data_dict:
46
+ return False, key
47
+ if spec_dict[key] is None:
48
+ continue
49
+
50
+ elif isinstance(data_dict[key], dict):
51
+ if not isinstance(spec_dict[key], dict):
52
+ return False, key
53
+
54
+ res = SpecDict.is_subset(spec_dict[key], data_dict[key], exact_match)
55
+ if not res[0]:
56
+ return res
57
+
58
+ elif isinstance(spec_dict[key], dict):
59
+ return False, key
60
+
61
+ elif isinstance(spec_dict[key], Spec):
62
+ try:
63
+ spec_dict[key].validate(data_dict[key])
64
+ except ValueError as e:
65
+ raise ValueError(
66
+ f"Mismatch found in data element {key}, "
67
+ f"which is a TensorSpec: {e}"
68
+ )
69
+ elif isinstance(spec_dict[key], (type, tuple)):
70
+ if not isinstance(data_dict[key], spec_dict[key]):
71
+ raise ValueError(
72
+ _TYPE_MISMATCH.format(
73
+ key,
74
+ type(data_dict[key]).__name__,
75
+ spec_dict[key].__name__,
76
+ )
77
+ )
78
+ else:
79
+ raise ValueError(
80
+ f"The spec type has to be either TensorSpec or Type. "
81
+ f"got {type(spec_dict[key])}"
82
+ )
83
+
84
+ return True, None
.venv/lib/python3.11/site-packages/ray/rllib/core/models/specs/typing.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
from typing import Union, Type, Tuple, List, TYPE_CHECKING

if TYPE_CHECKING:
    # Imported only for annotations, to avoid a runtime import of specs_base.
    from ray.rllib.core.models.specs.specs_base import Spec


# A list of keys; a tuple entry denotes a nested key path.
NestedKeys = List[Union[str, Tuple[str, ...]]]
# A single value constraint: a type, a tuple of types, or a Spec instance.
Constraint = Union[Type, Tuple[Type, ...], "Spec"]
# Either a flat list of nested keys or a tree of constraints
# NOTE(review): `Union[NestedKeys]` collapses to just `NestedKeys`, so the
# "tree of constraints" half of the comment above has no corresponding union
# member — looks like an alternative (e.g. a Constraint mapping) was intended;
# confirm upstream before relying on this alias.
SpecType = Union[NestedKeys]
.venv/lib/python3.11/site-packages/ray/rllib/core/models/tf/__init__.py ADDED
File without changes
.venv/lib/python3.11/site-packages/ray/rllib/core/models/tf/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (197 Bytes). View file
 
.venv/lib/python3.11/site-packages/ray/rllib/core/models/tf/__pycache__/base.cpython-311.pyc ADDED
Binary file (3.97 kB). View file
 
.venv/lib/python3.11/site-packages/ray/rllib/core/models/tf/__pycache__/encoder.cpython-311.pyc ADDED
Binary file (14.7 kB). View file
 
.venv/lib/python3.11/site-packages/ray/rllib/core/models/tf/__pycache__/heads.cpython-311.pyc ADDED
Binary file (9.38 kB). View file
 
.venv/lib/python3.11/site-packages/ray/rllib/core/models/tf/__pycache__/primitives.cpython-311.pyc ADDED
Binary file (21.2 kB). View file
 
.venv/lib/python3.11/site-packages/ray/rllib/core/models/tf/base.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import abc
2
+ import logging
3
+ from typing import Tuple
4
+
5
+ import numpy as np
6
+
7
+ from ray.rllib.core.models.base import Model
8
+ from ray.rllib.core.models.configs import ModelConfig
9
+ from ray.rllib.utils.annotations import override
10
+ from ray.rllib.utils.framework import try_import_tf
11
+
12
+ logger = logging.getLogger(__name__)
13
+ _, tf, _ = try_import_tf()
14
+
15
+
16
class TfModel(Model, tf.keras.Model, abc.ABC):
    """Base class for RLlib's TensorFlow models.

    This class defines the interface for RLlib's TensorFlow models and checks
    whether inputs and outputs of __call__ are checked with `check_input_specs()` and
    `check_output_specs()` respectively.
    """

    def __init__(self, config: ModelConfig):
        # Initialize the keras side first: tf.keras.Model's constructor must
        # run before any sub-layers/models are created (Model.__init__ may
        # trigger such creation in subclasses).
        tf.keras.Model.__init__(self)
        Model.__init__(self, config)

    def call(self, input_dict: dict, **kwargs) -> dict:
        """Returns the output of this model for the given input.

        This method only makes sure that we have a spec-checked _forward() method.

        Args:
            input_dict: The input tensors.
            **kwargs: Forward compatibility kwargs.

        Returns:
            dict: The output tensors.
        """
        return self._forward(input_dict, **kwargs)

    @override(Model)
    def get_num_parameters(self) -> Tuple[int, int]:
        """Returns the number of (trainable, non-trainable) scalar parameters."""
        return (
            # Total scalar count = product of each weight tensor's shape.
            sum(int(np.prod(w.shape)) for w in self.trainable_weights),
            sum(int(np.prod(w.shape)) for w in self.non_trainable_weights),
        )

    @override(Model)
    def _set_to_dummy_weights(self, value_sequence=(-0.02, -0.01, 0.01, 0.02)):
        # Fill every weight tensor with a constant, cycling through
        # `value_sequence` so neighboring tensors get distinct values.
        for i, w in enumerate(self.trainable_weights + self.non_trainable_weights):
            fill_val = value_sequence[i % len(value_sequence)]
            w.assign(tf.fill(w.shape, fill_val))
.venv/lib/python3.11/site-packages/ray/rllib/core/models/tf/encoder.py ADDED
@@ -0,0 +1,315 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Dict
2
+
3
+ import tree # pip install dm_tree
4
+
5
+ from ray.rllib.core.columns import Columns
6
+ from ray.rllib.core.models.base import (
7
+ Encoder,
8
+ ActorCriticEncoder,
9
+ StatefulActorCriticEncoder,
10
+ ENCODER_OUT,
11
+ tokenize,
12
+ )
13
+ from ray.rllib.core.models.base import Model
14
+ from ray.rllib.core.models.configs import (
15
+ ActorCriticEncoderConfig,
16
+ CNNEncoderConfig,
17
+ MLPEncoderConfig,
18
+ RecurrentEncoderConfig,
19
+ )
20
+ from ray.rllib.core.models.tf.base import TfModel
21
+ from ray.rllib.core.models.tf.primitives import TfMLP, TfCNN
22
+ from ray.rllib.models.utils import get_initializer_fn
23
+ from ray.rllib.utils.annotations import override
24
+ from ray.rllib.utils.framework import try_import_tf
25
+
26
+ _, tf, _ = try_import_tf()
27
+
28
+
29
class TfActorCriticEncoder(TfModel, ActorCriticEncoder):
    """An encoder that can hold two encoders (actor- and critic-side)."""

    # Framework tag used by the framework-dispatching config `build()` methods.
    framework = "tf2"

    def __init__(self, config: ActorCriticEncoderConfig) -> None:
        # We have to call TfModel.__init__ first, because it calls the constructor of
        # tf.keras.Model, which is required to be called before models are created.
        TfModel.__init__(self, config)
        ActorCriticEncoder.__init__(self, config)
39
+
40
+
41
class TfStatefulActorCriticEncoder(TfModel, StatefulActorCriticEncoder):
    """A stateful actor-critic encoder for tf2.

    (The original docstring said "for torch" — a copy-paste slip; this is the
    TensorFlow implementation, see `framework` below.)
    """

    # Framework tag used by the framework-dispatching config `build()` methods.
    framework = "tf2"

    def __init__(self, config: ActorCriticEncoderConfig) -> None:
        # We have to call TfModel.__init__ first, because it calls the constructor of
        # tf.keras.Model, which is required to be called before models are created.
        TfModel.__init__(self, config)
        StatefulActorCriticEncoder.__init__(self, config)
51
+
52
+
53
class TfCNNEncoder(TfModel, Encoder):
    """A CNN encoder mapping observations to an (optionally flattened) latent.

    Builds a keras Sequential of: Input layer -> TfCNN stack -> optional
    Flatten (controlled by `config.flatten_at_end`).
    """

    def __init__(self, config: CNNEncoderConfig) -> None:
        TfModel.__init__(self, config)
        Encoder.__init__(self, config)

        # Add an input layer for the Sequential, created below. This is really
        # important to be able to derive the model's trainable_variables early on
        # (inside our Learners).
        layers = [tf.keras.layers.Input(shape=config.input_dims)]
        # The bare-bones CNN (no flatten, no succeeding dense).
        cnn = TfCNN(
            input_dims=config.input_dims,
            cnn_filter_specifiers=config.cnn_filter_specifiers,
            cnn_activation=config.cnn_activation,
            cnn_use_layernorm=config.cnn_use_layernorm,
            cnn_use_bias=config.cnn_use_bias,
            cnn_kernel_initializer=config.cnn_kernel_initializer,
            cnn_kernel_initializer_config=config.cnn_kernel_initializer_config,
            cnn_bias_initializer=config.cnn_bias_initializer,
            cnn_bias_initializer_config=config.cnn_bias_initializer_config,
        )
        layers.append(cnn)

        # Add a flatten operation to move from 2/3D into 1D space.
        if config.flatten_at_end:
            layers.append(tf.keras.layers.Flatten())

        # Create the network from gathered layers.
        self.net = tf.keras.Sequential(layers)

    @override(Model)
    def _forward(self, inputs: dict, **kwargs) -> dict:
        """Encodes `inputs[Columns.OBS]` and returns it under ENCODER_OUT."""
        return {ENCODER_OUT: self.net(inputs[Columns.OBS])}
86
+
87
+
88
# NOTE(review): base-class order here (Encoder, TfModel) differs from the
# sibling classes in this file, which use (TfModel, ...). This changes the MRO;
# confirm whether the asymmetry is intentional before unifying.
class TfMLPEncoder(Encoder, TfModel):
    """An MLP encoder mapping flat observations to a latent vector."""

    def __init__(self, config: MLPEncoderConfig) -> None:
        TfModel.__init__(self, config)
        Encoder.__init__(self, config)

        # Create the neural network.
        # All hidden-/output-layer settings are forwarded 1:1 from the config.
        self.net = TfMLP(
            input_dim=config.input_dims[0],
            hidden_layer_dims=config.hidden_layer_dims,
            hidden_layer_activation=config.hidden_layer_activation,
            hidden_layer_use_layernorm=config.hidden_layer_use_layernorm,
            hidden_layer_use_bias=config.hidden_layer_use_bias,
            hidden_layer_weights_initializer=config.hidden_layer_weights_initializer,
            hidden_layer_weights_initializer_config=(
                config.hidden_layer_weights_initializer_config
            ),
            hidden_layer_bias_initializer=config.hidden_layer_bias_initializer,
            hidden_layer_bias_initializer_config=(
                config.hidden_layer_bias_initializer_config
            ),
            output_dim=config.output_layer_dim,
            output_activation=config.output_layer_activation,
            output_use_bias=config.output_layer_use_bias,
            output_weights_initializer=config.output_layer_weights_initializer,
            output_weights_initializer_config=(
                config.output_layer_weights_initializer_config
            ),
            output_bias_initializer=config.output_layer_bias_initializer,
            output_bias_initializer_config=config.output_layer_bias_initializer_config,
        )

    @override(Model)
    def _forward(self, inputs: Dict, **kwargs) -> Dict:
        """Encodes `inputs[Columns.OBS]` and returns it under ENCODER_OUT."""
        return {ENCODER_OUT: self.net(inputs[Columns.OBS])}
122
+
123
+
124
class TfGRUEncoder(TfModel, Encoder):
    """A recurrent GRU encoder.

    This encoder has...
    - Zero or one tokenizers.
    - One or more GRU layers.
    """

    def __init__(self, config: RecurrentEncoderConfig) -> None:
        TfModel.__init__(self, config)

        # Maybe create a tokenizer
        if config.tokenizer_config is not None:
            self.tokenizer = config.tokenizer_config.build(framework="tf2")
            # For our first input dim, we infer from the tokenizer.
            # This is necessary because we need to build the layers in order to be
            # able to get/set weights directly after instantiation.
            input_dims = (1,) + tuple(
                self.tokenizer.output_specs[ENCODER_OUT].full_shape
            )
        else:
            self.tokenizer = None
            # Dummy (batch, time) dims of 1 each, plus the raw obs dims.
            input_dims = (
                1,
                1,
            ) + tuple(config.input_dims)

        gru_weights_initializer = get_initializer_fn(
            config.hidden_weights_initializer, framework="tf2"
        )
        gru_bias_initializer = get_initializer_fn(
            config.hidden_bias_initializer, framework="tf2"
        )

        # Create the tf GRU layers.
        self.grus = []
        for _ in range(config.num_layers):
            layer = tf.keras.layers.GRU(
                config.hidden_dim,
                time_major=not config.batch_major,
                # Note, if the initializer is `None`, we want TensorFlow
                # to use its default one. So we pass in `None`.
                kernel_initializer=(
                    gru_weights_initializer(**config.hidden_weights_initializer_config)
                    if config.hidden_weights_initializer_config
                    else gru_weights_initializer
                ),
                use_bias=config.use_bias,
                bias_initializer=(
                    gru_bias_initializer(**config.hidden_bias_initializer_config)
                    if config.hidden_bias_initializer_config
                    else gru_bias_initializer
                ),
                return_sequences=True,
                return_state=True,
            )
            # Build eagerly so weights exist right after instantiation
            # (see the tokenizer comment above).
            layer.build(input_dims)
            # Every subsequent layer consumes the previous layer's hidden_dim.
            input_dims = (1, 1, config.hidden_dim)
            self.grus.append(layer)

    @override(Model)
    def get_initial_state(self):
        """Returns zero-initialized hidden states, one row per GRU layer."""
        return {
            # Shape: (num_layers, hidden_dim); the batch dim is added upstream.
            "h": tf.zeros((self.config.num_layers, self.config.hidden_dim)),
        }

    @override(Model)
    def _forward(self, inputs: Dict, **kwargs) -> Dict:
        """Runs obs (or tokenized obs) through the GRU stack with STATE_IN."""
        outputs = {}

        if self.tokenizer is not None:
            # Push observations through the tokenizer encoder if we built one.
            out = tokenize(self.tokenizer, inputs, framework="tf2")
        else:
            # Otherwise, just use the raw observations.
            out = tf.cast(inputs[Columns.OBS], tf.float32)

        # States are batch-first when coming in. Make them layers-first.
        states_in = tree.map_structure(
            lambda s: tf.transpose(s, perm=[1, 0] + list(range(2, len(s.shape)))),
            inputs[Columns.STATE_IN],
        )

        states_out = []
        for i, layer in enumerate(self.grus):
            # Second positional arg is the layer's initial state.
            out, h = layer(out, states_in["h"][i])
            states_out.append(h)

        # Insert them into the output dict.
        outputs[ENCODER_OUT] = out
        # Stack per-layer states back into batch-first layout.
        outputs[Columns.STATE_OUT] = {"h": tf.stack(states_out, 1)}
        return outputs
216
+
217
+
218
class TfLSTMEncoder(TfModel, Encoder):
    """A recurrent LSTM encoder.

    This encoder has...
    - Zero or one tokenizers.
    - One or more LSTM layers.
    """

    def __init__(self, config: RecurrentEncoderConfig) -> None:
        TfModel.__init__(self, config)

        # Maybe create a tokenizer
        if config.tokenizer_config is not None:
            self.tokenizer = config.tokenizer_config.build(framework="tf2")
            # For our first input dim, we infer from the tokenizer.
            # This is necessary because we need to build the layers in order to be
            # able to get/set weights directly after instantiation.
            input_dims = (1,) + tuple(
                self.tokenizer.output_specs[ENCODER_OUT].full_shape
            )
        else:
            self.tokenizer = None
            # Dummy (batch, time) dims of 1 each, plus the raw obs dims.
            input_dims = (
                1,
                1,
            ) + tuple(config.input_dims)

        lstm_weights_initializer = get_initializer_fn(
            config.hidden_weights_initializer, framework="tf2"
        )
        lstm_bias_initializer = get_initializer_fn(
            config.hidden_bias_initializer, framework="tf2"
        )

        # Create the tf LSTM layers.
        self.lstms = []
        for _ in range(config.num_layers):
            layer = tf.keras.layers.LSTM(
                config.hidden_dim,
                time_major=not config.batch_major,
                # Note, if the initializer is `None`, we want TensorFlow
                # to use its default one. So we pass in `None`.
                kernel_initializer=(
                    lstm_weights_initializer(**config.hidden_weights_initializer_config)
                    if config.hidden_weights_initializer_config
                    else lstm_weights_initializer
                ),
                use_bias=config.use_bias,
                bias_initializer=(
                    lstm_bias_initializer(**config.hidden_bias_initializer_config)
                    if config.hidden_bias_initializer_config
                    # NOTE(review): this "zeros" fallback diverges from
                    # TfGRUEncoder, which falls back to the (possibly None)
                    # initializer fn instead — confirm which is intended.
                    else "zeros"
                ),
                return_sequences=True,
                return_state=True,
            )
            # Build eagerly so weights exist right after instantiation
            # (see the tokenizer comment above).
            layer.build(input_dims)
            # Every subsequent layer consumes the previous layer's hidden_dim.
            input_dims = (1, 1, config.hidden_dim)
            self.lstms.append(layer)

    @override(Model)
    def get_initial_state(self):
        """Returns zero-initialized hidden/cell states, one row per layer."""
        return {
            # Shapes: (num_layers, hidden_dim); the batch dim is added upstream.
            "h": tf.zeros((self.config.num_layers, self.config.hidden_dim)),
            "c": tf.zeros((self.config.num_layers, self.config.hidden_dim)),
        }

    @override(Model)
    def _forward(self, inputs: Dict, **kwargs) -> Dict:
        """Runs obs (or tokenized obs) through the LSTM stack with STATE_IN."""
        outputs = {}

        if self.tokenizer is not None:
            # Push observations through the tokenizer encoder if we built one.
            out = tokenize(self.tokenizer, inputs, framework="tf2")
        else:
            # Otherwise, just use the raw observations.
            out = tf.cast(inputs[Columns.OBS], tf.float32)

        # States are batch-first when coming in. Make them layers-first.
        states_in = tree.map_structure(
            lambda s: tf.transpose(s, perm=[1, 0, 2]),
            inputs[Columns.STATE_IN],
        )

        states_out_h = []
        states_out_c = []
        for i, layer in enumerate(self.lstms):
            # Second positional arg is the layer's initial (h, c) state pair.
            out, h, c = layer(out, (states_in["h"][i], states_in["c"][i]))
            states_out_h.append(h)
            states_out_c.append(c)

        # Insert them into the output dict.
        outputs[ENCODER_OUT] = out
        # Stack per-layer states back into batch-first layout.
        outputs[Columns.STATE_OUT] = {
            "h": tf.stack(states_out_h, 1),
            "c": tf.stack(states_out_c, 1),
        }
        return outputs
.venv/lib/python3.11/site-packages/ray/rllib/core/models/tf/heads.py ADDED
@@ -0,0 +1,198 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+
3
+ from ray.rllib.core.models.base import Model
4
+ from ray.rllib.core.models.configs import (
5
+ CNNTransposeHeadConfig,
6
+ FreeLogStdMLPHeadConfig,
7
+ MLPHeadConfig,
8
+ )
9
+ from ray.rllib.core.models.tf.base import TfModel
10
+ from ray.rllib.core.models.tf.primitives import TfCNNTranspose, TfMLP
11
+ from ray.rllib.models.utils import get_initializer_fn
12
+ from ray.rllib.utils import try_import_tf
13
+ from ray.rllib.utils.annotations import override
14
+
15
+ tf1, tf, tfv = try_import_tf()
16
+
17
+
18
class TfMLPHead(TfModel):
    """An MLP head, optionally clipping the log-std half of its outputs.

    When `config.clip_log_std` is True, the output is interpreted as
    [means, log_stds] (split in half on the last axis) and only the log_stds
    are clipped to +/- `config.log_std_clip_param`.
    """

    def __init__(self, config: MLPHeadConfig) -> None:
        TfModel.__init__(self, config)

        # All hidden-/output-layer settings are forwarded 1:1 from the config.
        self.net = TfMLP(
            input_dim=config.input_dims[0],
            hidden_layer_dims=config.hidden_layer_dims,
            hidden_layer_activation=config.hidden_layer_activation,
            hidden_layer_use_layernorm=config.hidden_layer_use_layernorm,
            hidden_layer_use_bias=config.hidden_layer_use_bias,
            hidden_layer_weights_initializer=config.hidden_layer_weights_initializer,
            hidden_layer_weights_initializer_config=(
                config.hidden_layer_weights_initializer_config
            ),
            hidden_layer_bias_initializer=config.hidden_layer_bias_initializer,
            hidden_layer_bias_initializer_config=(
                config.hidden_layer_bias_initializer_config
            ),
            output_dim=config.output_layer_dim,
            output_activation=config.output_layer_activation,
            output_use_bias=config.output_layer_use_bias,
            output_weights_initializer=config.output_layer_weights_initializer,
            output_weights_initializer_config=(
                config.output_layer_weights_initializer_config
            ),
            output_bias_initializer=config.output_layer_bias_initializer,
            output_bias_initializer_config=config.output_layer_bias_initializer_config,
        )
        # If log standard deviations should be clipped. This should be only true for
        # policy heads. Value heads should never be clipped.
        self.clip_log_std = config.clip_log_std
        # The clipping parameter for the log standard deviation.
        self.log_std_clip_param = tf.constant([config.log_std_clip_param])

    @override(Model)
    def _forward(self, inputs: tf.Tensor, **kwargs) -> tf.Tensor:
        """Pushes `inputs` through the MLP, clipping log-stds if configured."""
        # Only clip the log standard deviations, if the user wants to clip. This
        # avoids also clipping value heads.
        if self.clip_log_std:
            # Forward pass.
            means, log_stds = tf.split(self.net(inputs), num_or_size_splits=2, axis=-1)
            # Clip the log standard deviations.
            log_stds = tf.clip_by_value(
                log_stds, -self.log_std_clip_param, self.log_std_clip_param
            )
            return tf.concat([means, log_stds], axis=-1)
        # Otherwise just return the logits.
        else:
            return self.net(inputs)
67
+
68
+
69
class TfFreeLogStdMLPHead(TfModel):
    """An MLPHead that implements floating log stds for Gaussian distributions.

    The MLP only produces the means (half of `config.output_dims[0]`); the
    log-stds are a single free (state-independent) trainable variable that is
    tiled across the batch and concatenated onto the means.
    """

    def __init__(self, config: FreeLogStdMLPHeadConfig) -> None:
        TfModel.__init__(self, config)

        # Output is [means, log_stds], so the configured dim must split evenly.
        assert config.output_dims[0] % 2 == 0, "output_dims must be even for free std!"
        self._half_output_dim = config.output_dims[0] // 2

        # The MLP only outputs the mean half; see `_forward()`.
        self.net = TfMLP(
            input_dim=config.input_dims[0],
            hidden_layer_dims=config.hidden_layer_dims,
            hidden_layer_activation=config.hidden_layer_activation,
            hidden_layer_use_layernorm=config.hidden_layer_use_layernorm,
            hidden_layer_use_bias=config.hidden_layer_use_bias,
            hidden_layer_weights_initializer=config.hidden_layer_weights_initializer,
            hidden_layer_weights_initializer_config=(
                config.hidden_layer_weights_initializer_config
            ),
            hidden_layer_bias_initializer=config.hidden_layer_bias_initializer,
            hidden_layer_bias_initializer_config=(
                config.hidden_layer_bias_initializer_config
            ),
            output_dim=self._half_output_dim,
            output_activation=config.output_layer_activation,
            output_use_bias=config.output_layer_use_bias,
            output_weights_initializer=config.output_layer_weights_initializer,
            output_weights_initializer_config=(
                config.output_layer_weights_initializer_config
            ),
            output_bias_initializer=config.output_layer_bias_initializer,
            output_bias_initializer_config=config.output_layer_bias_initializer_config,
        )

        # The free (input-independent), trainable log-std vector.
        self.log_std = tf.Variable(
            tf.zeros(self._half_output_dim),
            name="log_std",
            dtype=tf.float32,
            trainable=True,
        )
        # If log standard deviations should be clipped. This should be only true for
        # policy heads. Value heads should never be clipped.
        self.clip_log_std = config.clip_log_std
        # The clipping parameter for the log standard deviation.
        self.log_std_clip_param = tf.constant([config.log_std_clip_param])

    @override(Model)
    def _forward(self, inputs: tf.Tensor, **kwargs) -> tf.Tensor:
        """Returns [means, tiled (and maybe clipped) log_stds], concatenated."""
        # Compute the mean first, then append the log_std.
        mean = self.net(inputs)
        # If log standard deviation should be clipped.
        if self.clip_log_std:
            # Clip log standard deviations to stabilize training. Note, the
            # default clip value is `inf`, i.e. no clipping.
            log_std = tf.clip_by_value(
                self.log_std, -self.log_std_clip_param, self.log_std_clip_param
            )
        else:
            log_std = self.log_std
        # Tile the single log_std vector across the batch dimension.
        log_std_out = tf.tile(tf.expand_dims(log_std, 0), [tf.shape(inputs)[0], 1])
        logits_out = tf.concat([mean, log_std_out], axis=1)
        return logits_out
131
+
132
+
133
class TfCNNTransposeHead(TfModel):
    """A head that up-samples a flat input into an image via Conv2DTranspose.

    Pipeline: Dense (to `initial_image_dims` scalars) -> reshape to 3D image
    -> TfCNNTranspose stack -> add 0.5 to the raw outputs.
    """

    def __init__(self, config: CNNTransposeHeadConfig) -> None:
        super().__init__(config)

        # Initial, inactivated Dense layer (always w/ bias). Use the
        # hidden layer initializer for this layer.
        initial_dense_weights_initializer = get_initializer_fn(
            config.initial_dense_weights_initializer, framework="tf2"
        )
        initial_dense_bias_initializer = get_initializer_fn(
            config.initial_dense_bias_initializer, framework="tf2"
        )

        # This layer is responsible for getting the incoming tensor into a proper
        # initial image shape (w x h x filters) for the suceeding Conv2DTranspose stack.
        self.initial_dense = tf.keras.layers.Dense(
            # One output unit per pixel/channel of the initial image.
            units=int(np.prod(config.initial_image_dims)),
            activation=None,
            kernel_initializer=(
                initial_dense_weights_initializer(
                    **config.initial_dense_weights_initializer_config
                )
                if config.initial_dense_weights_initializer_config
                else initial_dense_weights_initializer
            ),
            use_bias=True,
            bias_initializer=(
                initial_dense_bias_initializer(
                    **config.initial_dense_bias_initializer_config
                )
                if config.initial_dense_bias_initializer_config
                else initial_dense_bias_initializer
            ),
        )

        # The main CNNTranspose stack.
        self.cnn_transpose_net = TfCNNTranspose(
            input_dims=config.initial_image_dims,
            cnn_transpose_filter_specifiers=config.cnn_transpose_filter_specifiers,
            cnn_transpose_activation=config.cnn_transpose_activation,
            cnn_transpose_use_layernorm=config.cnn_transpose_use_layernorm,
            cnn_transpose_use_bias=config.cnn_transpose_use_bias,
            cnn_transpose_kernel_initializer=config.cnn_transpose_kernel_initializer,
            cnn_transpose_kernel_initializer_config=(
                config.cnn_transpose_kernel_initializer_config
            ),
            cnn_transpose_bias_initializer=config.cnn_transpose_bias_initializer,
            cnn_transpose_bias_initializer_config=(
                config.cnn_transpose_bias_initializer_config
            ),
        )

    @override(Model)
    def _forward(self, inputs: tf.Tensor, **kwargs) -> tf.Tensor:
        """Maps a flat latent `inputs` to an up-sampled image tensor."""
        # Push through initial dense layer to get dimensions of first "image".
        out = self.initial_dense(inputs)
        # Reshape to initial 3D (image-like) format to enter CNN transpose stack.
        out = tf.reshape(
            out,
            shape=(-1,) + tuple(self.config.initial_image_dims),
        )
        # Push through CNN transpose stack.
        out = self.cnn_transpose_net(out)
        # Add 0.5 to center the (always non-activated, non-normalized) outputs more
        # around 0.0.
        return out + 0.5
.venv/lib/python3.11/site-packages/ray/rllib/core/models/tf/primitives.py ADDED
@@ -0,0 +1,429 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Callable, Dict, List, Optional, Tuple, Union
2
+
3
+ from ray.rllib.models.utils import get_activation_fn, get_initializer_fn
4
+ from ray.rllib.utils.framework import try_import_tf
5
+
6
+ _, tf, _ = try_import_tf()
7
+
8
+
9
class TfMLP(tf.keras.Model):
    """A multi-layer perceptron with N dense layers.

    All layers (except for an optional additional extra output layer) share the same
    activation function, bias setup (use bias or not), and LayerNorm setup
    (use layer normalization or not).

    If `output_dim` (int) is not None, an additional, extra output dense layer is added,
    which might have its own activation function (e.g. "linear"). However, the output
    layer does NOT use layer normalization.
    """

    def __init__(
        self,
        *,
        input_dim: int,
        hidden_layer_dims: List[int],
        hidden_layer_use_layernorm: bool = False,
        hidden_layer_use_bias: bool = True,
        hidden_layer_activation: Optional[Union[str, Callable]] = "relu",
        hidden_layer_weights_initializer: Optional[Union[str, Callable]] = None,
        hidden_layer_weights_initializer_config: Optional[Dict] = None,
        hidden_layer_bias_initializer: Optional[Union[str, Callable]] = None,
        hidden_layer_bias_initializer_config: Optional[Dict] = None,
        output_dim: Optional[int] = None,
        output_use_bias: bool = True,
        output_activation: Optional[Union[str, Callable]] = "linear",
        output_weights_initializer: Optional[Union[str, Callable]] = None,
        output_weights_initializer_config: Optional[Dict] = None,
        output_bias_initializer: Optional[Union[str, Callable]] = None,
        output_bias_initializer_config: Optional[Dict] = None,
    ):
        """Initialize a TfMLP object.

        Args:
            input_dim: The input dimension of the network. Must not be None.
            hidden_layer_dims: The sizes of the hidden layers. If an empty list, only a
                single layer will be built of size `output_dim`.
            hidden_layer_use_layernorm: Whether to insert a LayerNormalization
                functionality in between each hidden layer's output and its activation.
            hidden_layer_use_bias: Whether to use bias on all dense layers (excluding
                the possible separate output layer).
            hidden_layer_activation: The activation function to use after each layer
                (except for the output). Either a tf.nn.[activation fn] callable or a
                string that's supported by tf.keras.layers.Activation(activation=...),
                e.g. "relu", "ReLU", "silu", or "linear".
            hidden_layer_weights_initializer: The initializer function or class to use
                for weights initialization in the hidden layers. If `None` the default
                initializer of the respective dense layer is used. Note, all
                initializers defined in `tf.keras.initializers` are allowed.
            hidden_layer_weights_initializer_config: Configuration to pass into the
                initializer defined in `hidden_layer_weights_initializer`.
            hidden_layer_bias_initializer: The initializer function or class to use for
                bias initialization in the hidden layers. If `None` the default
                initializer of the respective dense layer is used. Note, all
                initializers defined in `tf.keras.initializers` are allowed.
            hidden_layer_bias_initializer_config: Configuration to pass into the
                initializer defined in `hidden_layer_bias_initializer`.
            output_dim: The output dimension of the network. If None, no specific output
                layer will be added and the last layer in the stack will have
                size=`hidden_layer_dims[-1]`.
            output_use_bias: Whether to use bias on the separate output layer,
                if any.
            output_activation: The activation function to use for the output layer
                (if any). Either a tf.nn.[activation fn] callable or a string that's
                supported by tf.keras.layers.Activation(activation=...), e.g. "relu",
                "ReLU", "silu", or "linear".
            output_weights_initializer: The initializer function or class to use
                for weights initialization in the output layer. If `None` the default
                initializer of the respective dense layer is used. Note, all
                initializers defined in `tf.keras.initializers` are allowed.
            output_weights_initializer_config: Configuration to pass into the
                initializer defined in `output_weights_initializer`.
            output_bias_initializer: The initializer function or class to use for
                bias initialization in the output layer. If `None` the default
                initializer of the respective dense layer is used. Note, all
                initializers defined in `tf.keras.initializers` are allowed.
            output_bias_initializer_config: Configuration to pass into the
                initializer defined in `output_bias_initializer`.
        """
        super().__init__()
        assert input_dim > 0

        layers = []
        # Input layer.
        layers.append(tf.keras.Input(shape=(input_dim,)))

        hidden_activation = get_activation_fn(hidden_layer_activation, framework="tf2")
        hidden_weights_initializer = get_initializer_fn(
            hidden_layer_weights_initializer, framework="tf2"
        )
        hidden_bias_initializer = get_initializer_fn(
            hidden_layer_bias_initializer, framework="tf2"
        )

        for i in range(len(hidden_layer_dims)):
            # Dense layer with activation (or w/o in case we use LayerNorm, in which
            # case the activation is applied after the layer normalization step).
            layers.append(
                tf.keras.layers.Dense(
                    hidden_layer_dims[i],
                    activation=(
                        hidden_activation if not hidden_layer_use_layernorm else None
                    ),
                    # Note, if the initializer is `None`, we want TensorFlow
                    # to use its default one. So we pass in `None`.
                    kernel_initializer=(
                        hidden_weights_initializer(
                            **hidden_layer_weights_initializer_config
                        )
                        if hidden_layer_weights_initializer_config
                        else hidden_weights_initializer
                    ),
                    use_bias=hidden_layer_use_bias,
                    bias_initializer=(
                        hidden_bias_initializer(**hidden_layer_bias_initializer_config)
                        if hidden_layer_bias_initializer_config
                        else hidden_bias_initializer
                    ),
                )
            )
            # Add LayerNorm and activation.
            if hidden_layer_use_layernorm:
                # Use epsilon=1e-5 here (instead of default 1e-3) to be unified
                # with torch.
                layers.append(tf.keras.layers.LayerNormalization(epsilon=1e-5))
                layers.append(tf.keras.layers.Activation(hidden_activation))

        output_weights_initializer = get_initializer_fn(
            output_weights_initializer, framework="tf2"
        )
        output_bias_initializer = get_initializer_fn(
            output_bias_initializer, framework="tf2"
        )

        if output_dim is not None:
            output_activation = get_activation_fn(output_activation, framework="tf2")
            layers.append(
                tf.keras.layers.Dense(
                    output_dim,
                    activation=output_activation,
                    # Note, if the initializer is `None`, we want TensorFlow
                    # to use its default one. So we pass in `None`.
                    kernel_initializer=(
                        output_weights_initializer(**output_weights_initializer_config)
                        if output_weights_initializer_config
                        else output_weights_initializer
                    ),
                    use_bias=output_use_bias,
                    bias_initializer=(
                        output_bias_initializer(**output_bias_initializer_config)
                        if output_bias_initializer_config
                        else output_bias_initializer
                    ),
                )
            )

        self.network = tf.keras.Sequential(layers)

    def call(self, inputs, **kwargs):
        """Passes `inputs` through the dense stack and returns the output."""
        return self.network(inputs)
170
+
171
+
172
class TfCNN(tf.keras.Model):
    """A model containing a CNN with N Conv2D layers.

    All layers share the same activation function, bias setup (use bias or not), and
    LayerNormalization setup (use layer normalization or not).

    Note that there is no flattening nor an additional dense layer at the end of the
    stack. The output of the network is a 3D tensor of dimensions [width x height x num
    output filters].
    """

    def __init__(
        self,
        *,
        input_dims: Union[List[int], Tuple[int]],
        cnn_filter_specifiers: List[List[Union[int, List]]],
        cnn_use_bias: bool = True,
        cnn_use_layernorm: bool = False,
        cnn_activation: Optional[str] = "relu",
        cnn_kernel_initializer: Optional[Union[str, Callable]] = None,
        cnn_kernel_initializer_config: Optional[Dict] = None,
        cnn_bias_initializer: Optional[Union[str, Callable]] = None,
        cnn_bias_initializer_config: Optional[Dict] = None,
    ):
        """Initializes a TfCNN instance.

        Args:
            input_dims: The 3D input dimensions of the network (incoming image).
            cnn_filter_specifiers: A list in which each element is another (inner) list
                of either the form `[num filters, kernel, stride]` OR
                `[num filters, kernel, stride, padding]`, where `padding` is either
                "same" or "valid" (defaults to "same" when omitted). `kernel` and
                `stride` may each be a single int (square) or a 2-tuple/list
                (width/height) for non-square shapes.
            cnn_use_bias: Whether to use bias on all Conv2D layers.
            cnn_use_layernorm: Whether to insert a LayerNormalization functionality
                in between each Conv2D layer's outputs and its activation.
            cnn_activation: The activation function to use after each Conv2D layer.
            cnn_kernel_initializer: The initializer function or class to use for kernel
                initialization in the CNN layers. If `None` the default initializer of
                the respective CNN layer is used. Note, all initializers defined in
                `tf.keras.initializers` are allowed.
            cnn_kernel_initializer_config: Configuration to pass into the initializer
                defined in `cnn_kernel_initializer`.
            cnn_bias_initializer: The initializer function or class to use for bias
                initialization in the CNN layers. If `None` the default initializer of
                the respective CNN layer is used. Note, all initializers defined in
                `tf.keras.initializers` are allowed.
            cnn_bias_initializer_config: Configuration to pass into the initializer
                defined in `cnn_bias_initializer`.
        """
        super().__init__()

        assert len(input_dims) == 3

        # Resolve string/callable specifiers into actual tf functions/classes.
        activation_fn = get_activation_fn(cnn_activation, framework="tf2")
        kernel_init = get_initializer_fn(cnn_kernel_initializer, framework="tf2")
        bias_init = get_initializer_fn(cnn_bias_initializer, framework="tf2")

        # If a config dict was given, instantiate the initializer with it; otherwise
        # pass the (possibly None -> keras default) initializer through as-is.
        conv_kernel_initializer = (
            kernel_init(**cnn_kernel_initializer_config)
            if cnn_kernel_initializer_config
            else kernel_init
        )
        conv_bias_initializer = (
            bias_init(**cnn_bias_initializer_config)
            if cnn_bias_initializer_config
            else bias_init
        )

        stack = [tf.keras.layers.Input(shape=input_dims)]

        for spec in cnn_filter_specifiers:
            if len(spec) == 3:
                # No padding given -> default to "same".
                num_filters, kernel, stride = spec
                pad = "same"
            else:
                num_filters, kernel, stride, pad = spec

            stack.append(
                tf.keras.layers.Conv2D(
                    filters=num_filters,
                    kernel_size=kernel,
                    strides=stride,
                    padding=pad,
                    use_bias=cnn_use_bias,
                    # With LayerNorm, activation is applied after the norm instead.
                    activation=None if cnn_use_layernorm else activation_fn,
                    kernel_initializer=conv_kernel_initializer,
                    bias_initializer=conv_bias_initializer,
                )
            )
            if cnn_use_layernorm:
                # epsilon=1e-5 (not keras' default 1e-3) to be unified with torch.
                # Normalize over all image axes.
                stack.append(
                    tf.keras.layers.LayerNormalization(axis=[-3, -2, -1], epsilon=1e-5)
                )
                stack.append(tf.keras.layers.Activation(activation_fn))

        # Create the final CNN network.
        self.cnn = tf.keras.Sequential(stack)

        self.expected_input_dtype = tf.float32

    def call(self, inputs, **kwargs):
        """Casts `inputs` to the expected float dtype and runs the conv stack."""
        return self.cnn(tf.cast(inputs, self.expected_input_dtype))
297
+
298
+
299
class TfCNNTranspose(tf.keras.Model):
    """A model containing a CNNTranspose with N Conv2DTranspose layers.

    All layers share the same activation function, bias setup (use bias or not), and
    LayerNormalization setup (use layer normalization or not), except for the last one,
    which is never activated and never layer norm'd.

    Note that there is no reshaping/flattening nor an additional dense layer at the
    beginning or end of the stack. The input as well as output of the network are 3D
    tensors of dimensions [width x height x num output filters].
    """

    def __init__(
        self,
        *,
        input_dims: Union[List[int], Tuple[int]],
        cnn_transpose_filter_specifiers: List[List[Union[int, List]]],
        cnn_transpose_use_bias: bool = True,
        cnn_transpose_activation: Optional[str] = "relu",
        cnn_transpose_use_layernorm: bool = False,
        cnn_transpose_kernel_initializer: Optional[Union[str, Callable]] = None,
        cnn_transpose_kernel_initializer_config: Optional[Dict] = None,
        cnn_transpose_bias_initializer: Optional[Union[str, Callable]] = None,
        cnn_transpose_bias_initializer_config: Optional[Dict] = None,
    ):
        """Initializes a TfCNNTranspose instance.

        Args:
            input_dims: The 3D input dimensions of the network (incoming image).
            cnn_transpose_filter_specifiers: A list of lists, where each item represents
                one Conv2DTranspose layer. Each such Conv2DTranspose layer is further
                specified by the elements of the inner lists. The inner lists follow
                the format: `[number of filters, kernel, stride]` to
                specify a convolutional-transpose layer stacked in order of the
                outer list.
                `kernel` as well as `stride` might be provided as width x height tuples
                OR as single ints representing both dimension (width and height)
                in case of square shapes.
            cnn_transpose_use_bias: Whether to use bias on all Conv2DTranspose layers.
            cnn_transpose_use_layernorm: Whether to insert a LayerNormalization
                functionality in between each Conv2DTranspose layer's outputs and its
                activation.
                The last Conv2DTranspose layer will not be normed, regardless.
            cnn_transpose_activation: The activation function to use after each layer
                (except for the last Conv2DTranspose layer, which is always
                non-activated).
            cnn_transpose_kernel_initializer: The initializer function or class to use
                for kernel initialization in the CNN layers. If `None` the default
                initializer of the respective CNN layer is used. Note, all initializers
                defined in `tf.keras.initializers` are allowed.
            cnn_transpose_kernel_initializer_config: Configuration to pass into the
                initializer defined in `cnn_transpose_kernel_initializer`.
            cnn_transpose_bias_initializer: The initializer function or class to use for
                bias initialization in the CNN layers. If `None` the default initializer
                of the respective CNN layer is used. Note, all initializers
                defined in `tf.keras.initializers` are allowed.
            cnn_transpose_bias_initializer_config: Configuration to pass into the
                initializer defined in `cnn_transpose_bias_initializer`.
        """
        super().__init__()

        assert len(input_dims) == 3

        cnn_transpose_activation = get_activation_fn(
            cnn_transpose_activation, framework="tf2"
        )
        cnn_transpose_kernel_initializer = get_initializer_fn(
            cnn_transpose_kernel_initializer,
            framework="tf2",
        )
        cnn_transpose_bias_initializer = get_initializer_fn(
            cnn_transpose_bias_initializer, framework="tf2"
        )

        layers = []

        # Input layer.
        layers.append(tf.keras.layers.Input(shape=input_dims))

        for i, (num_filters, kernel_size, strides) in enumerate(
            cnn_transpose_filter_specifiers
        ):
            # The last layer gets special treatment below (no activation, no
            # LayerNorm, always a bias).
            is_final_layer = i == len(cnn_transpose_filter_specifiers) - 1
            layers.append(
                tf.keras.layers.Conv2DTranspose(
                    filters=num_filters,
                    kernel_size=kernel_size,
                    strides=strides,
                    padding="same",
                    # Last layer is never activated (regardless of config).
                    # With LayerNorm, activation is applied after the norm instead.
                    activation=(
                        None
                        if cnn_transpose_use_layernorm or is_final_layer
                        else cnn_transpose_activation
                    ),
                    # Note, if the initializer is `None`, we want TensorFlow
                    # to use its default one. So we pass in `None`.
                    kernel_initializer=(
                        cnn_transpose_kernel_initializer(
                            **cnn_transpose_kernel_initializer_config
                        )
                        if cnn_transpose_kernel_initializer_config
                        else cnn_transpose_kernel_initializer
                    ),
                    # Last layer always uses bias (b/c has no LayerNorm, regardless of
                    # config).
                    use_bias=cnn_transpose_use_bias or is_final_layer,
                    bias_initializer=(
                        cnn_transpose_bias_initializer(
                            **cnn_transpose_bias_initializer_config
                        )
                        if cnn_transpose_bias_initializer_config
                        else cnn_transpose_bias_initializer
                    ),
                )
            )
            if cnn_transpose_use_layernorm and not is_final_layer:
                # Use epsilon=1e-5 here (instead of default 1e-3) to be unified with
                # torch. Need to normalize over all axes.
                layers.append(
                    tf.keras.layers.LayerNormalization(axis=[-3, -2, -1], epsilon=1e-5)
                )
                layers.append(tf.keras.layers.Activation(cnn_transpose_activation))

        # Create the final CNNTranspose network.
        self.cnn_transpose = tf.keras.Sequential(layers)

        self.expected_input_dtype = tf.float32

    def call(self, inputs, **kwargs):
        """Casts `inputs` to the expected float dtype and runs the transpose stack."""
        return self.cnn_transpose(tf.cast(inputs, self.expected_input_dtype))
.venv/lib/python3.11/site-packages/ray/rllib/core/models/torch/__init__.py ADDED
File without changes
.venv/lib/python3.11/site-packages/ray/rllib/core/models/torch/__pycache__/base.cpython-311.pyc ADDED
Binary file (6.23 kB). View file
 
.venv/lib/python3.11/site-packages/ray/rllib/core/models/torch/__pycache__/encoder.cpython-311.pyc ADDED
Binary file (13.6 kB). View file
 
.venv/lib/python3.11/site-packages/ray/rllib/core/models/torch/__pycache__/primitives.cpython-311.pyc ADDED
Binary file (23.2 kB). View file
 
.venv/lib/python3.11/site-packages/ray/rllib/core/models/torch/base.py ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import abc
2
+ import logging
3
+ from typing import Tuple, Union
4
+
5
+ import numpy as np
6
+
7
+ from ray.rllib.core.models.base import Model
8
+ from ray.rllib.core.models.configs import ModelConfig
9
+ from ray.rllib.utils.annotations import override
10
+ from ray.rllib.utils.framework import try_import_torch
11
+ from ray.rllib.utils.typing import TensorType
12
+
13
+ torch, nn = try_import_torch()
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+
18
class TorchModel(nn.Module, Model, abc.ABC):
    """Base class for RLlib's PyTorch models.

    This class defines the interface for RLlib's PyTorch models and checks
    whether inputs and outputs of forward are checked with `check_input_specs()` and
    `check_output_specs()` respectively.

    Example usage for a single Flattening layer:

    .. testcode::

        from ray.rllib.core.models.configs import ModelConfig
        from ray.rllib.core.models.torch.base import TorchModel
        import torch

        class FlattenModelConfig(ModelConfig):
            def build(self, framework: str):
                assert framework == "torch"
                return TorchFlattenModel(self)

        class TorchFlattenModel(TorchModel):
            def __init__(self, config):
                TorchModel.__init__(self, config)
                self.flatten_layer = torch.nn.Flatten()

            def _forward(self, inputs, **kwargs):
                return self.flatten_layer(inputs)

        model = FlattenModelConfig().build("torch")
        inputs = torch.Tensor([[[1, 2]]])
        print(model(inputs))

    .. testoutput::

        tensor([[1., 2.]])

    """

    def __init__(self, config: ModelConfig):
        """Initialized a TorchModel.

        Args:
            config: The ModelConfig to use.
        """
        # nn.Module must be set up before Model (which may assign submodules).
        nn.Module.__init__(self)
        Model.__init__(self, config)

    def forward(
        self, inputs: Union[dict, TensorType], **kwargs
    ) -> Union[dict, TensorType]:
        """Returns the output of this model for the given input.

        This method only makes sure that we have a spec-checked _forward() method.

        Args:
            inputs: The input tensors.
            **kwargs: Forward compatibility kwargs.

        Returns:
            dict: The output tensors.
        """
        return self._forward(inputs, **kwargs)

    @override(Model)
    def get_num_parameters(self) -> Tuple[int, int]:
        """Returns a tuple of (num trainable params, num non-trainable params)."""
        num_trainable = 0
        num_total = 0
        for param in self.parameters():
            size = int(np.prod(param.size()))
            num_total += size
            if param.requires_grad:
                num_trainable += size
        return num_trainable, num_total - num_trainable

    @override(Model)
    def _set_to_dummy_weights(self, value_sequence=(-0.02, -0.01, 0.01, 0.02)):
        """Fills all parameters with fixed values cycling through `value_sequence`.

        Trainable parameters are filled first, then non-trainable ones.
        """
        ordered_params = [p for p in self.parameters() if p.requires_grad] + [
            p for p in self.parameters() if not p.requires_grad
        ]
        with torch.no_grad():
            for idx, param in enumerate(ordered_params):
                param.fill_(value_sequence[idx % len(value_sequence)])
.venv/lib/python3.11/site-packages/ray/rllib/core/models/torch/encoder.py ADDED
@@ -0,0 +1,284 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import tree
2
+
3
+ from ray.rllib.core.columns import Columns
4
+ from ray.rllib.core.models.base import (
5
+ Encoder,
6
+ ActorCriticEncoder,
7
+ StatefulActorCriticEncoder,
8
+ ENCODER_OUT,
9
+ )
10
+ from ray.rllib.core.models.base import Model, tokenize
11
+ from ray.rllib.core.models.configs import (
12
+ ActorCriticEncoderConfig,
13
+ CNNEncoderConfig,
14
+ MLPEncoderConfig,
15
+ RecurrentEncoderConfig,
16
+ )
17
+ from ray.rllib.core.models.torch.base import TorchModel
18
+ from ray.rllib.core.models.torch.primitives import TorchMLP, TorchCNN
19
+ from ray.rllib.models.utils import get_initializer_fn
20
+ from ray.rllib.utils.annotations import override
21
+ from ray.rllib.utils.framework import try_import_torch
22
+
23
+ torch, nn = try_import_torch()
24
+
25
+
26
class TorchActorCriticEncoder(TorchModel, ActorCriticEncoder):
    """An actor-critic encoder for torch."""

    framework = "torch"

    def __init__(self, config: ActorCriticEncoderConfig) -> None:
        """Initializes a TorchActorCriticEncoder instance.

        Args:
            config: The ActorCriticEncoderConfig to build this encoder from.
        """
        # Call both bases explicitly: TorchModel first, so nn.Module is set up
        # before ActorCriticEncoder.__init__ may assign any submodules.
        TorchModel.__init__(self, config)
        ActorCriticEncoder.__init__(self, config)
34
+
35
+
36
class TorchStatefulActorCriticEncoder(TorchModel, StatefulActorCriticEncoder):
    """A stateful actor-critic encoder for torch."""

    framework = "torch"

    def __init__(self, config: ActorCriticEncoderConfig) -> None:
        """Initializes a TorchStatefulActorCriticEncoder instance.

        Args:
            config: The ActorCriticEncoderConfig to build this encoder from.
        """
        # Call both bases explicitly: TorchModel first, so nn.Module is set up
        # before StatefulActorCriticEncoder.__init__ may assign any submodules.
        TorchModel.__init__(self, config)
        StatefulActorCriticEncoder.__init__(self, config)
44
+
45
+
46
class TorchMLPEncoder(TorchModel, Encoder):
    """An MLP encoder mapping 1D observations to a 1D latent via a TorchMLP."""

    def __init__(self, config: MLPEncoderConfig) -> None:
        TorchModel.__init__(self, config)
        Encoder.__init__(self, config)

        # Translate the encoder config 1:1 into TorchMLP constructor kwargs.
        mlp_kwargs = dict(
            input_dim=config.input_dims[0],
            hidden_layer_dims=config.hidden_layer_dims,
            hidden_layer_activation=config.hidden_layer_activation,
            hidden_layer_use_layernorm=config.hidden_layer_use_layernorm,
            hidden_layer_use_bias=config.hidden_layer_use_bias,
            hidden_layer_weights_initializer=config.hidden_layer_weights_initializer,
            hidden_layer_weights_initializer_config=(
                config.hidden_layer_weights_initializer_config
            ),
            hidden_layer_bias_initializer=config.hidden_layer_bias_initializer,
            hidden_layer_bias_initializer_config=(
                config.hidden_layer_bias_initializer_config
            ),
            output_dim=config.output_layer_dim,
            output_activation=config.output_layer_activation,
            output_use_bias=config.output_layer_use_bias,
            output_weights_initializer=config.output_layer_weights_initializer,
            output_weights_initializer_config=(
                config.output_layer_weights_initializer_config
            ),
            output_bias_initializer=config.output_layer_bias_initializer,
            output_bias_initializer_config=config.output_layer_bias_initializer_config,
        )
        # Create the neural network.
        self.net = TorchMLP(**mlp_kwargs)

    @override(Model)
    def _forward(self, inputs: dict, **kwargs) -> dict:
        # Encode the observations and return them under the ENCODER_OUT key.
        observations = inputs[Columns.OBS]
        return {ENCODER_OUT: self.net(observations)}
80
+
81
+
82
class TorchCNNEncoder(TorchModel, Encoder):
    """A CNN encoder mapping image observations to a (possibly flattened) latent."""

    def __init__(self, config: CNNEncoderConfig) -> None:
        TorchModel.__init__(self, config)
        Encoder.__init__(self, config)

        # The bare-bones CNN (no flatten, no succeeding dense).
        cnn = TorchCNN(
            input_dims=config.input_dims,
            cnn_filter_specifiers=config.cnn_filter_specifiers,
            cnn_activation=config.cnn_activation,
            cnn_use_layernorm=config.cnn_use_layernorm,
            cnn_use_bias=config.cnn_use_bias,
            cnn_kernel_initializer=config.cnn_kernel_initializer,
            cnn_kernel_initializer_config=config.cnn_kernel_initializer_config,
            cnn_bias_initializer=config.cnn_bias_initializer,
            cnn_bias_initializer_config=config.cnn_bias_initializer_config,
        )

        # Optionally append a flatten op to move from 2/3D into 1D space, then
        # wrap everything into a single sequential module.
        if config.flatten_at_end:
            self.net = nn.Sequential(cnn, nn.Flatten())
        else:
            self.net = nn.Sequential(cnn)

    @override(Model)
    def _forward(self, inputs: dict, **kwargs) -> dict:
        # Encode the observations and return them under the ENCODER_OUT key.
        observations = inputs[Columns.OBS]
        return {ENCODER_OUT: self.net(observations)}
112
+
113
+
114
class TorchGRUEncoder(TorchModel, Encoder):
    """A recurrent GRU encoder.

    This encoder has...
    - Zero or one tokenizers.
    - One or more GRU layers.
    """

    def __init__(self, config: RecurrentEncoderConfig) -> None:
        """Initializes a TorchGRUEncoder instance.

        Args:
            config: The RecurrentEncoderConfig to build this encoder from.
        """
        TorchModel.__init__(self, config)

        # Maybe create a tokenizer
        if config.tokenizer_config is not None:
            self.tokenizer = config.tokenizer_config.build(framework="torch")
            gru_input_dims = config.tokenizer_config.output_dims
        else:
            self.tokenizer = None
            gru_input_dims = config.input_dims

        # We only support 1D spaces right now.
        assert len(gru_input_dims) == 1
        gru_input_dim = gru_input_dims[0]

        gru_weights_initializer = get_initializer_fn(
            config.hidden_weights_initializer, framework="torch"
        )
        gru_bias_initializer = get_initializer_fn(
            config.hidden_bias_initializer, framework="torch"
        )

        # Create the torch GRU layer.
        self.gru = nn.GRU(
            gru_input_dim,
            config.hidden_dim,
            config.num_layers,
            batch_first=config.batch_major,
            bias=config.use_bias,
        )

        # Initialize GRU weights and biases, if necessary.
        # NOTE: Fixed here. Previously, both initializers were applied to
        # `self.gru.weight`, which a) does not exist on `nn.GRU` (its parameters
        # are `weight_ih_l{k}`, `weight_hh_l{k}`, `bias_ih_l{k}`, `bias_hh_l{k}`),
        # raising AttributeError whenever an initializer was configured, and
        # b) applied the *bias* initializer to a weight tensor. We now iterate
        # `all_weights` (one `[w_ih, w_hh, b_ih, b_hh]` list per layer), mirroring
        # TorchLSTMEncoder.
        for layer in self.gru.all_weights:
            if gru_weights_initializer:
                gru_weights_initializer(
                    layer[0], **config.hidden_weights_initializer_config or {}
                )
                gru_weights_initializer(
                    layer[1], **config.hidden_weights_initializer_config or {}
                )
            # With `bias=False`, `all_weights` entries only hold the two weight
            # tensors -> skip bias initialization then.
            if gru_bias_initializer and len(layer) > 2:
                gru_bias_initializer(
                    layer[2], **config.hidden_bias_initializer_config or {}
                )
                gru_bias_initializer(
                    layer[3], **config.hidden_bias_initializer_config or {}
                )

    @override(Model)
    def get_initial_state(self):
        """Returns the zero-filled initial hidden state (per batch item)."""
        return {
            "h": torch.zeros(self.config.num_layers, self.config.hidden_dim),
        }

    @override(Model)
    def _forward(self, inputs: dict, **kwargs) -> dict:
        outputs = {}

        if self.tokenizer is not None:
            # Push observations through the tokenizer encoder if we built one.
            out = tokenize(self.tokenizer, inputs, framework="torch")
        else:
            # Otherwise, just use the raw observations.
            out = inputs[Columns.OBS].float()

        # States are batch-first when coming in. Make them layers-first.
        states_in = tree.map_structure(
            lambda s: s.transpose(0, 1), inputs[Columns.STATE_IN]
        )

        out, states_out = self.gru(out, states_in["h"])
        states_out = {"h": states_out}

        # Insert them into the output dict (states back to batch-first).
        outputs[ENCODER_OUT] = out
        outputs[Columns.STATE_OUT] = tree.map_structure(
            lambda s: s.transpose(0, 1), states_out
        )
        return outputs
195
+
196
+
197
class TorchLSTMEncoder(TorchModel, Encoder):
    """A recurrent LSTM encoder.

    This encoder has...
    - Zero or one tokenizers.
    - One or more LSTM layers.
    """

    def __init__(self, config: RecurrentEncoderConfig) -> None:
        """Initializes a TorchLSTMEncoder instance.

        Args:
            config: The RecurrentEncoderConfig to build this encoder from.
        """
        TorchModel.__init__(self, config)

        # Maybe create a tokenizer
        if config.tokenizer_config is not None:
            self.tokenizer = config.tokenizer_config.build(framework="torch")
            lstm_input_dims = config.tokenizer_config.output_dims
        else:
            self.tokenizer = None
            lstm_input_dims = config.input_dims

        # We only support 1D spaces right now.
        assert len(lstm_input_dims) == 1
        lstm_input_dim = lstm_input_dims[0]

        lstm_weights_initializer = get_initializer_fn(
            config.hidden_weights_initializer, framework="torch"
        )
        lstm_bias_initializer = get_initializer_fn(
            config.hidden_bias_initializer, framework="torch"
        )

        # Create the torch LSTM layer.
        self.lstm = nn.LSTM(
            lstm_input_dim,
            config.hidden_dim,
            config.num_layers,
            batch_first=config.batch_major,
            bias=config.use_bias,
        )

        # Initialize LSTM layer weigths and biases, if necessary.
        # Each entry of `all_weights` is a per-layer list
        # [weight_ih, weight_hh, bias_ih, bias_hh]; indices 0/1 are the weight
        # tensors, 2/3 the bias tensors.
        # NOTE(review): with `bias=False` these per-layer lists only hold the two
        # weight tensors, so a configured bias initializer would hit an
        # IndexError then — verify `use_bias`/initializer combinations upstream.
        for layer in self.lstm.all_weights:
            if lstm_weights_initializer:
                lstm_weights_initializer(
                    layer[0], **config.hidden_weights_initializer_config or {}
                )
                lstm_weights_initializer(
                    layer[1], **config.hidden_weights_initializer_config or {}
                )
            if lstm_bias_initializer:
                lstm_bias_initializer(
                    layer[2], **config.hidden_bias_initializer_config or {}
                )
                lstm_bias_initializer(
                    layer[3], **config.hidden_bias_initializer_config or {}
                )

    @override(Model)
    def get_initial_state(self):
        """Returns zero-filled initial hidden and cell states (per batch item)."""
        return {
            "h": torch.zeros(self.config.num_layers, self.config.hidden_dim),
            "c": torch.zeros(self.config.num_layers, self.config.hidden_dim),
        }

    @override(Model)
    def _forward(self, inputs: dict, **kwargs) -> dict:
        outputs = {}

        if self.tokenizer is not None:
            # Push observations through the tokenizer encoder if we built one.
            out = tokenize(self.tokenizer, inputs, framework="torch")
        else:
            # Otherwise, just use the raw observations.
            out = inputs[Columns.OBS].float()

        # States are batch-first when coming in. Make them layers-first.
        states_in = tree.map_structure(
            lambda s: s.transpose(0, 1), inputs[Columns.STATE_IN]
        )

        out, states_out = self.lstm(out, (states_in["h"], states_in["c"]))
        states_out = {"h": states_out[0], "c": states_out[1]}

        # Insert them into the output dict (states back to batch-first).
        outputs[ENCODER_OUT] = out
        outputs[Columns.STATE_OUT] = tree.map_structure(
            lambda s: s.transpose(0, 1), states_out
        )
        return outputs
.venv/lib/python3.11/site-packages/ray/rllib/core/models/torch/heads.py ADDED
@@ -0,0 +1,197 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+
3
+ from ray.rllib.core.models.base import Model
4
+ from ray.rllib.core.models.configs import (
5
+ CNNTransposeHeadConfig,
6
+ FreeLogStdMLPHeadConfig,
7
+ MLPHeadConfig,
8
+ )
9
+ from ray.rllib.core.models.torch.base import TorchModel
10
+ from ray.rllib.core.models.torch.primitives import TorchCNNTranspose, TorchMLP
11
+ from ray.rllib.models.utils import get_initializer_fn
12
+ from ray.rllib.utils.annotations import override
13
+ from ray.rllib.utils.framework import try_import_torch
14
+
15
+ torch, nn = try_import_torch()
16
+
17
+
18
class TorchMLPHead(TorchModel):
    """An MLP-based head (e.g. policy- or value-function head).

    Wraps a `TorchMLP` built from the given `MLPHeadConfig`. If
    `config.clip_log_std` is set (policy heads only), the log-std half of the
    network output is clamped to +/- `config.log_std_clip_param`.
    """

    def __init__(self, config: MLPHeadConfig) -> None:
        super().__init__(config)

        # Translate the config into TorchMLP constructor kwargs.
        mlp_kwargs = dict(
            input_dim=config.input_dims[0],
            hidden_layer_dims=config.hidden_layer_dims,
            hidden_layer_activation=config.hidden_layer_activation,
            hidden_layer_use_layernorm=config.hidden_layer_use_layernorm,
            hidden_layer_use_bias=config.hidden_layer_use_bias,
            hidden_layer_weights_initializer=config.hidden_layer_weights_initializer,
            hidden_layer_weights_initializer_config=(
                config.hidden_layer_weights_initializer_config
            ),
            hidden_layer_bias_initializer=config.hidden_layer_bias_initializer,
            hidden_layer_bias_initializer_config=(
                config.hidden_layer_bias_initializer_config
            ),
            output_dim=config.output_layer_dim,
            output_activation=config.output_layer_activation,
            output_use_bias=config.output_layer_use_bias,
            output_weights_initializer=config.output_layer_weights_initializer,
            output_weights_initializer_config=(
                config.output_layer_weights_initializer_config
            ),
            output_bias_initializer=config.output_layer_bias_initializer,
            output_bias_initializer_config=config.output_layer_bias_initializer_config,
        )
        self.net = TorchMLP(**mlp_kwargs)

        # Whether log standard deviations should be clipped. Only true for
        # policy heads; value heads are never clipped.
        self.clip_log_std = config.clip_log_std
        # The clipping parameter for the log standard deviation; registered as
        # a buffer so it follows the module across devices.
        self.log_std_clip_param = torch.Tensor([config.log_std_clip_param])
        self.register_buffer("log_std_clip_param_const", self.log_std_clip_param)

    @override(Model)
    def _forward(self, inputs: torch.Tensor, **kwargs) -> torch.Tensor:
        # Non-clipping heads (e.g. value functions): plain forward pass.
        if not self.clip_log_std:
            return self.net(inputs)

        # Gaussian policy head: split the outputs into means and log-stds and
        # clamp only the log-std half.
        means, log_stds = self.net(inputs).chunk(2, dim=-1)
        clip = self.log_std_clip_param_const
        log_stds = torch.clamp(log_stds, -clip, clip)
        return torch.cat((means, log_stds), dim=-1)
69
+
70
+
71
class TorchFreeLogStdMLPHead(TorchModel):
    """An MLPHead that implements floating log stds for Gaussian distributions.

    The MLP only produces the means; the log stds are a free (state-independent),
    learnable parameter vector that `_forward` broadcasts over the batch and
    concatenates to the means.
    """

    def __init__(self, config: FreeLogStdMLPHeadConfig) -> None:
        super().__init__(config)

        # Output must split evenly into means and log-stds.
        assert config.output_dims[0] % 2 == 0, "output_dims must be even for free std!"
        self._half_output_dim = config.output_dims[0] // 2

        # MLP producing only the means (half of the total output size).
        self.net = TorchMLP(
            input_dim=config.input_dims[0],
            hidden_layer_dims=config.hidden_layer_dims,
            hidden_layer_activation=config.hidden_layer_activation,
            hidden_layer_use_layernorm=config.hidden_layer_use_layernorm,
            hidden_layer_use_bias=config.hidden_layer_use_bias,
            hidden_layer_weights_initializer=config.hidden_layer_weights_initializer,
            hidden_layer_weights_initializer_config=(
                config.hidden_layer_weights_initializer_config
            ),
            hidden_layer_bias_initializer=config.hidden_layer_bias_initializer,
            hidden_layer_bias_initializer_config=(
                config.hidden_layer_bias_initializer_config
            ),
            output_dim=self._half_output_dim,
            output_activation=config.output_layer_activation,
            output_use_bias=config.output_layer_use_bias,
            output_weights_initializer=config.output_layer_weights_initializer,
            output_weights_initializer_config=(
                config.output_layer_weights_initializer_config
            ),
            output_bias_initializer=config.output_layer_bias_initializer,
            output_bias_initializer_config=config.output_layer_bias_initializer_config,
        )

        # The free, learnable log-std vector (one entry per output dim),
        # initialized to 0.0.
        self.log_std = torch.nn.Parameter(torch.zeros(self._half_output_dim))
        # If log standard deviations should be clipped. This should be only true
        # for policy heads. Value heads should never be clipped.
        self.clip_log_std = config.clip_log_std
        # The clipping parameter for the log standard deviation.
        # Fix: use `torch.tensor` here. The legacy `torch.Tensor(data, device=...)`
        # constructor does not support a `device` kwarg together with data and is
        # discouraged in favor of `torch.tensor`.
        self.log_std_clip_param = torch.tensor(
            [config.log_std_clip_param],
            dtype=torch.float32,
            device=self.log_std.device,
        )
        # Register a buffer to handle device mapping.
        self.register_buffer("log_std_clip_param_const", self.log_std_clip_param)

    @override(Model)
    def _forward(self, inputs: torch.Tensor, **kwargs) -> torch.Tensor:
        # Compute the mean first, then append the log_std.
        mean = self.net(inputs)

        # If log standard deviation should be clipped.
        if self.clip_log_std:
            # Clip the log standard deviation to avoid running into too small
            # deviations that factually collapse the policy.
            log_std = torch.clamp(
                self.log_std,
                -self.log_std_clip_param_const,
                self.log_std_clip_param_const,
            )
        else:
            log_std = self.log_std

        # Broadcast the shared log stds across the batch and concatenate
        # (use `dim=` instead of the numpy-compat `axis=` alias).
        return torch.cat([mean, log_std.unsqueeze(0).repeat([len(mean), 1])], dim=1)
136
+
137
+
138
class TorchCNNTransposeHead(TorchModel):
    """An image-generating head: Dense layer + Conv2DTranspose stack.

    The initial (non-activated) Dense layer reshapes the incoming vector into a
    small initial "image", which the Conv2DTranspose stack then upscales.
    """

    def __init__(self, config: CNNTransposeHeadConfig) -> None:
        """Initializes a TorchCNNTransposeHead instance from its config.

        Args:
            config: The `CNNTransposeHeadConfig` to build this head from.
        """
        super().__init__(config)

        # Initial, inactivated Dense layer (always w/ bias).
        # This layer is responsible for getting the incoming tensor into a proper
        # initial image shape (w x h x filters) for the succeeding Conv2DTranspose
        # stack.
        self.initial_dense = nn.Linear(
            in_features=config.input_dims[0],
            out_features=int(np.prod(config.initial_image_dims)),
            bias=True,
        )

        # Initial Dense layer initializers.
        initial_dense_weights_initializer = get_initializer_fn(
            config.initial_dense_weights_initializer, framework="torch"
        )
        initial_dense_bias_initializer = get_initializer_fn(
            config.initial_dense_bias_initializer, framework="torch"
        )

        # Initialize dense layer weights, if necessary.
        if initial_dense_weights_initializer:
            initial_dense_weights_initializer(
                self.initial_dense.weight,
                **config.initial_dense_weights_initializer_config or {},
            )
        # Initialize dense layer bias, if necessary.
        if initial_dense_bias_initializer:
            initial_dense_bias_initializer(
                self.initial_dense.bias,
                **config.initial_dense_bias_initializer_config or {},
            )

        # The main CNNTranspose stack.
        self.cnn_transpose_net = TorchCNNTranspose(
            input_dims=config.initial_image_dims,
            cnn_transpose_filter_specifiers=config.cnn_transpose_filter_specifiers,
            cnn_transpose_activation=config.cnn_transpose_activation,
            cnn_transpose_use_layernorm=config.cnn_transpose_use_layernorm,
            cnn_transpose_use_bias=config.cnn_transpose_use_bias,
            cnn_transpose_kernel_initializer=config.cnn_transpose_kernel_initializer,
            cnn_transpose_kernel_initializer_config=(
                config.cnn_transpose_kernel_initializer_config
            ),
            cnn_transpose_bias_initializer=config.cnn_transpose_bias_initializer,
            cnn_transpose_bias_initializer_config=(
                config.cnn_transpose_bias_initializer_config
            ),
        )

    @override(Model)
    def _forward(self, inputs: torch.Tensor, **kwargs) -> torch.Tensor:
        out = self.initial_dense(inputs)
        # Reshape to initial 3D (image-like) format to enter CNN transpose stack.
        out = out.reshape((-1,) + tuple(self.config.initial_image_dims))
        out = self.cnn_transpose_net(out)
        # Add 0.5 to center (always non-activated, non-normalized) outputs more
        # around 0.0.
        return out + 0.5
.venv/lib/python3.11/site-packages/ray/rllib/core/models/torch/primitives.py ADDED
@@ -0,0 +1,479 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Callable, Dict, List, Optional, Union, Tuple
2
+
3
+ from ray.rllib.core.models.torch.utils import Stride2D
4
+ from ray.rllib.models.torch.misc import (
5
+ same_padding,
6
+ same_padding_transpose_after_stride,
7
+ valid_padding,
8
+ )
9
+ from ray.rllib.models.utils import get_activation_fn, get_initializer_fn
10
+ from ray.rllib.utils.framework import try_import_torch
11
+
12
+ torch, nn = try_import_torch()
13
+
14
+
15
class TorchMLP(nn.Module):
    """A multi-layer perceptron with N dense layers.

    All layers (except for an optional additional extra output layer) share the same
    activation function, bias setup (use bias or not), and LayerNorm setup
    (use layer normalization or not).

    If `output_dim` (int) is not None, an additional, extra output dense layer is
    added, which might have its own activation function (e.g. "linear"). However,
    the output layer does NOT use layer normalization.
    """

    def __init__(
        self,
        *,
        input_dim: int,
        hidden_layer_dims: List[int],
        hidden_layer_activation: Union[str, Callable] = "relu",
        hidden_layer_use_bias: bool = True,
        hidden_layer_use_layernorm: bool = False,
        hidden_layer_weights_initializer: Optional[Union[str, Callable]] = None,
        hidden_layer_weights_initializer_config: Optional[Union[str, Callable]] = None,
        hidden_layer_bias_initializer: Optional[Union[str, Callable]] = None,
        hidden_layer_bias_initializer_config: Optional[Dict] = None,
        output_dim: Optional[int] = None,
        output_use_bias: bool = True,
        output_activation: Union[str, Callable] = "linear",
        output_weights_initializer: Optional[Union[str, Callable]] = None,
        output_weights_initializer_config: Optional[Dict] = None,
        output_bias_initializer: Optional[Union[str, Callable]] = None,
        output_bias_initializer_config: Optional[Dict] = None,
    ):
        """Initialize a TorchMLP object.

        Args:
            input_dim: The input dimension of the network. Must not be None.
            hidden_layer_dims: The sizes of the hidden layers. If an empty list,
                only a single layer will be built of size `output_dim`.
            hidden_layer_activation: The activation function to use after each
                layer (except for the output), e.g. "ReLU", "relu", "tanh",
                "SiLU", or "linear".
            hidden_layer_use_bias: Whether to use bias on all dense layers
                (excluding the possible separate output layer).
            hidden_layer_use_layernorm: Whether to insert a LayerNormalization
                functionality in between each hidden layer's output and its
                activation.
            hidden_layer_weights_initializer: The initializer function or class to
                use for weights initialization in the hidden layers. If `None`,
                the default initializer of the respective dense layer is used.
                Note, only in-place initializers (ending with an underscore "_")
                are allowed.
            hidden_layer_weights_initializer_config: Configuration to pass into
                the initializer defined in `hidden_layer_weights_initializer`.
            hidden_layer_bias_initializer: The initializer function or class to
                use for bias initialization in the hidden layers. If `None`, the
                default initializer of the respective dense layer is used. Note,
                only in-place initializers (ending with "_") are allowed.
            hidden_layer_bias_initializer_config: Configuration to pass into the
                initializer defined in `hidden_layer_bias_initializer`.
            output_dim: The output dimension of the network. If None, no specific
                output layer will be added and the last layer in the stack will
                have size=`hidden_layer_dims[-1]`.
            output_use_bias: Whether to use bias on the separate output layer,
                if any.
            output_activation: The activation function to use for the output
                layer (if any).
            output_weights_initializer: The initializer function or class to use
                for weights initialization in the output layer. Same in-place
                ("_") restriction as above.
            output_weights_initializer_config: Configuration to pass into the
                initializer defined in `output_weights_initializer`.
            output_bias_initializer: The initializer function or class to use for
                bias initialization in the output layer. Same in-place ("_")
                restriction as above.
            output_bias_initializer_config: Configuration to pass into the
                initializer defined in `output_bias_initializer`.
        """
        super().__init__()
        assert input_dim > 0

        self.input_dim = input_dim

        # Resolve activation/initializer names into callables (None stays None).
        hidden_activation = get_activation_fn(
            hidden_layer_activation, framework="torch"
        )
        hidden_weights_initializer = get_initializer_fn(
            hidden_layer_weights_initializer, framework="torch"
        )
        hidden_bias_initializer = get_initializer_fn(
            hidden_layer_bias_initializer, framework="torch"
        )
        output_weights_initializer = get_initializer_fn(
            output_weights_initializer, framework="torch"
        )
        output_bias_initializer = get_initializer_fn(
            output_bias_initializer, framework="torch"
        )

        layers = []
        dims = (
            [self.input_dim]
            + list(hidden_layer_dims)
            + ([output_dim] if output_dim else [])
        )
        for i in range(0, len(dims) - 1):
            # Whether we are already processing the last (special) output layer.
            is_output_layer = output_dim is not None and i == len(dims) - 2

            layer = nn.Linear(
                dims[i],
                dims[i + 1],
                bias=output_use_bias if is_output_layer else hidden_layer_use_bias,
            )
            # Initialize layers, if necessary.
            if is_output_layer:
                # Initialize output layer weights if necessary.
                if output_weights_initializer:
                    output_weights_initializer(
                        layer.weight, **output_weights_initializer_config or {}
                    )
                # Initialize output layer bias if necessary.
                if output_bias_initializer:
                    output_bias_initializer(
                        layer.bias, **output_bias_initializer_config or {}
                    )
            # Must be hidden.
            else:
                # Consistency fix: check the RESOLVED initializer callables
                # (like the output-layer branch above) rather than the raw
                # config values.
                if hidden_weights_initializer:
                    hidden_weights_initializer(
                        layer.weight, **hidden_layer_weights_initializer_config or {}
                    )
                if hidden_bias_initializer:
                    hidden_bias_initializer(
                        layer.bias, **hidden_layer_bias_initializer_config or {}
                    )

            layers.append(layer)

            # We are still in the hidden layer section: Possibly add layernorm
            # and hidden activation.
            if not is_output_layer:
                # Insert a layer normalization in between layer's output and
                # the activation.
                if hidden_layer_use_layernorm:
                    # We use an epsilon of 0.001 here to mimick the Tf default
                    # behavior.
                    layers.append(nn.LayerNorm(dims[i + 1], eps=0.001))
                # Add the activation function.
                if hidden_activation is not None:
                    layers.append(hidden_activation())

        # Add output layer's (if any) activation.
        output_activation = get_activation_fn(output_activation, framework="torch")
        if output_dim is not None and output_activation is not None:
            layers.append(output_activation())

        self.mlp = nn.Sequential(*layers)

    def forward(self, x):
        """Pushes `x` through the dense stack and returns the result."""
        return self.mlp(x)
179
+
180
+
181
class TorchCNN(nn.Module):
    """A model containing a CNN with N Conv2D layers.

    All layers share the same activation function, bias setup (use bias or not),
    and LayerNorm setup (use layer normalization or not).

    Note that there is no flattening nor an additional dense layer at the end of
    the stack. The output of the network is a 3D tensor of dimensions
    [width x height x num output filters].
    """

    def __init__(
        self,
        *,
        input_dims: Union[List[int], Tuple[int]],
        cnn_filter_specifiers: List[List[Union[int, List]]],
        cnn_use_bias: bool = True,
        cnn_use_layernorm: bool = False,
        cnn_activation: str = "relu",
        cnn_kernel_initializer: Optional[Union[str, Callable]] = None,
        cnn_kernel_initializer_config: Optional[Dict] = None,
        cnn_bias_initializer: Optional[Union[str, Callable]] = None,
        cnn_bias_initializer_config: Optional[Dict] = None,
    ):
        """Initializes a TorchCNN instance.

        Args:
            input_dims: The 3D input dimensions of the network (incoming image).
            cnn_filter_specifiers: A list in which each element is another
                (inner) list of either the form
                `[number of channels/filters, kernel, stride]` or
                `[number of channels/filters, kernel, stride, padding]`, where
                `padding` is either "same" or "valid" ("same" is the default
                when omitted). `kernel` and `stride` may be single ints
                (square) or width/height 2-tuples.
            cnn_use_bias: Whether to use bias on all Conv2D layers.
            cnn_use_layernorm: Whether to insert a LayerNormalization
                functionality in between each Conv2D layer's outputs and its
                activation.
            cnn_activation: The activation function to use after each Conv2D
                layer.
            cnn_kernel_initializer: The initializer function or class to use for
                kernel initialization in the CNN layers. If `None` the default
                initializer of the respective CNN layer is used. Note, only
                in-place initializers (ending with an underscore "_") are
                allowed.
            cnn_kernel_initializer_config: Configuration to pass into the
                initializer defined in `cnn_kernel_initializer`.
            cnn_bias_initializer: The initializer function or class to use for
                bias initialization in the CNN layers. If `None` the default
                initializer of the respective CNN layer is used. Note, only
                in-place initializers (ending with an underscore "_") are
                allowed.
            cnn_bias_initializer_config: Configuration to pass into the
                initializer defined in `cnn_bias_initializer`.
        """
        super().__init__()

        assert len(input_dims) == 3

        cnn_activation = get_activation_fn(cnn_activation, framework="torch")
        cnn_kernel_initializer = get_initializer_fn(
            cnn_kernel_initializer, framework="torch"
        )
        cnn_bias_initializer = get_initializer_fn(
            cnn_bias_initializer, framework="torch"
        )
        layers = []

        # Add user-specified hidden convolutional layers first.
        width, height, in_depth = input_dims
        in_size = [width, height]
        for filter_specs in cnn_filter_specifiers:
            # Padding information not provided -> Use "same" as default.
            if len(filter_specs) == 3:
                out_depth, kernel_size, strides = filter_specs
                padding = "same"
            # Padding information provided.
            else:
                out_depth, kernel_size, strides, padding = filter_specs

            # Pad like in tensorflow's SAME/VALID mode.
            if padding == "same":
                padding_size, out_size = same_padding(in_size, kernel_size, strides)
                layers.append(nn.ZeroPad2d(padding_size))
            # No actual padding is performed for "valid" mode, but we will still
            # compute the output size (input for the next layer).
            else:
                out_size = valid_padding(in_size, kernel_size, strides)

            layer = nn.Conv2d(
                in_depth, out_depth, kernel_size, strides, bias=cnn_use_bias
            )

            # Initialize CNN layer kernel if necessary.
            if cnn_kernel_initializer:
                cnn_kernel_initializer(
                    layer.weight, **cnn_kernel_initializer_config or {}
                )
            # Initialize CNN layer bias if necessary. Bugfix: with
            # `cnn_use_bias=False`, `layer.bias` is None and must not be passed
            # to an initializer (in-place torch initializers raise on None).
            if cnn_bias_initializer and layer.bias is not None:
                cnn_bias_initializer(layer.bias, **cnn_bias_initializer_config or {})

            layers.append(layer)

            # Layernorm.
            if cnn_use_layernorm:
                # We use an epsilon of 0.001 here to mimick the Tf default
                # behavior.
                layers.append(LayerNorm1D(out_depth, eps=0.001))
            # Activation.
            if cnn_activation is not None:
                layers.append(cnn_activation())

            in_size = out_size
            in_depth = out_depth

        # Create the CNN.
        self.cnn = nn.Sequential(*layers)

    def forward(self, inputs):
        """Runs a channels-last image batch through the Conv2D stack."""
        # Permute b/c data comes in as channels_last ([B, dim, dim, channels]) ->
        # Convert to `channels_first` for torch:
        inputs = inputs.permute(0, 3, 1, 2)
        out = self.cnn(inputs)
        # Permute back to `channels_last`.
        return out.permute(0, 2, 3, 1)
315
+
316
+
317
class TorchCNNTranspose(nn.Module):
    """A model containing a CNNTranspose with N Conv2DTranspose layers.

    All layers share the same activation function, bias setup (use bias or not),
    and LayerNormalization setup (use layer normalization or not), except for the
    last one, which is never activated and never layer norm'd.

    Note that there is no reshaping/flattening nor an additional dense layer at
    the beginning or end of the stack. The input as well as output of the network
    are 3D tensors of dimensions [width x height x num output filters].
    """

    def __init__(
        self,
        *,
        input_dims: Union[List[int], Tuple[int]],
        cnn_transpose_filter_specifiers: List[List[Union[int, List]]],
        cnn_transpose_use_bias: bool = True,
        cnn_transpose_activation: str = "relu",
        cnn_transpose_use_layernorm: bool = False,
        cnn_transpose_kernel_initializer: Optional[Union[str, Callable]] = None,
        cnn_transpose_kernel_initializer_config: Optional[Dict] = None,
        cnn_transpose_bias_initializer: Optional[Union[str, Callable]] = None,
        cnn_transpose_bias_initializer_config: Optional[Dict] = None,
    ):
        """Initializes a TorchCNNTranspose instance.

        Args:
            input_dims: The 3D input dimensions of the network (incoming image).
            cnn_transpose_filter_specifiers: A list of lists, where each item
                represents one Conv2DTranspose layer in the format
                `[number of filters, kernel, stride]`. `kernel` and `stride`
                may be single ints (square) or width/height 2-tuples.
            cnn_transpose_use_bias: Whether to use bias on all Conv2DTranspose
                layers (the last layer always uses bias, regardless).
            cnn_transpose_activation: The activation function to use after each
                layer (except for the last Conv2DTranspose layer, which is
                always non-activated).
            cnn_transpose_use_layernorm: Whether to insert a LayerNormalization
                functionality in between each Conv2DTranspose layer's outputs
                and its activation. The last layer is never normed, regardless.
            cnn_transpose_kernel_initializer: The initializer function or class
                to use for kernel initialization in the layers. If `None` the
                default initializer of the respective layer is used. Note, only
                in-place initializers (ending with "_") are allowed.
            cnn_transpose_kernel_initializer_config: Configuration to pass into
                the initializer defined in `cnn_transpose_kernel_initializer`.
            cnn_transpose_bias_initializer: The initializer function or class to
                use for bias initialization in the layers. If `None` the default
                initializer of the respective layer is used. Note, only in-place
                initializers (ending with "_") are allowed.
            cnn_transpose_bias_initializer_config: Configuration to pass into
                the initializer defined in `cnn_transpose_bias_initializer`.
        """
        super().__init__()

        assert len(input_dims) == 3

        cnn_transpose_activation = get_activation_fn(
            cnn_transpose_activation, framework="torch"
        )
        cnn_transpose_kernel_initializer = get_initializer_fn(
            cnn_transpose_kernel_initializer, framework="torch"
        )
        cnn_transpose_bias_initializer = get_initializer_fn(
            cnn_transpose_bias_initializer, framework="torch"
        )

        layers = []

        # Add user-specified hidden convolutional layers first.
        width, height, in_depth = input_dims
        in_size = [width, height]
        for i, (out_depth, kernel, stride) in enumerate(
            cnn_transpose_filter_specifiers
        ):
            is_final_layer = i == len(cnn_transpose_filter_specifiers) - 1

            # Resolve stride and kernel width/height values if only int given
            # (squared).
            s_w, s_h = (stride, stride) if isinstance(stride, int) else stride
            k_w, k_h = (kernel, kernel) if isinstance(kernel, int) else kernel

            # Stride the incoming image first.
            stride_layer = Stride2D(in_size[0], in_size[1], s_w, s_h)
            layers.append(stride_layer)
            # Then 0-pad (like in tensorflow's SAME mode).
            # This will return the necessary padding such that for stride=1, the
            # output image has the same size as the input image, for stride=2,
            # the output image is 2x the input image, etc..
            padding, out_size = same_padding_transpose_after_stride(
                (stride_layer.out_width, stride_layer.out_height), kernel, stride
            )
            layers.append(nn.ZeroPad2d(padding))  # left, right, top, bottom
            # Then do the Conv2DTranspose operation
            # (now that we have padded and strided manually, w/o any more padding
            # using stride=1).
            layer = nn.ConvTranspose2d(
                in_depth,
                out_depth,
                kernel,
                # Force-set stride to 1 as we already took care of it.
                1,
                # Disable torch auto-padding (torch interprets the padding
                # setting as: dilation (==1.0) * [`kernel` - 1] - [`padding`]).
                padding=(k_w - 1, k_h - 1),
                # Last layer always uses bias (b/c has no LayerNorm, regardless
                # of config).
                bias=cnn_transpose_use_bias or is_final_layer,
            )

            # Initialize CNN Transpose layer kernel if necessary.
            if cnn_transpose_kernel_initializer:
                cnn_transpose_kernel_initializer(
                    layer.weight, **cnn_transpose_kernel_initializer_config or {}
                )
            # Initialize CNN Transpose layer bias if necessary. Bugfix: with
            # `cnn_transpose_use_bias=False`, non-final layers have
            # `layer.bias is None`, which must not be passed to an initializer.
            if cnn_transpose_bias_initializer and layer.bias is not None:
                cnn_transpose_bias_initializer(
                    layer.bias, **cnn_transpose_bias_initializer_config or {}
                )

            layers.append(layer)
            # Layernorm (never for final layer).
            if cnn_transpose_use_layernorm and not is_final_layer:
                layers.append(LayerNorm1D(out_depth, eps=0.001))
            # Last layer is never activated (regardless of config).
            if cnn_transpose_activation is not None and not is_final_layer:
                layers.append(cnn_transpose_activation())

            in_size = (out_size[0], out_size[1])
            in_depth = out_depth

        # Create the final CNNTranspose network.
        self.cnn_transpose = nn.Sequential(*layers)

    def forward(self, inputs):
        """Runs a channels-last image batch through the transpose stack."""
        # Permute b/c data comes in as [B, dim, dim, channels]:
        out = inputs.permute(0, 3, 1, 2)
        out = self.cnn_transpose(out)
        return out.permute(0, 2, 3, 1)
463
+
464
+
465
class LayerNorm1D(nn.Module):
    """LayerNorm over the channel dim of a channels-first (B, C, H, W) tensor.

    Statistics are computed per spatial position across the C channels only,
    by moving channels to the last dim (nn.LayerNorm's convention) and back.
    """

    def __init__(self, num_features, **kwargs):
        super().__init__()
        # `num_features` is the channel count C; extra kwargs (e.g. `eps`) are
        # forwarded to `nn.LayerNorm`.
        self.layer_norm = nn.LayerNorm(num_features, **kwargs)

    def forward(self, x):
        # x: (B, C, H, W) -> channels-last so LayerNorm normalizes over C.
        channels_last = x.permute(0, 2, 3, 1)
        normed = self.layer_norm(channels_last)
        # Back to channels-first.
        return normed.permute(0, 3, 1, 2)
.venv/lib/python3.11/site-packages/ray/rllib/core/models/torch/utils.py ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from ray.rllib.utils.framework import try_import_torch
2
+
3
+ torch, nn = try_import_torch()
4
+
5
+
6
class Stride2D(nn.Module):
    """A striding layer for doing torch Conv2DTranspose operations.

    Using this layer before the 0-padding (on a 3D input "image") and before
    the actual ConvTranspose2d allows for a padding="same" behavior that matches
    100% that of a `tf.keras.layers.Conv2DTranspose` layer.

    Examples:
        Input image (4x4):
        A B C D
        E F G H
        I J K L
        M N O P

        Stride with stride=2 -> output image=(7x7)
        A 0 B 0 C 0 D
        0 0 0 0 0 0 0
        E 0 F 0 G 0 H
        0 0 0 0 0 0 0
        I 0 J 0 K 0 L
        0 0 0 0 0 0 0
        M 0 N 0 O 0 P
    """

    def __init__(self, width, height, stride_w, stride_h):
        """Initializes a Stride2D instance.

        Args:
            width: The width of the 3D input "image".
            height: The height of the 3D input "image".
            stride_w: The stride in width direction, with which to stride the
                incoming image.
            stride_h: The stride in height direction, with which to stride the
                incoming image.
        """
        super().__init__()

        self.width = width
        self.height = height
        self.stride_w = stride_w
        self.stride_h = stride_h

        # Slice ends for trimming the (stride - 1) trailing rows/cols produced
        # by the repeats below. Bugfix: for stride == 1, the original
        # `[: -(stride - 1)]` slice becomes `[:0]`, i.e. an EMPTY slice that
        # broke this layer entirely; `-(s - 1) or None` yields None (no
        # trimming) in that case and is unchanged for stride >= 2.
        self._trim_w = -(self.stride_w - 1) or None
        self._trim_h = -(self.stride_h - 1) or None

        # Zero filler template of the strided output size; registered as a
        # buffer so it follows the module across devices.
        self.register_buffer(
            "zeros",
            torch.zeros(
                size=(
                    self.width * self.stride_w - (self.stride_w - 1),
                    self.height * self.stride_h - (self.stride_h - 1),
                ),
                dtype=torch.float32,
            ),
        )

        self.out_width, self.out_height = self.zeros.shape[0], self.zeros.shape[1]
        # Squeeze in batch and channel dims (assigning updates the registered
        # buffer in place).
        self.zeros = self.zeros.unsqueeze(0).unsqueeze(0)

        # NOTE(review): below, (width, stride_w) act along dim -2 and
        # (height, stride_h) along dim -1; for square images/strides this is
        # equivalent either way — confirm the intended axis order before using
        # non-square shapes.
        where_template = torch.zeros(
            (self.stride_w, self.stride_h), dtype=torch.float32
        )
        # Set upper/left corner to 1.0.
        where_template[0][0] = 1.0
        # then tile across the entire (strided) image size and trim the
        # trailing (stride - 1) rows/cols.
        where_template = where_template.repeat((self.height, self.width))[
            : self._trim_w, : self._trim_h
        ]
        # Squeeze in batch and channel dims and convert to bool.
        where_template = where_template.unsqueeze(0).unsqueeze(0).bool()
        self.register_buffer("where_template", where_template)

    def forward(self, x):
        """Inserts (stride - 1) zeros between the pixels of `x` (B, C, H, W)."""
        # Repeat incoming image stride(w/h) times to match the strided output
        # template, then trim the trailing (stride - 1) rows/cols.
        repeated_x = (
            x.repeat_interleave(self.stride_w, dim=-2).repeat_interleave(
                self.stride_h, dim=-1
            )
        )[:, :, : self._trim_w, : self._trim_h]
        # Where `self.where_template` is True -> use the image pixel, otherwise
        # use the zero filler value.
        return torch.where(self.where_template, repeated_x, self.zeros)
.venv/lib/python3.11/site-packages/ray/rllib/core/testing/__init__.py ADDED
File without changes
.venv/lib/python3.11/site-packages/ray/rllib/core/testing/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (195 Bytes). View file
 
.venv/lib/python3.11/site-packages/ray/rllib/core/testing/__pycache__/bc_algorithm.cpython-311.pyc ADDED
Binary file (3.65 kB). View file
 
.venv/lib/python3.11/site-packages/ray/rllib/core/testing/__pycache__/testing_learner.cpython-311.pyc ADDED
Binary file (4.4 kB). View file
 
.venv/lib/python3.11/site-packages/ray/rllib/core/testing/bc_algorithm.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Contains example implementation of a custom algorithm.
2
+
3
+ Note: It doesn't include any real use-case functionality; it only serves as an example
4
+ to test the algorithm construction and customization.
5
+ """
6
+
7
+ from ray.rllib.algorithms import Algorithm, AlgorithmConfig
8
+ from ray.rllib.policy.torch_policy_v2 import TorchPolicyV2
9
+ from ray.rllib.policy.eager_tf_policy_v2 import EagerTFPolicyV2
10
+ from ray.rllib.core.testing.torch.bc_module import DiscreteBCTorchModule
11
+ from ray.rllib.core.testing.torch.bc_learner import BCTorchLearner
12
+ from ray.rllib.core.testing.tf.bc_module import DiscreteBCTFModule
13
+ from ray.rllib.core.testing.tf.bc_learner import BCTfLearner
14
+ from ray.rllib.core.rl_module.rl_module import RLModuleSpec
15
+ from ray.rllib.utils.annotations import override
16
+ from ray.rllib.utils.typing import ResultDict
17
+
18
+
19
class BCConfigTest(AlgorithmConfig):
    """Minimal AlgorithmConfig used to test algorithm construction/customization."""

    def __init__(self, algo_class=None):
        # Default to the test algorithm class unless the caller overrides it.
        super().__init__(algo_class=algo_class or BCAlgorithmTest)

    def get_default_rl_module_spec(self):
        """Return the framework-specific default RLModule spec (None if unknown)."""
        framework_to_module = {
            "torch": DiscreteBCTorchModule,
            "tf2": DiscreteBCTFModule,
        }
        module_class = framework_to_module.get(self.framework_str)
        if module_class is not None:
            return RLModuleSpec(module_class=module_class)

    def get_default_learner_class(self):
        """Return the framework-specific default Learner class (None if unknown)."""
        if self.framework_str == "torch":
            return BCTorchLearner
        if self.framework_str == "tf2":
            return BCTfLearner
+
35
+
36
class BCAlgorithmTest(Algorithm):
    """Test Algorithm whose training step is a deliberate no-op."""

    @classmethod
    def get_default_policy_class(cls, config: AlgorithmConfig):
        """Map the configured framework to its default policy class.

        Raises:
            ValueError: If the framework is neither "torch" nor "tf2".
        """
        framework = config.framework_str
        if framework == "torch":
            return TorchPolicyV2
        if framework == "tf2":
            return EagerTFPolicyV2
        raise ValueError("Unknown framework: {}".format(framework))

    @override(Algorithm)
    def training_step(self) -> ResultDict:
        """Do nothing; return an empty result dict."""
        return {}
.venv/lib/python3.11/site-packages/ray/rllib/core/testing/testing_learner.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Type
2
+
3
+ import numpy as np
4
+
5
+ from ray.rllib.algorithms.algorithm_config import AlgorithmConfig
6
+ from ray.rllib.core import DEFAULT_MODULE_ID
7
+ from ray.rllib.core.learner.learner import Learner
8
+ from ray.rllib.core.rl_module.multi_rl_module import (
9
+ MultiRLModule,
10
+ MultiRLModuleSpec,
11
+ )
12
+ from ray.rllib.core.rl_module.rl_module import RLModuleSpec
13
+ from ray.rllib.utils.annotations import override
14
+ from ray.rllib.utils.numpy import convert_to_numpy
15
+ from ray.rllib.utils.typing import RLModuleSpecType
16
+
17
+
18
class BaseTestingAlgorithmConfig(AlgorithmConfig):
    """Base config shared by the RLlib testing BC learners/modules."""

    # A test setting to activate metrics on mean weights.
    report_mean_weights: bool = True

    @override(AlgorithmConfig)
    def get_default_learner_class(self) -> Type["Learner"]:
        """Return the default Learner class for the configured framework.

        Raises:
            ValueError: If the framework is neither "tf2" nor "torch".
        """
        framework = self.framework_str
        if framework == "tf2":
            from ray.rllib.core.testing.tf.bc_learner import BCTfLearner

            return BCTfLearner
        if framework == "torch":
            from ray.rllib.core.testing.torch.bc_learner import BCTorchLearner

            return BCTorchLearner
        raise ValueError(f"Unsupported framework: {framework}")

    @override(AlgorithmConfig)
    def get_default_rl_module_spec(self) -> "RLModuleSpecType":
        """Return the default RLModule spec (multi-agent aware).

        Raises:
            ValueError: If the framework is neither "tf2" nor "torch".
        """
        framework = self.framework_str
        if framework == "tf2":
            from ray.rllib.core.testing.tf.bc_module import DiscreteBCTFModule

            module_class = DiscreteBCTFModule
        elif framework == "torch":
            from ray.rllib.core.testing.torch.bc_module import DiscreteBCTorchModule

            module_class = DiscreteBCTorchModule
        else:
            raise ValueError(f"Unsupported framework: {framework}")

        single_spec = RLModuleSpec(
            module_class=module_class,
            model_config={"fcnet_hiddens": [32]},
        )

        if not self.is_multi_agent:
            return single_spec

        # TODO (Kourosh): Make this more multi-agent for example with policy ids
        # "1" and "2".
        return MultiRLModuleSpec(
            multi_rl_module_class=MultiRLModule,
            rl_module_specs={DEFAULT_MODULE_ID: single_spec},
        )
62
+
63
+
64
class BaseTestingLearner(Learner):
    """Learner mixin that optionally reports the mean of each module's weights."""

    @override(Learner)
    def after_gradient_based_update(self, *, timesteps):
        # This is to check if in the multi-gpu case, the weights across workers are
        # the same. It is really only needed during testing.
        if not self.config.report_mean_weights:
            return

        for module_id in self.module.keys():
            weights = convert_to_numpy(
                self.get_parameters(self.module[module_id])
            )
            mean_weight = np.mean([w.mean() for w in weights])
            self.metrics.log_value((module_id, "mean_weight"), mean_weight, window=1)
.venv/lib/python3.11/site-packages/ray/rllib/core/testing/tf/__init__.py ADDED
File without changes
.venv/lib/python3.11/site-packages/ray/rllib/core/testing/tf/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (198 Bytes). View file
 
.venv/lib/python3.11/site-packages/ray/rllib/core/testing/tf/__pycache__/bc_learner.cpython-311.pyc ADDED
Binary file (2.05 kB). View file
 
.venv/lib/python3.11/site-packages/ray/rllib/core/testing/tf/__pycache__/bc_module.cpython-311.pyc ADDED
Binary file (7.41 kB). View file
 
.venv/lib/python3.11/site-packages/ray/rllib/core/testing/tf/bc_learner.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import tensorflow as tf
2
+ from typing import Dict, TYPE_CHECKING
3
+
4
+ from ray.rllib.core.columns import Columns
5
+ from ray.rllib.core.learner.tf.tf_learner import TfLearner
6
+ from ray.rllib.core.testing.testing_learner import BaseTestingLearner
7
+ from ray.rllib.utils.typing import ModuleID, TensorType
8
+
9
+ if TYPE_CHECKING:
10
+ from ray.rllib.algorithms.algorithm_config import AlgorithmConfig
11
+
12
+
13
class BCTfLearner(TfLearner, BaseTestingLearner):
    """Behavior-cloning TF learner used in RLlib tests."""

    def compute_loss_for_module(
        self,
        *,
        module_id: ModuleID,
        config: "AlgorithmConfig",
        batch: Dict,
        fwd_out: Dict[str, TensorType],
    ) -> TensorType:
        """Compute the mean negative log-likelihood BC loss for one module."""
        # Explicitly run the testing base class's hook first (kept exactly as
        # the original did, bypassing normal MRO dispatch).
        BaseTestingLearner.compute_loss_for_module(
            self,
            module_id=module_id,
            config=config,
            batch=batch,
            fwd_out=fwd_out,
        )

        # Build the train-time action distribution from the module's logits ...
        dist_cls = self._module[module_id].get_train_action_dist_cls()
        action_dist = dist_cls.from_logits(fwd_out[Columns.ACTION_DIST_INPUTS])
        # ... and return the mean negative log-prob of the taken actions.
        return -tf.math.reduce_mean(action_dist.logp(batch[Columns.ACTIONS]))
.venv/lib/python3.11/site-packages/ray/rllib/core/testing/tf/bc_module.py ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import tensorflow as tf
2
+ from typing import Any, Dict
3
+
4
+ from ray.rllib.core.columns import Columns
5
+ from ray.rllib.core.rl_module.rl_module import RLModule
6
+ from ray.rllib.core.rl_module.multi_rl_module import MultiRLModule
7
+ from ray.rllib.core.rl_module.tf.tf_rl_module import TfRLModule
8
+ from ray.rllib.utils.annotations import override
9
+ from ray.rllib.utils.typing import StateDict
10
+
11
+
12
class DiscreteBCTFModule(TfRLModule):
    """Discrete-action BC module: a small Keras MLP producing action logits."""

    def setup(self):
        obs_dim = self.observation_space.shape[0]
        hidden_dim = self.model_config["fcnet_hiddens"][0]
        num_actions = self.action_space.n

        # ReLU -> Dense(hidden) -> ReLU -> Dense(num_actions), same layer
        # order as the original append-based construction.
        self.policy = tf.keras.Sequential(
            [
                tf.keras.Input(shape=(obs_dim,)),
                tf.keras.layers.ReLU(),
                tf.keras.layers.Dense(hidden_dim),
                tf.keras.layers.ReLU(),
                tf.keras.layers.Dense(num_actions),
            ]
        )
        self._input_dim = obs_dim

    def _forward(self, batch: Dict[str, Any], **kwargs) -> Dict[str, Any]:
        """Map observations to action-distribution logits."""
        return {Columns.ACTION_DIST_INPUTS: self.policy(batch["obs"])}

    @override(RLModule)
    def get_state(self, *args, **kwargs) -> StateDict:
        """Return the policy network's weights."""
        return {"policy": self.policy.get_weights()}

    @override(RLModule)
    def set_state(self, state: StateDict) -> None:
        """Restore the policy network's weights."""
        self.policy.set_weights(state["policy"])
39
+
40
+
41
class BCTfRLModuleWithSharedGlobalEncoder(TfRLModule):
    """BC module combining a shared global-obs encoder with a local policy head."""

    def __init__(self, encoder, local_dim, hidden_dim, action_dim):
        super().__init__()

        # Shared (possibly cross-module) encoder for the "global" observation.
        self.encoder = encoder
        self.policy_head = tf.keras.Sequential(
            [
                tf.keras.layers.Dense(
                    hidden_dim + local_dim,
                    input_shape=(hidden_dim + local_dim,),
                    activation="relu",
                ),
                tf.keras.layers.Dense(hidden_dim, activation="relu"),
                tf.keras.layers.Dense(action_dim),
            ]
        )

    def _forward(self, batch, **kwargs):
        """Encode the global obs, concat the local obs, and emit action logits."""
        observations = batch["obs"]
        encoded_global = self.encoder(observations["global"])
        head_input = tf.concat([encoded_global, observations["local"]], axis=-1)
        return {Columns.ACTION_DIST_INPUTS: self.policy_head(head_input)}

    @override(RLModule)
    def _default_input_specs(self):
        """Require nested "global" and "local" observation keys."""
        return [("obs", "global"), ("obs", "local")]
69
+
70
+
71
class BCTfMultiAgentModuleWithSharedEncoder(MultiRLModule):
    """Multi-agent module whose sub-modules share one global-obs encoder."""

    def setup(self):
        # Build the shared global encoder from the first sub-module's spec;
        # presumably all sub-modules share the same "global" obs space —
        # TODO confirm against callers.
        module_specs = self.config.modules
        first_spec = next(iter(module_specs.values()))
        global_dim = first_spec.observation_space["global"].shape[0]
        hidden_dim = first_spec.model_config_dict["fcnet_hiddens"][0]

        shared_encoder = tf.keras.Sequential(
            [
                tf.keras.Input(shape=(global_dim,)),
                tf.keras.layers.ReLU(),
                tf.keras.layers.Dense(hidden_dim),
            ]
        )

        # Instantiate each sub-module with the shared encoder plus its own
        # local-obs/action dimensions.
        for module_id, spec in module_specs.items():
            self._rl_modules[module_id] = spec.module_class(
                encoder=shared_encoder,
                local_dim=spec.observation_space["local"].shape[0],
                hidden_dim=hidden_dim,
                action_dim=spec.action_space.n,
            )

    def serialize(self):
        # TODO (Kourosh): Implement when needed.
        raise NotImplementedError

    def deserialize(self, data):
        # TODO (Kourosh): Implement when needed.
        raise NotImplementedError