diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/connectors/__pycache__/connector.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/rllib/connectors/__pycache__/connector.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..54bd4887b2e288b3cf93d6f4d8a05c34b02dd31b Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/rllib/connectors/__pycache__/connector.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/connectors/__pycache__/connector_v2.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/rllib/connectors/__pycache__/connector_v2.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..dd72695dd31679152056b03ff0cecb320f6e5273 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/rllib/connectors/__pycache__/connector_v2.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/connectors/__pycache__/registry.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/rllib/connectors/__pycache__/registry.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..da7e068c6cd01cead1823ee5dffdc47e4b032451 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/rllib/connectors/__pycache__/registry.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/connectors/__pycache__/util.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/rllib/connectors/__pycache__/util.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f99ccf5452bc0f8bbb313153aab2cc043ca1730c Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/rllib/connectors/__pycache__/util.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/connectors/agent/__init__.py b/.venv/lib/python3.11/site-packages/ray/rllib/connectors/agent/__init__.py new file mode 100644 index 
0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/connectors/agent/__pycache__/lambdas.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/rllib/connectors/agent/__pycache__/lambdas.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e182e49d60e5c1386967d594408c95350ff0a30b Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/rllib/connectors/agent/__pycache__/lambdas.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/connectors/agent/__pycache__/mean_std_filter.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/rllib/connectors/agent/__pycache__/mean_std_filter.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a49909abdd8c0096fb662557e589eb67de5ac92f Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/rllib/connectors/agent/__pycache__/mean_std_filter.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/connectors/agent/__pycache__/obs_preproc.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/rllib/connectors/agent/__pycache__/obs_preproc.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a8753675f53c1dc1e8aff3064348708513da73de Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/rllib/connectors/agent/__pycache__/obs_preproc.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/connectors/agent/__pycache__/state_buffer.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/rllib/connectors/agent/__pycache__/state_buffer.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a1af36a01fe22bccfa5b8b297d4f31adadb0c353 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/rllib/connectors/agent/__pycache__/state_buffer.cpython-311.pyc differ diff --git 
a/.venv/lib/python3.11/site-packages/ray/rllib/connectors/agent/clip_reward.py b/.venv/lib/python3.11/site-packages/ray/rllib/connectors/agent/clip_reward.py new file mode 100644 index 0000000000000000000000000000000000000000..8d6c89916c97ebd8b2ede36d840c37de2a602883 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/rllib/connectors/agent/clip_reward.py @@ -0,0 +1,56 @@ +from typing import Any + +import numpy as np + +from ray.rllib.connectors.connector import ( + AgentConnector, + ConnectorContext, +) +from ray.rllib.connectors.registry import register_connector +from ray.rllib.policy.sample_batch import SampleBatch +from ray.rllib.utils.typing import AgentConnectorDataType +from ray.rllib.utils.annotations import OldAPIStack + + +@OldAPIStack +class ClipRewardAgentConnector(AgentConnector): + def __init__(self, ctx: ConnectorContext, sign=False, limit=None): + super().__init__(ctx) + assert ( + not sign or not limit + ), "should not enable both sign and limit reward clipping." + self.sign = sign + self.limit = limit + + def transform(self, ac_data: AgentConnectorDataType) -> AgentConnectorDataType: + d = ac_data.data + assert ( + type(d) is dict + ), "Single agent data must be of type Dict[str, TensorStructType]" + + if SampleBatch.REWARDS not in d: + # Nothing to clip. May happen for initial obs. 
+ return ac_data + + if self.sign: + d[SampleBatch.REWARDS] = np.sign(d[SampleBatch.REWARDS]) + elif self.limit: + d[SampleBatch.REWARDS] = np.clip( + d[SampleBatch.REWARDS], + a_min=-self.limit, + a_max=self.limit, + ) + return ac_data + + def to_state(self): + return ClipRewardAgentConnector.__name__, { + "sign": self.sign, + "limit": self.limit, + } + + @staticmethod + def from_state(ctx: ConnectorContext, params: Any): + return ClipRewardAgentConnector(ctx, **params) + + +register_connector(ClipRewardAgentConnector.__name__, ClipRewardAgentConnector) diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/connectors/agent/env_sampling.py b/.venv/lib/python3.11/site-packages/ray/rllib/connectors/agent/env_sampling.py new file mode 100644 index 0000000000000000000000000000000000000000..f0ba6f0a4384caccd6647edde520cb7a84f0dc65 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/rllib/connectors/agent/env_sampling.py @@ -0,0 +1,30 @@ +from typing import Any + +from ray.rllib.connectors.connector import ( + AgentConnector, + ConnectorContext, +) +from ray.rllib.connectors.registry import register_connector +from ray.rllib.utils.typing import AgentConnectorDataType +from ray.rllib.utils.annotations import OldAPIStack + + +@OldAPIStack +class EnvSamplingAgentConnector(AgentConnector): + def __init__(self, ctx: ConnectorContext, sign=False, limit=None): + super().__init__(ctx) + self.observation_space = ctx.observation_space + + def transform(self, ac_data: AgentConnectorDataType) -> AgentConnectorDataType: + # EnvSamplingAgentConnector is a no-op connector. 
+ return ac_data + + def to_state(self): + return EnvSamplingAgentConnector.__name__, {} + + @staticmethod + def from_state(ctx: ConnectorContext, params: Any): + return EnvSamplingAgentConnector(ctx, **params) + + +register_connector(EnvSamplingAgentConnector.__name__, EnvSamplingAgentConnector) diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/connectors/agent/lambdas.py b/.venv/lib/python3.11/site-packages/ray/rllib/connectors/agent/lambdas.py new file mode 100644 index 0000000000000000000000000000000000000000..05a714a0df982e36bce96c33ddfb6f6e9ce05188 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/rllib/connectors/agent/lambdas.py @@ -0,0 +1,86 @@ +from typing import Any, Callable, Type + +import numpy as np +import tree # dm_tree + +from ray.rllib.connectors.connector import ( + AgentConnector, + ConnectorContext, +) +from ray.rllib.connectors.registry import register_connector +from ray.rllib.policy.sample_batch import SampleBatch +from ray.rllib.utils.typing import ( + AgentConnectorDataType, + AgentConnectorsOutput, +) +from ray.rllib.utils.annotations import OldAPIStack + + +@OldAPIStack +def register_lambda_agent_connector( + name: str, fn: Callable[[Any], Any] +) -> Type[AgentConnector]: + """A util to register any simple transforming function as an AgentConnector + + The only requirement is that fn should take a single data object and return + a single data object. + + Args: + name: Name of the resulting actor connector. + fn: The function that transforms env / agent data. + + Returns: + A new AgentConnector class that transforms data using fn. 
+ """ + + class LambdaAgentConnector(AgentConnector): + def transform(self, ac_data: AgentConnectorDataType) -> AgentConnectorDataType: + return AgentConnectorDataType( + ac_data.env_id, ac_data.agent_id, fn(ac_data.data) + ) + + def to_state(self): + return name, None + + @staticmethod + def from_state(ctx: ConnectorContext, params: Any): + return LambdaAgentConnector(ctx) + + LambdaAgentConnector.__name__ = name + LambdaAgentConnector.__qualname__ = name + + register_connector(name, LambdaAgentConnector) + + return LambdaAgentConnector + + +@OldAPIStack +def flatten_data(data: AgentConnectorsOutput): + assert isinstance( + data, AgentConnectorsOutput + ), "Single agent data must be of type AgentConnectorsOutput" + + raw_dict = data.raw_dict + sample_batch = data.sample_batch + + flattened = {} + for k, v in sample_batch.items(): + if k in [SampleBatch.INFOS, SampleBatch.ACTIONS] or k.startswith("state_out_"): + # Do not flatten infos, actions, and state_out_ columns. + flattened[k] = v + continue + if v is None: + # Keep the same column shape. + flattened[k] = None + continue + flattened[k] = np.array(tree.flatten(v)) + flattened = SampleBatch(flattened, is_training=False) + + return AgentConnectorsOutput(raw_dict, flattened) + + +# Agent connector to build and return a flattened observation SampleBatch +# in addition to the original input dict. 
+FlattenDataAgentConnector = OldAPIStack( + register_lambda_agent_connector("FlattenDataAgentConnector", flatten_data) +) diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/connectors/agent/mean_std_filter.py b/.venv/lib/python3.11/site-packages/ray/rllib/connectors/agent/mean_std_filter.py new file mode 100644 index 0000000000000000000000000000000000000000..b2079344a203d7e08970672fe8fefcaa2caa1bb1 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/rllib/connectors/agent/mean_std_filter.py @@ -0,0 +1,187 @@ +from typing import Any, List +from gymnasium.spaces import Discrete, MultiDiscrete + +import numpy as np +import tree + +from ray.rllib.connectors.agent.synced_filter import SyncedFilterAgentConnector +from ray.rllib.connectors.connector import AgentConnector +from ray.rllib.connectors.connector import ( + ConnectorContext, +) +from ray.rllib.connectors.registry import register_connector +from ray.rllib.policy.sample_batch import SampleBatch +from ray.rllib.utils.annotations import OldAPIStack +from ray.rllib.utils.filter import Filter +from ray.rllib.utils.filter import MeanStdFilter, ConcurrentMeanStdFilter +from ray.rllib.utils.spaces.space_utils import get_base_struct_from_space +from ray.rllib.utils.typing import AgentConnectorDataType +from ray.rllib.utils.filter import RunningStat + + +@OldAPIStack +class MeanStdObservationFilterAgentConnector(SyncedFilterAgentConnector): + """A connector used to mean-std-filter observations. + + Incoming observations are filtered such that the output of this filter is on + average zero and has a standard deviation of 1. This filtering is applied + separately per element of the observation space. 
+ """ + + def __init__( + self, + ctx: ConnectorContext, + demean: bool = True, + destd: bool = True, + clip: float = 10.0, + ): + SyncedFilterAgentConnector.__init__(self, ctx) + # We simply use the old MeanStdFilter until non-connector env_runner is fully + # deprecated to avoid duplicate code + + filter_shape = tree.map_structure( + lambda s: ( + None + if isinstance(s, (Discrete, MultiDiscrete)) # noqa + else np.array(s.shape) + ), + get_base_struct_from_space(ctx.observation_space), + ) + self.filter = MeanStdFilter(filter_shape, demean=demean, destd=destd, clip=clip) + + def transform(self, ac_data: AgentConnectorDataType) -> AgentConnectorDataType: + d = ac_data.data + assert ( + type(d) is dict + ), "Single agent data must be of type Dict[str, TensorStructType]" + if SampleBatch.OBS in d: + d[SampleBatch.OBS] = self.filter( + d[SampleBatch.OBS], update=self._is_training + ) + if SampleBatch.NEXT_OBS in d: + d[SampleBatch.NEXT_OBS] = self.filter( + d[SampleBatch.NEXT_OBS], update=self._is_training + ) + + return ac_data + + def to_state(self): + # Flattening is deterministic + flattened_rs = tree.flatten(self.filter.running_stats) + flattened_buffer = tree.flatten(self.filter.buffer) + return MeanStdObservationFilterAgentConnector.__name__, { + "shape": self.filter.shape, + "no_preprocessor": self.filter.no_preprocessor, + "demean": self.filter.demean, + "destd": self.filter.destd, + "clip": self.filter.clip, + "running_stats": [s.to_state() for s in flattened_rs], + "buffer": [s.to_state() for s in flattened_buffer], + } + + # demean, destd, clip, and a state dict + @staticmethod + def from_state( + ctx: ConnectorContext, + params: List[Any] = None, + demean: bool = True, + destd: bool = True, + clip: float = 10.0, + ): + connector = MeanStdObservationFilterAgentConnector(ctx, demean, destd, clip) + if params: + connector.filter.shape = params["shape"] + connector.filter.no_preprocessor = params["no_preprocessor"] + connector.filter.demean = 
params["demean"] + connector.filter.destd = params["destd"] + connector.filter.clip = params["clip"] + + # Unflattening is deterministic + running_stats = [RunningStat.from_state(s) for s in params["running_stats"]] + connector.filter.running_stats = tree.unflatten_as( + connector.filter.shape, running_stats + ) + + # Unflattening is deterministic + buffer = [RunningStat.from_state(s) for s in params["buffer"]] + connector.filter.buffer = tree.unflatten_as(connector.filter.shape, buffer) + + return connector + + def reset_state(self) -> None: + """Creates copy of current state and resets accumulated state""" + if not self._is_training: + raise ValueError( + "State of {} can only be changed when trainin.".format(self.__name__) + ) + self.filter.reset_buffer() + + def apply_changes(self, other: "Filter", *args, **kwargs) -> None: + """Updates self with state from other filter.""" + # inline this as soon as we deprecate ordinary filter with non-connector + # env_runner + if not self._is_training: + raise ValueError( + "Changes can only be applied to {} when trainin.".format(self.__name__) + ) + return self.filter.apply_changes(other, *args, **kwargs) + + def copy(self) -> "Filter": + """Creates a new object with same state as self. + + This is a legacy Filter method that we need to keep around for now + + Returns: + A copy of self. 
+ """ + # inline this as soon as we deprecate ordinary filter with non-connector + # env_runner + return self.filter.copy() + + def sync(self, other: "AgentConnector") -> None: + """Copies all state from other filter to self.""" + # inline this as soon as we deprecate ordinary filter with non-connector + # env_runner + if not self._is_training: + raise ValueError( + "{} can only be synced when trainin.".format(self.__name__) + ) + return self.filter.sync(other.filter) + + +@OldAPIStack +class ConcurrentMeanStdObservationFilterAgentConnector( + MeanStdObservationFilterAgentConnector +): + """A concurrent version of the MeanStdObservationFilterAgentConnector. + + This version's filter has all operations wrapped by a threading.RLock. + It can therefore be safely used by multiple threads. + """ + + def __init__(self, ctx: ConnectorContext, demean=True, destd=True, clip=10.0): + SyncedFilterAgentConnector.__init__(self, ctx) + # We simply use the old MeanStdFilter until non-connector env_runner is fully + # deprecated to avoid duplicate code + + filter_shape = tree.map_structure( + lambda s: ( + None + if isinstance(s, (Discrete, MultiDiscrete)) # noqa + else np.array(s.shape) + ), + get_base_struct_from_space(ctx.observation_space), + ) + self.filter = ConcurrentMeanStdFilter( + filter_shape, demean=True, destd=True, clip=10.0 + ) + + +register_connector( + MeanStdObservationFilterAgentConnector.__name__, + MeanStdObservationFilterAgentConnector, +) +register_connector( + ConcurrentMeanStdObservationFilterAgentConnector.__name__, + ConcurrentMeanStdObservationFilterAgentConnector, +) diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/connectors/agent/obs_preproc.py b/.venv/lib/python3.11/site-packages/ray/rllib/connectors/agent/obs_preproc.py new file mode 100644 index 0000000000000000000000000000000000000000..f783bb6718cca79a26752e2c961f880968a1ce58 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/rllib/connectors/agent/obs_preproc.py @@ -0,0 +1,69 @@ 
+from typing import Any + +from ray.rllib.connectors.connector import ( + AgentConnector, + ConnectorContext, +) +from ray.rllib.connectors.registry import register_connector +from ray.rllib.models.preprocessors import get_preprocessor, NoPreprocessor +from ray.rllib.policy.sample_batch import SampleBatch +from ray.rllib.utils.typing import AgentConnectorDataType +from ray.rllib.utils.annotations import OldAPIStack + + +@OldAPIStack +class ObsPreprocessorConnector(AgentConnector): + """A connector that wraps around existing RLlib observation preprocessors. + + This includes: + - OneHotPreprocessor for Discrete and Multi-Discrete spaces. + - GenericPixelPreprocessor and AtariRamPreprocessor for Atari spaces. + - TupleFlatteningPreprocessor and DictFlatteningPreprocessor for flattening + arbitrary nested input observations. + - RepeatedValuesPreprocessor for padding observations from RLlib Repeated + observation space. + """ + + def __init__(self, ctx: ConnectorContext): + super().__init__(ctx) + + if hasattr(ctx.observation_space, "original_space"): + # ctx.observation_space is the space this Policy deals with. + # We need to preprocess data from the original observation space here. 
+ obs_space = ctx.observation_space.original_space + else: + obs_space = ctx.observation_space + + self._preprocessor = get_preprocessor(obs_space)( + obs_space, ctx.config.get("model", {}) + ) + + def is_identity(self): + """Returns whether this preprocessor connector is a no-op preprocessor.""" + return isinstance(self._preprocessor, NoPreprocessor) + + def transform(self, ac_data: AgentConnectorDataType) -> AgentConnectorDataType: + d = ac_data.data + assert type(d) is dict, ( + "Single agent data must be of type Dict[str, TensorStructType] but is of " + "type {}".format(type(d)) + ) + + if SampleBatch.OBS in d: + d[SampleBatch.OBS] = self._preprocessor.transform(d[SampleBatch.OBS]) + if SampleBatch.NEXT_OBS in d: + d[SampleBatch.NEXT_OBS] = self._preprocessor.transform( + d[SampleBatch.NEXT_OBS] + ) + + return ac_data + + def to_state(self): + return ObsPreprocessorConnector.__name__, None + + @staticmethod + def from_state(ctx: ConnectorContext, params: Any): + return ObsPreprocessorConnector(ctx) + + +register_connector(ObsPreprocessorConnector.__name__, ObsPreprocessorConnector) diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/connectors/agent/pipeline.py b/.venv/lib/python3.11/site-packages/ray/rllib/connectors/agent/pipeline.py new file mode 100644 index 0000000000000000000000000000000000000000..898ac79b1c709429e4751eca7eb427b3afa26a4e --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/rllib/connectors/agent/pipeline.py @@ -0,0 +1,72 @@ +import logging +from typing import Any, List +from collections import defaultdict + +from ray.rllib.connectors.connector import ( + AgentConnector, + Connector, + ConnectorContext, + ConnectorPipeline, +) +from ray.rllib.connectors.registry import get_connector, register_connector +from ray.rllib.utils.typing import ActionConnectorDataType, AgentConnectorDataType +from ray.rllib.utils.annotations import OldAPIStack +from ray.util.timer import _Timer + + +logger = logging.getLogger(__name__) + + 
+@OldAPIStack +class AgentConnectorPipeline(ConnectorPipeline, AgentConnector): + def __init__(self, ctx: ConnectorContext, connectors: List[Connector]): + super().__init__(ctx, connectors) + self.timers = defaultdict(_Timer) + + def reset(self, env_id: str): + for c in self.connectors: + c.reset(env_id) + + def on_policy_output(self, output: ActionConnectorDataType): + for c in self.connectors: + c.on_policy_output(output) + + def __call__( + self, acd_list: List[AgentConnectorDataType] + ) -> List[AgentConnectorDataType]: + ret = acd_list + for c in self.connectors: + timer = self.timers[str(c)] + with timer: + ret = c(ret) + return ret + + def to_state(self): + children = [] + for c in self.connectors: + state = c.to_state() + assert isinstance(state, tuple) and len(state) == 2, ( + "Serialized connector state must be in the format of " + f"Tuple[name: str, params: Any]. Instead we got {state}" + f"for connector {c.__name__}." + ) + children.append(state) + return AgentConnectorPipeline.__name__, children + + @staticmethod + def from_state(ctx: ConnectorContext, params: List[Any]): + assert ( + type(params) is list + ), "AgentConnectorPipeline takes a list of connector params." 
+ connectors = [] + for state in params: + try: + name, subparams = state + connectors.append(get_connector(name, ctx, subparams)) + except Exception as e: + logger.error(f"Failed to de-serialize connector state: {state}") + raise e + return AgentConnectorPipeline(ctx, connectors) + + +register_connector(AgentConnectorPipeline.__name__, AgentConnectorPipeline) diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/connectors/agent/state_buffer.py b/.venv/lib/python3.11/site-packages/ray/rllib/connectors/agent/state_buffer.py new file mode 100644 index 0000000000000000000000000000000000000000..4516abd8bbe0ad47d6cc96baedb8909ff26b62fd --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/rllib/connectors/agent/state_buffer.py @@ -0,0 +1,120 @@ +from collections import defaultdict +import logging +import pickle +from typing import Any + +import numpy as np +from ray.rllib.utils.annotations import override +import tree # dm_tree + +from ray.rllib.connectors.connector import ( + AgentConnector, + Connector, + ConnectorContext, +) +from ray import cloudpickle +from ray.rllib.connectors.registry import register_connector +from ray.rllib.core.columns import Columns +from ray.rllib.policy.sample_batch import SampleBatch +from ray.rllib.utils.spaces.space_utils import get_base_struct_from_space +from ray.rllib.utils.typing import ActionConnectorDataType, AgentConnectorDataType +from ray.rllib.utils.annotations import OldAPIStack + + +logger = logging.getLogger(__name__) + + +@OldAPIStack +class StateBufferConnector(AgentConnector): + def __init__(self, ctx: ConnectorContext, states: Any = None): + super().__init__(ctx) + + self._initial_states = ctx.initial_states + self._action_space_struct = get_base_struct_from_space(ctx.action_space) + + self._states = defaultdict(lambda: defaultdict(lambda: (None, None, None))) + self._enable_new_api_stack = False + # TODO(jungong) : we would not need this if policies are never stashed + # during the rollout of a single episode. 
+ if states: + try: + self._states = cloudpickle.loads(states) + except pickle.UnpicklingError: + # StateBufferConnector states are only needed for rare cases + # like stashing then restoring a policy during the rollout of + # a single episode. + # It is ok to ignore the error for most of the cases here. + logger.info( + "Can not restore StateBufferConnector states. This warning can " + "usually be ignore, unless it is from restoring a stashed policy." + ) + + @override(Connector) + def in_eval(self): + super().in_eval() + + def reset(self, env_id: str): + # States should not be carried over between episodes. + if env_id in self._states: + del self._states[env_id] + + def on_policy_output(self, ac_data: ActionConnectorDataType): + # Buffer latest output states for next input __call__. + self._states[ac_data.env_id][ac_data.agent_id] = ac_data.output + + def transform(self, ac_data: AgentConnectorDataType) -> AgentConnectorDataType: + d = ac_data.data + assert ( + type(d) is dict + ), "Single agent data must be of type Dict[str, TensorStructType]" + + env_id = ac_data.env_id + agent_id = ac_data.agent_id + assert ( + env_id is not None and agent_id is not None + ), f"StateBufferConnector requires env_id(f{env_id}) and agent_id(f{agent_id})" + + action, states, fetches = self._states[env_id][agent_id] + + if action is not None: + d[SampleBatch.ACTIONS] = action # Last action + else: + # Default zero action. + d[SampleBatch.ACTIONS] = tree.map_structure( + lambda s: np.zeros_like(s.sample(), s.dtype) + if hasattr(s, "dtype") + else np.zeros_like(s.sample()), + self._action_space_struct, + ) + + if states is None: + states = self._initial_states + if self._enable_new_api_stack: + if states: + d[Columns.STATE_OUT] = states + else: + for i, v in enumerate(states): + d["state_out_{}".format(i)] = v + + # Also add extra fetches if available. 
+ if fetches: + d.update(fetches) + + return ac_data + + def to_state(self): + # Note(jungong) : it is ok to use cloudpickle here for stats because: + # 1. self._states may contain arbitary data objects, and will be hard + # to serialize otherwise. + # 2. seriazlized states are only useful if a policy is stashed and + # restored during the rollout of a single episode. So it is ok to + # use cloudpickle for such non-persistent data bits. + states = cloudpickle.dumps(self._states) + return StateBufferConnector.__name__, states + + @staticmethod + def from_state(ctx: ConnectorContext, params: Any): + return StateBufferConnector(ctx, params) + + +register_connector(StateBufferConnector.__name__, StateBufferConnector) diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/connectors/agent/synced_filter.py b/.venv/lib/python3.11/site-packages/ray/rllib/connectors/agent/synced_filter.py new file mode 100644 index 0000000000000000000000000000000000000000..51c625d9aceec9401599ef87b42cf522704c5ea3 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/rllib/connectors/agent/synced_filter.py @@ -0,0 +1,52 @@ +from ray.rllib.connectors.connector import ( + AgentConnector, + ConnectorContext, +) +from ray.rllib.utils.annotations import OldAPIStack +from ray.rllib.utils.filter import Filter + + +@OldAPIStack +class SyncedFilterAgentConnector(AgentConnector): + """An agent connector that filters with synchronized parameters.""" + + def __init__(self, ctx: ConnectorContext, *args, **kwargs): + super().__init__(ctx) + if args or kwargs: + raise ValueError( + "SyncedFilterAgentConnector does not take any additional arguments, " + "but got args=`{}` and kwargs={}.".format(args, kwargs) + ) + + def apply_changes(self, other: "Filter", *args, **kwargs) -> None: + """Updates self with state from other filter.""" + # TODO: (artur) inline this as soon as we deprecate ordinary filter with + # non-connecto env_runner + return self.filter.apply_changes(other, *args, **kwargs) + + def 
copy(self) -> "Filter": + """Creates a new object with same state as self. + + This is a legacy Filter method that we need to keep around for now + + Returns: + A copy of self. + """ + # inline this as soon as we deprecate ordinary filter with non-connector + # env_runner + return self.filter.copy() + + def sync(self, other: "AgentConnector") -> None: + """Copies all state from other filter to self.""" + # TODO: (artur) inline this as soon as we deprecate ordinary filter with + # non-connector env_runner + return self.filter.sync(other.filter) + + def reset_state(self) -> None: + """Creates copy of current state and resets accumulated state""" + raise NotImplementedError + + def as_serializable(self) -> "Filter": + # TODO: (artur) inline this as soon as we deprecate ordinary filter with + # non-connector env_runner + return self.filter.as_serializable() diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/connectors/agent/view_requirement.py b/.venv/lib/python3.11/site-packages/ray/rllib/connectors/agent/view_requirement.py new file mode 100644 index 0000000000000000000000000000000000000000..7f035bb97a92c7856ecc11e6dc7eac169a823a51 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/rllib/connectors/agent/view_requirement.py @@ -0,0 +1,135 @@ +from collections import defaultdict +from typing import Any + +from ray.rllib.connectors.connector import ( + AgentConnector, + ConnectorContext, +) +from ray.rllib.connectors.registry import register_connector +from ray.rllib.policy.sample_batch import SampleBatch +from ray.rllib.utils.typing import ( + AgentConnectorDataType, + AgentConnectorsOutput, +) +from ray.rllib.utils.annotations import OldAPIStack +from ray.rllib.evaluation.collectors.agent_collector import AgentCollector + + +@OldAPIStack +class ViewRequirementAgentConnector(AgentConnector): + """This connector does 2 things: + 1. It filters data columns based on view_requirements for training and inference. + 2. 
It buffers the right amount of history for computing the sample batch for + action computation. + The output of this connector is AgentConnectorsOut, which basically is + a tuple of 2 things: + { + "raw_dict": {"obs": ...} + "sample_batch": SampleBatch + } + raw_dict, which contains raw data for the latest time slice, + can be used to construct a complete episode by Sampler for training purpose. + The "for_action" SampleBatch can be used to directly call the policy. + """ + + def __init__(self, ctx: ConnectorContext): + super().__init__(ctx) + + self._view_requirements = ctx.view_requirements + _enable_new_api_stack = False + + # a dict of env_id to a dict of agent_id to a list of agent_collector objects + self.agent_collectors = defaultdict( + lambda: defaultdict( + lambda: AgentCollector( + self._view_requirements, + max_seq_len=ctx.config["model"]["max_seq_len"], + intial_states=ctx.initial_states, + disable_action_flattening=ctx.config.get( + "_disable_action_flattening", False + ), + is_policy_recurrent=ctx.is_policy_recurrent, + # Note(jungong): We only leverage AgentCollector for building sample + # batches for computing actions. + # So regardless of whether this ViewRequirement connector is in + # training or inference mode, we should tell these AgentCollectors + # to behave in inference mode, so they don't accumulate episode data + # that is not useful for inference. + is_training=False, + _enable_new_api_stack=_enable_new_api_stack, + ) + ) + ) + + def reset(self, env_id: str): + if env_id in self.agent_collectors: + del self.agent_collectors[env_id] + + def transform(self, ac_data: AgentConnectorDataType) -> AgentConnectorDataType: + d = ac_data.data + assert ( + type(d) is dict + ), "Single agent data must be of type Dict[str, TensorStructType]" + + env_id = ac_data.env_id + agent_id = ac_data.agent_id + # TODO: we don't keep episode_id around so use env_id as episode_id ? 
+ episode_id = env_id if SampleBatch.EPS_ID not in d else d[SampleBatch.EPS_ID] + + assert env_id is not None and agent_id is not None, ( + f"ViewRequirementAgentConnector requires env_id({env_id}) " + "and agent_id({agent_id})" + ) + + assert ( + self._view_requirements + ), "ViewRequirements required by ViewRequirementAgentConnector" + + # Note(jungong) : we need to keep the entire input dict here. + # A column may be used by postprocessing (GAE) even if its + # view_requirement.used_for_training is False. + training_dict = d + + agent_collector = self.agent_collectors[env_id][agent_id] + + if SampleBatch.NEXT_OBS not in d: + raise ValueError(f"connector data {d} should contain next_obs.") + # TODO(avnishn; kourosh) Unsure how agent_index is necessary downstream + # since there is no mapping from agent_index to agent_id that exists. + # need to remove this from the SampleBatch later. + # fall back to using dummy index if no index is available + if SampleBatch.AGENT_INDEX in d: + agent_index = d[SampleBatch.AGENT_INDEX] + else: + try: + agent_index = float(agent_id) + except ValueError: + agent_index = -1 + if agent_collector.is_empty(): + agent_collector.add_init_obs( + episode_id=episode_id, + agent_index=agent_index, + env_id=env_id, + init_obs=d[SampleBatch.NEXT_OBS], + init_infos=d.get(SampleBatch.INFOS), + ) + else: + agent_collector.add_action_reward_next_obs(d) + sample_batch = agent_collector.build_for_inference() + + return_data = AgentConnectorDataType( + env_id, agent_id, AgentConnectorsOutput(training_dict, sample_batch) + ) + return return_data + + def to_state(self): + return ViewRequirementAgentConnector.__name__, None + + @staticmethod + def from_state(ctx: ConnectorContext, params: Any): + return ViewRequirementAgentConnector(ctx) + + +register_connector( + ViewRequirementAgentConnector.__name__, ViewRequirementAgentConnector +) diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/connectors/learner/__init__.py 
b/.venv/lib/python3.11/site-packages/ray/rllib/connectors/learner/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..96fd6cd51af65e54c933117976a3cc5a9976d42b --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/rllib/connectors/learner/__init__.py @@ -0,0 +1,43 @@ +from ray.rllib.connectors.common.add_observations_from_episodes_to_batch import ( + AddObservationsFromEpisodesToBatch, +) +from ray.rllib.connectors.common.add_states_from_episodes_to_batch import ( + AddStatesFromEpisodesToBatch, +) +from ray.rllib.connectors.common.add_time_dim_to_batch_and_zero_pad import ( + AddTimeDimToBatchAndZeroPad, +) +from ray.rllib.connectors.common.agent_to_module_mapping import AgentToModuleMapping +from ray.rllib.connectors.common.batch_individual_items import BatchIndividualItems +from ray.rllib.connectors.common.numpy_to_tensor import NumpyToTensor +from ray.rllib.connectors.learner.add_columns_from_episodes_to_train_batch import ( + AddColumnsFromEpisodesToTrainBatch, +) +from ray.rllib.connectors.learner.add_next_observations_from_episodes_to_train_batch import ( # noqa + AddNextObservationsFromEpisodesToTrainBatch, +) +from ray.rllib.connectors.learner.add_one_ts_to_episodes_and_truncate import ( + AddOneTsToEpisodesAndTruncate, +) +from ray.rllib.connectors.learner.compute_returns_to_go import ComputeReturnsToGo +from ray.rllib.connectors.learner.general_advantage_estimation import ( + GeneralAdvantageEstimation, +) +from ray.rllib.connectors.learner.learner_connector_pipeline import ( + LearnerConnectorPipeline, +) + +__all__ = [ + "AddColumnsFromEpisodesToTrainBatch", + "AddNextObservationsFromEpisodesToTrainBatch", + "AddObservationsFromEpisodesToBatch", + "AddOneTsToEpisodesAndTruncate", + "AddStatesFromEpisodesToBatch", + "AddTimeDimToBatchAndZeroPad", + "AgentToModuleMapping", + "BatchIndividualItems", + "ComputeReturnsToGo", + "GeneralAdvantageEstimation", + "LearnerConnectorPipeline", + "NumpyToTensor", +] diff --git 
a/.venv/lib/python3.11/site-packages/ray/rllib/connectors/learner/__pycache__/__init__.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/rllib/connectors/learner/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f44ae9a28b8128ffc8801d606b17f008eec52057 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/rllib/connectors/learner/__pycache__/__init__.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/connectors/learner/__pycache__/add_columns_from_episodes_to_train_batch.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/rllib/connectors/learner/__pycache__/add_columns_from_episodes_to_train_batch.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..450004db37c94aebbb3810a555e947a29f680b9d Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/rllib/connectors/learner/__pycache__/add_columns_from_episodes_to_train_batch.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/connectors/learner/__pycache__/add_next_observations_from_episodes_to_train_batch.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/rllib/connectors/learner/__pycache__/add_next_observations_from_episodes_to_train_batch.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a70e5e7744be37b7115f22b0ce73964cca1320a8 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/rllib/connectors/learner/__pycache__/add_next_observations_from_episodes_to_train_batch.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/connectors/learner/__pycache__/compute_returns_to_go.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/rllib/connectors/learner/__pycache__/compute_returns_to_go.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..22ecd106766ddefa0220a902ff5c891e25271a84 Binary files /dev/null and 
b/.venv/lib/python3.11/site-packages/ray/rllib/connectors/learner/__pycache__/compute_returns_to_go.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/connectors/learner/__pycache__/general_advantage_estimation.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/rllib/connectors/learner/__pycache__/general_advantage_estimation.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ce192b708d48a3943ed2c3043374db75d8cf2473 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/rllib/connectors/learner/__pycache__/general_advantage_estimation.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/connectors/module_to_env/__pycache__/__init__.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/rllib/connectors/module_to_env/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..34212d0373b67030538dc2575213f58e5738a805 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/rllib/connectors/module_to_env/__pycache__/__init__.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/connectors/module_to_env/__pycache__/get_actions.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/rllib/connectors/module_to_env/__pycache__/get_actions.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7e42bc291afbad229c998583677074d4ef24374f Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/rllib/connectors/module_to_env/__pycache__/get_actions.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/connectors/module_to_env/__pycache__/module_to_env_pipeline.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/rllib/connectors/module_to_env/__pycache__/module_to_env_pipeline.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..979480291c34353fd8f6c96674d792d130c9076c Binary files 
/dev/null and b/.venv/lib/python3.11/site-packages/ray/rllib/connectors/module_to_env/__pycache__/module_to_env_pipeline.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/connectors/module_to_env/__pycache__/normalize_and_clip_actions.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/rllib/connectors/module_to_env/__pycache__/normalize_and_clip_actions.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9e561477b22ad120d8115026b7b98909655c0554 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/rllib/connectors/module_to_env/__pycache__/normalize_and_clip_actions.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/connectors/module_to_env/__pycache__/remove_single_ts_time_rank_from_batch.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/rllib/connectors/module_to_env/__pycache__/remove_single_ts_time_rank_from_batch.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7d8b579f2e0b3141a4ce19662f95761f898a0d36 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/rllib/connectors/module_to_env/__pycache__/remove_single_ts_time_rank_from_batch.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/connectors/module_to_env/__pycache__/unbatch_to_individual_items.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/rllib/connectors/module_to_env/__pycache__/unbatch_to_individual_items.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7150ca4395f9d682a77f0c61f11a9d313580461f Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/rllib/connectors/module_to_env/__pycache__/unbatch_to_individual_items.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/models/__pycache__/__init__.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/rllib/models/__pycache__/__init__.cpython-311.pyc new file mode 
100644 index 0000000000000000000000000000000000000000..438a8992c4340a1382b2484cf70f5aae1de4691f Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/rllib/models/__pycache__/__init__.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/models/__pycache__/action_dist.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/rllib/models/__pycache__/action_dist.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..305e7c89062d95538df6060d0027859be99ccb34 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/rllib/models/__pycache__/action_dist.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/models/__pycache__/catalog.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/rllib/models/__pycache__/catalog.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f34762128f1e0d746f4aa98ec18da11af85a804b Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/rllib/models/__pycache__/catalog.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/models/__pycache__/distributions.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/rllib/models/__pycache__/distributions.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8dc8318074487d75a786b0ed2edde69adbc4aaa2 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/rllib/models/__pycache__/distributions.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/models/__pycache__/modelv2.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/rllib/models/__pycache__/modelv2.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b33fb7ba59e453fc2f5608f293fc651683196db8 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/rllib/models/__pycache__/modelv2.cpython-311.pyc differ diff --git 
a/.venv/lib/python3.11/site-packages/ray/rllib/models/__pycache__/preprocessors.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/rllib/models/__pycache__/preprocessors.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ad709c46246ac0d15694b434b72f8233d3add08d Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/rllib/models/__pycache__/preprocessors.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/models/__pycache__/repeated_values.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/rllib/models/__pycache__/repeated_values.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e533e95573e55328946077a84f9f8722f62e496e Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/rllib/models/__pycache__/repeated_values.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/models/__pycache__/utils.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/rllib/models/__pycache__/utils.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6f3fd043257e9963118f8ada6a4f1bad2a610077 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/rllib/models/__pycache__/utils.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/models/tf/__pycache__/__init__.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/rllib/models/tf/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b8536bd9866b17066ff454d9cee1714d49b0d320 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/rllib/models/tf/__pycache__/__init__.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/models/tf/__pycache__/attention_net.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/rllib/models/tf/__pycache__/attention_net.cpython-311.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..7644bd62050737deb8ed1507ee56479882bc0747 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/rllib/models/tf/__pycache__/attention_net.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/models/tf/__pycache__/fcnet.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/rllib/models/tf/__pycache__/fcnet.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c4a8a31d0527870f5fdbdf48f1b672ca4612943d Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/rllib/models/tf/__pycache__/fcnet.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/models/tf/__pycache__/recurrent_net.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/rllib/models/tf/__pycache__/recurrent_net.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5c468118452f46723185aa3e184f689ba53a640a Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/rllib/models/tf/__pycache__/recurrent_net.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/models/tf/__pycache__/tf_action_dist.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/rllib/models/tf/__pycache__/tf_action_dist.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e76bc524aae52a668972d85426ee1c3f5f3cead4 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/rllib/models/tf/__pycache__/tf_action_dist.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/models/tf/__pycache__/tf_distributions.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/rllib/models/tf/__pycache__/tf_distributions.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8bb28debfc4e67540968d19a49c6779819a46336 Binary files /dev/null and 
b/.venv/lib/python3.11/site-packages/ray/rllib/models/tf/__pycache__/tf_distributions.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/models/tf/__pycache__/tf_modelv2.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/rllib/models/tf/__pycache__/tf_modelv2.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0a9bdcbb97ed640c72b31e80c8f77b3cb62f28ef Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/rllib/models/tf/__pycache__/tf_modelv2.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/models/tf/__pycache__/visionnet.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/rllib/models/tf/__pycache__/visionnet.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2f5a6dfee8cce5714982dddd8bb358cf16170e41 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/rllib/models/tf/__pycache__/visionnet.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/models/tf/layers/__init__.py b/.venv/lib/python3.11/site-packages/ray/rllib/models/tf/layers/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..6b840c42b17eff05c8e0c7d440a0f2963c9ed35a --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/rllib/models/tf/layers/__init__.py @@ -0,0 +1,17 @@ +from ray.rllib.models.tf.layers.gru_gate import GRUGate +from ray.rllib.models.tf.layers.noisy_layer import NoisyLayer +from ray.rllib.models.tf.layers.relative_multi_head_attention import ( + PositionalEmbedding, + RelativeMultiHeadAttention, +) +from ray.rllib.models.tf.layers.skip_connection import SkipConnection +from ray.rllib.models.tf.layers.multi_head_attention import MultiHeadAttention + +__all__ = [ + "GRUGate", + "MultiHeadAttention", + "NoisyLayer", + "PositionalEmbedding", + "RelativeMultiHeadAttention", + "SkipConnection", +] diff --git 
a/.venv/lib/python3.11/site-packages/ray/rllib/models/tf/layers/__pycache__/__init__.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/rllib/models/tf/layers/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a4b6c23a1c4b4fc80b15a19b6266974636237db0 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/rllib/models/tf/layers/__pycache__/__init__.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/models/tf/layers/__pycache__/gru_gate.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/rllib/models/tf/layers/__pycache__/gru_gate.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..493ee9bb0a1c354c8190738650729f2694a43aea Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/rllib/models/tf/layers/__pycache__/gru_gate.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/models/tf/layers/__pycache__/multi_head_attention.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/rllib/models/tf/layers/__pycache__/multi_head_attention.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3e4e718b8171c2ebef53346f36e6635b3cb80b7f Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/rllib/models/tf/layers/__pycache__/multi_head_attention.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/models/tf/layers/__pycache__/noisy_layer.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/rllib/models/tf/layers/__pycache__/noisy_layer.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3dfcce066cc81d706d359cdc64583f714af3e151 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/rllib/models/tf/layers/__pycache__/noisy_layer.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/models/tf/layers/__pycache__/relative_multi_head_attention.cpython-311.pyc 
b/.venv/lib/python3.11/site-packages/ray/rllib/models/tf/layers/__pycache__/relative_multi_head_attention.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..26d6464c3875a4ae6a800e15204b684d9df81a69 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/rllib/models/tf/layers/__pycache__/relative_multi_head_attention.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/models/tf/layers/__pycache__/skip_connection.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/rllib/models/tf/layers/__pycache__/skip_connection.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..19c1521c15fa222a422c4be166b2a923dcaf1a7d Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/rllib/models/tf/layers/__pycache__/skip_connection.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/models/tf/layers/gru_gate.py b/.venv/lib/python3.11/site-packages/ray/rllib/models/tf/layers/gru_gate.py new file mode 100644 index 0000000000000000000000000000000000000000..a41b23bbf534a15d0d3c71333bcba1bb0c0a6d3b --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/rllib/models/tf/layers/gru_gate.py @@ -0,0 +1,58 @@ +from ray.rllib.utils.framework import try_import_tf +from ray.rllib.utils.typing import TensorType, TensorShape +from ray.rllib.utils.deprecation import deprecation_warning +from ray.util import log_once + +tf1, tf, tfv = try_import_tf() + + +class GRUGate(tf.keras.layers.Layer if tf else object): + def __init__(self, init_bias: float = 0.0, **kwargs): + super().__init__(**kwargs) + self._init_bias = init_bias + if log_once("gru_gate"): + deprecation_warning( + old="rllib.models.tf.layers.GRUGate", + ) + + def build(self, input_shape: TensorShape): + h_shape, x_shape = input_shape + if x_shape[-1] != h_shape[-1]: + raise ValueError( + "Both inputs to GRUGate must have equal size in last axis!" 
+ ) + + dim = int(h_shape[-1]) + self._w_r = self.add_weight(shape=(dim, dim)) + self._w_z = self.add_weight(shape=(dim, dim)) + self._w_h = self.add_weight(shape=(dim, dim)) + + self._u_r = self.add_weight(shape=(dim, dim)) + self._u_z = self.add_weight(shape=(dim, dim)) + self._u_h = self.add_weight(shape=(dim, dim)) + + def bias_initializer(shape, dtype): + return tf.fill(shape, tf.cast(self._init_bias, dtype=dtype)) + + self._bias_z = self.add_weight(shape=(dim,), initializer=bias_initializer) + + def call(self, inputs: TensorType, **kwargs) -> TensorType: + # Pass in internal state first. + h, X = inputs + + r = tf.tensordot(X, self._w_r, axes=1) + tf.tensordot(h, self._u_r, axes=1) + r = tf.nn.sigmoid(r) + + z = ( + tf.tensordot(X, self._w_z, axes=1) + + tf.tensordot(h, self._u_z, axes=1) + - self._bias_z + ) + z = tf.nn.sigmoid(z) + + h_next = tf.tensordot(X, self._w_h, axes=1) + tf.tensordot( + (h * r), self._u_h, axes=1 + ) + h_next = tf.nn.tanh(h_next) + + return (1 - z) * h + z * h_next diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/models/tf/layers/multi_head_attention.py b/.venv/lib/python3.11/site-packages/ray/rllib/models/tf/layers/multi_head_attention.py new file mode 100644 index 0000000000000000000000000000000000000000..595608989f0b7da66e640a041289ae646cb36ae4 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/rllib/models/tf/layers/multi_head_attention.py @@ -0,0 +1,61 @@ +""" +[1] - Attention Is All You Need - Vaswani, Jones, Shazeer, Parmar, + Uszkoreit, Gomez, Kaiser - Google Brain/Research, U Toronto - 2017. 
+ https://arxiv.org/pdf/1706.03762.pdf +""" +from ray.rllib.utils.framework import try_import_tf +from ray.rllib.utils.typing import TensorType +from ray.rllib.utils.deprecation import deprecation_warning +from ray.util import log_once + +tf1, tf, tfv = try_import_tf() + + +class MultiHeadAttention(tf.keras.layers.Layer if tf else object): + """A multi-head attention layer described in [1].""" + + def __init__(self, out_dim: int, num_heads: int, head_dim: int, **kwargs): + super().__init__(**kwargs) + + # No bias or non-linearity. + self._num_heads = num_heads + self._head_dim = head_dim + self._qkv_layer = tf.keras.layers.Dense( + 3 * num_heads * head_dim, use_bias=False + ) + self._linear_layer = tf.keras.layers.TimeDistributed( + tf.keras.layers.Dense(out_dim, use_bias=False) + ) + if log_once("multi_head_attention"): + deprecation_warning( + old="rllib.models.tf.layers.MultiHeadAttention", + ) + + def call(self, inputs: TensorType) -> TensorType: + L = tf.shape(inputs)[1] # length of segment + H = self._num_heads # number of attention heads + D = self._head_dim # attention head dimension + + qkv = self._qkv_layer(inputs) + + queries, keys, values = tf.split(qkv, 3, -1) + queries = queries[:, -L:] # only query based on the segment + + queries = tf.reshape(queries, [-1, L, H, D]) + keys = tf.reshape(keys, [-1, L, H, D]) + values = tf.reshape(values, [-1, L, H, D]) + + score = tf.einsum("bihd,bjhd->bijh", queries, keys) + score = score / D**0.5 + + # causal mask of the same length as the sequence + mask = tf.sequence_mask(tf.range(1, L + 1), dtype=score.dtype) + mask = mask[None, :, :, None] + + masked_score = score * mask + 1e30 * (mask - 1.0) + wmat = tf.nn.softmax(masked_score, axis=2) + + out = tf.einsum("bijh,bjhd->bihd", wmat, values) + shape = tf.concat([tf.shape(out)[:2], [H * D]], axis=0) + out = tf.reshape(out, shape) + return self._linear_layer(out) diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/models/tf/layers/noisy_layer.py 
b/.venv/lib/python3.11/site-packages/ray/rllib/models/tf/layers/noisy_layer.py new file mode 100644 index 0000000000000000000000000000000000000000..5bc149d5de13beee0e77fcda069e321850507633 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/rllib/models/tf/layers/noisy_layer.py @@ -0,0 +1,118 @@ +import numpy as np + +from ray.rllib.models.utils import get_activation_fn +from ray.rllib.utils.framework import ( + get_variable, + try_import_tf, + TensorType, + TensorShape, +) +from ray.rllib.utils.deprecation import deprecation_warning +from ray.util import log_once + +tf1, tf, tfv = try_import_tf() + + +class NoisyLayer(tf.keras.layers.Layer if tf else object): + r"""A Layer that adds learnable Noise to some previous layer's outputs. + + Consists of: + - a common dense layer: y = w^{T}x + b + - a noisy layer: y = (w + \epsilon_w*\sigma_w)^{T}x + + (b+\epsilon_b*\sigma_b) + , where \epsilon are random variables sampled from factorized normal + distributions and \sigma are trainable variables which are expected to + vanish along the training procedure. + """ + + def __init__( + self, prefix: str, out_size: int, sigma0: float, activation: str = "relu" + ): + """Initializes a NoisyLayer object. + + Args: + prefix: + out_size: Output size for Noisy Layer + sigma0: Initialization value for sigma_b (bias noise) + non_linear: Non-linear activation for Noisy Layer + """ + super().__init__() + self.prefix = prefix + self.out_size = out_size + # TF noise generation can be unreliable on GPU + # If generating the noise on the CPU, + # lowering sigma0 to 0.1 may be helpful + self.sigma0 = sigma0 # 0.5~GPU, 0.1~CPU + self.activation = activation + # Variables. + self.w = None # Weight matrix. + self.b = None # Biases. + self.sigma_w = None # Noise for weight matrix + self.sigma_b = None # Noise for biases. 
+ if log_once("noisy_layer"): + deprecation_warning( + old="rllib.models.tf.layers.NoisyLayer", + ) + + def build(self, input_shape: TensorShape): + in_size = int(input_shape[1]) + + self.sigma_w = get_variable( + value=tf.keras.initializers.RandomUniform( + minval=-1.0 / np.sqrt(float(in_size)), + maxval=1.0 / np.sqrt(float(in_size)), + ), + trainable=True, + tf_name=self.prefix + "_sigma_w", + shape=[in_size, self.out_size], + dtype=tf.float32, + ) + + self.sigma_b = get_variable( + value=tf.keras.initializers.Constant(self.sigma0 / np.sqrt(float(in_size))), + trainable=True, + tf_name=self.prefix + "_sigma_b", + shape=[self.out_size], + dtype=tf.float32, + ) + + self.w = get_variable( + value=tf.keras.initializers.GlorotUniform(), + tf_name=self.prefix + "_fc_w", + trainable=True, + shape=[in_size, self.out_size], + dtype=tf.float32, + ) + + self.b = get_variable( + value=tf.keras.initializers.Zeros(), + tf_name=self.prefix + "_fc_b", + trainable=True, + shape=[self.out_size], + dtype=tf.float32, + ) + + def call(self, inputs: TensorType) -> TensorType: + in_size = int(inputs.shape[1]) + epsilon_in = tf.random.normal(shape=[in_size]) + epsilon_out = tf.random.normal(shape=[self.out_size]) + epsilon_in = self._f_epsilon(epsilon_in) + epsilon_out = self._f_epsilon(epsilon_out) + epsilon_w = tf.matmul( + a=tf.expand_dims(epsilon_in, -1), b=tf.expand_dims(epsilon_out, 0) + ) + epsilon_b = epsilon_out + + action_activation = ( + tf.matmul(inputs, self.w + self.sigma_w * epsilon_w) + + self.b + + self.sigma_b * epsilon_b + ) + + fn = get_activation_fn(self.activation, framework="tf") + if fn is not None: + action_activation = fn(action_activation) + return action_activation + + def _f_epsilon(self, x: TensorType) -> TensorType: + return tf.math.sign(x) * tf.math.sqrt(tf.math.abs(x)) diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/models/tf/layers/relative_multi_head_attention.py 
b/.venv/lib/python3.11/site-packages/ray/rllib/models/tf/layers/relative_multi_head_attention.py new file mode 100644 index 0000000000000000000000000000000000000000..f88486ff20516c19fcebdab3718fc829591215fb --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/rllib/models/tf/layers/relative_multi_head_attention.py @@ -0,0 +1,147 @@ +from typing import Optional + +from ray.rllib.utils.framework import try_import_tf +from ray.rllib.utils.typing import TensorType +from ray.rllib.utils.deprecation import deprecation_warning +from ray.util import log_once + +tf1, tf, tfv = try_import_tf() + + +class RelativeMultiHeadAttention(tf.keras.layers.Layer if tf else object): + """A RelativeMultiHeadAttention layer as described in [3]. + + Uses segment level recurrence with state reuse. + """ + + def __init__( + self, + out_dim: int, + num_heads: int, + head_dim: int, + input_layernorm: bool = False, + output_activation: Optional["tf.nn.activation"] = None, + **kwargs + ): + """Initializes a RelativeMultiHeadAttention keras Layer object. + + Args: + out_dim: The output dimensions of the multi-head attention + unit. + num_heads: The number of attention heads to use. + Denoted `H` in [2]. + head_dim: The dimension of a single(!) attention head within + a multi-head attention unit. Denoted as `d` in [3]. + input_layernorm: Whether to prepend a LayerNorm before + everything else. Should be True for building a GTrXL. + output_activation (Optional[tf.nn.activation]): Optional tf.nn + activation function. Should be relu for GTrXL. + **kwargs: + """ + if log_once("relative_multi_head_attention"): + deprecation_warning( + old="rllib.models.tf.layers.RelativeMultiHeadAttention", + ) + super().__init__(**kwargs) + + # No bias or non-linearity. + self._num_heads = num_heads + self._head_dim = head_dim + # 3=Query, key, and value inputs. 
+ self._qkv_layer = tf.keras.layers.Dense( + 3 * num_heads * head_dim, use_bias=False + ) + self._linear_layer = tf.keras.layers.TimeDistributed( + tf.keras.layers.Dense(out_dim, use_bias=False, activation=output_activation) + ) + + self._uvar = self.add_weight(shape=(num_heads, head_dim)) + self._vvar = self.add_weight(shape=(num_heads, head_dim)) + + # Constant (non-trainable) sinusoid rel pos encoding matrix, which + # depends on this incoming time dimension. + # For inference, we prepend the memory to the current timestep's + # input: Tau + 1. For training, we prepend the memory to the input + # sequence: Tau + T. + self._pos_embedding = PositionalEmbedding(out_dim) + self._pos_proj = tf.keras.layers.Dense(num_heads * head_dim, use_bias=False) + + self._input_layernorm = None + if input_layernorm: + self._input_layernorm = tf.keras.layers.LayerNormalization(axis=-1) + + def call( + self, inputs: TensorType, memory: Optional[TensorType] = None + ) -> TensorType: + T = tf.shape(inputs)[1] # length of segment (time) + H = self._num_heads # number of attention heads + d = self._head_dim # attention head dimension + + # Add previous memory chunk (as const, w/o gradient) to input. + # Tau (number of (prev) time slices in each memory chunk). + Tau = tf.shape(memory)[1] + inputs = tf.concat([tf.stop_gradient(memory), inputs], axis=1) + + # Apply the Layer-Norm. + if self._input_layernorm is not None: + inputs = self._input_layernorm(inputs) + + qkv = self._qkv_layer(inputs) + + queries, keys, values = tf.split(qkv, 3, -1) + # Cut out memory timesteps from query. + queries = queries[:, -T:] + + # Splitting up queries into per-head dims (d). 
+ queries = tf.reshape(queries, [-1, T, H, d]) + keys = tf.reshape(keys, [-1, Tau + T, H, d]) + values = tf.reshape(values, [-1, Tau + T, H, d]) + + R = self._pos_embedding(Tau + T) + R = self._pos_proj(R) + R = tf.reshape(R, [Tau + T, H, d]) + + # b=batch + # i and j=time indices (i=max-timesteps (inputs); j=Tau memory space) + # h=head + # d=head-dim (over which we will reduce-sum) + score = tf.einsum("bihd,bjhd->bijh", queries + self._uvar, keys) + pos_score = tf.einsum("bihd,jhd->bijh", queries + self._vvar, R) + score = score + self.rel_shift(pos_score) + score = score / d**0.5 + + # Causal mask of the same length as the sequence. + mask = tf.sequence_mask(tf.range(Tau + 1, Tau + T + 1), dtype=score.dtype) + mask = mask[None, :, :, None] + + masked_score = score * mask + 1e30 * (mask - 1.0) + wmat = tf.nn.softmax(masked_score, axis=2) + + out = tf.einsum("bijh,bjhd->bihd", wmat, values) + out = tf.reshape(out, tf.concat((tf.shape(out)[:2], [H * d]), axis=0)) + return self._linear_layer(out) + + @staticmethod + def rel_shift(x: TensorType) -> TensorType: + # Transposed version of the shift approach described in [3]. 
+ # https://github.com/kimiyoung/transformer-xl/blob/ + # 44781ed21dbaec88b280f74d9ae2877f52b492a5/tf/model.py#L31 + x_size = tf.shape(x) + + x = tf.pad(x, [[0, 0], [0, 0], [1, 0], [0, 0]]) + x = tf.reshape(x, [x_size[0], x_size[2] + 1, x_size[1], x_size[3]]) + x = x[:, 1:, :, :] + x = tf.reshape(x, x_size) + + return x + + +class PositionalEmbedding(tf.keras.layers.Layer if tf else object): + def __init__(self, out_dim, **kwargs): + super().__init__(**kwargs) + self.inverse_freq = 1 / (10000 ** (tf.range(0, out_dim, 2.0) / out_dim)) + + def call(self, seq_length): + pos_offsets = tf.cast(tf.range(seq_length - 1, -1, -1), tf.float32) + inputs = pos_offsets[:, None] * self.inverse_freq[None, :] + return tf.concat((tf.sin(inputs), tf.cos(inputs)), axis=-1) diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/models/tf/layers/skip_connection.py b/.venv/lib/python3.11/site-packages/ray/rllib/models/tf/layers/skip_connection.py new file mode 100644 index 0000000000000000000000000000000000000000..3ee1751caf36e4a760da3c60c08fe279400dcb12 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/rllib/models/tf/layers/skip_connection.py @@ -0,0 +1,46 @@ +from typing import Optional, Any + +from ray.rllib.utils.framework import try_import_tf +from ray.rllib.utils.typing import TensorType +from ray.rllib.utils.deprecation import deprecation_warning +from ray.util import log_once + +tf1, tf, tfv = try_import_tf() + + +class SkipConnection(tf.keras.layers.Layer if tf else object): + """Skip connection layer. + + Adds the original input to the output (regular residual layer) OR uses + input as hidden state input to a given fan_in_layer. + """ + + def __init__(self, layer: Any, fan_in_layer: Optional[Any] = None, **kwargs): + """Initializes a SkipConnection keras layer object. + + Args: + layer (tf.keras.layers.Layer): Any layer processing inputs. 
+ fan_in_layer (Optional[tf.keras.layers.Layer]): An optional + layer taking two inputs: The original input and the output + of `layer`. + """ + if log_once("skip_connection"): + deprecation_warning( + old="rllib.models.tf.layers.SkipConnection", + ) + super().__init__(**kwargs) + self._layer = layer + self._fan_in_layer = fan_in_layer + + def call(self, inputs: TensorType, **kwargs) -> TensorType: + # del kwargs + outputs = self._layer(inputs, **kwargs) + # Residual case, just add inputs to outputs. + if self._fan_in_layer is None: + outputs = outputs + inputs + # Fan-in e.g. RNN: Call fan-in with `inputs` and `outputs`. + else: + # NOTE: In the GRU case, `inputs` is the state input. + outputs = self._fan_in_layer((inputs, outputs)) + + return outputs diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/models/torch/__init__.py b/.venv/lib/python3.11/site-packages/ray/rllib/models/torch/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..abbe5ef604646d8de477335e18c0bc4c88363b2c --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/rllib/models/torch/__init__.py @@ -0,0 +1,12 @@ +# from ray.rllib.models.torch.torch_modelv2 import TorchModelV2 +# from ray.rllib.models.torch.fcnet import FullyConnectedNetwork +# from ray.rllib.models.torch.recurrent_net import \ +# RecurrentNetwork +# from ray.rllib.models.torch.visionnet import VisionNetwork + +# __all__ = [ +# "FullyConnectedNetwork", +# "RecurrentNetwork", +# "TorchModelV2", +# "VisionNetwork", +# ] diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/models/torch/__pycache__/misc.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/rllib/models/torch/__pycache__/misc.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..daf29d6b7801d9d2f81fcd176f71fd815c7f5174 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/rllib/models/torch/__pycache__/misc.cpython-311.pyc differ diff --git 
a/.venv/lib/python3.11/site-packages/ray/rllib/models/torch/__pycache__/visionnet.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/rllib/models/torch/__pycache__/visionnet.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f771135e2383b78690ea04c835cceaa79d8e0061 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/rllib/models/torch/__pycache__/visionnet.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/models/torch/attention_net.py b/.venv/lib/python3.11/site-packages/ray/rllib/models/torch/attention_net.py new file mode 100644 index 0000000000000000000000000000000000000000..2382a4da1381a7dad748155498578009b13170e0 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/rllib/models/torch/attention_net.py @@ -0,0 +1,457 @@ +""" +[1] - Attention Is All You Need - Vaswani, Jones, Shazeer, Parmar, + Uszkoreit, Gomez, Kaiser - Google Brain/Research, U Toronto - 2017. + https://arxiv.org/pdf/1706.03762.pdf +[2] - Stabilizing Transformers for Reinforcement Learning - E. Parisotto + et al. - DeepMind - 2019. https://arxiv.org/pdf/1910.06764.pdf +[3] - Transformer-XL: Attentive Language Models Beyond a Fixed-Length Context. + Z. Dai, Z. Yang, et al. - Carnegie Mellon U - 2019. 
+ https://www.aclweb.org/anthology/P19-1285.pdf +""" +import gymnasium as gym +from gymnasium.spaces import Box, Discrete, MultiDiscrete +import numpy as np +import tree # pip install dm_tree +from typing import Dict, Optional, Union + +from ray.rllib.models.modelv2 import ModelV2 +from ray.rllib.models.torch.misc import SlimFC +from ray.rllib.models.torch.modules import ( + GRUGate, + RelativeMultiHeadAttention, + SkipConnection, +) +from ray.rllib.models.torch.recurrent_net import RecurrentNetwork +from ray.rllib.models.torch.torch_modelv2 import TorchModelV2 +from ray.rllib.policy.sample_batch import SampleBatch +from ray.rllib.policy.view_requirement import ViewRequirement +from ray.rllib.utils.annotations import OldAPIStack, override +from ray.rllib.utils.framework import try_import_torch +from ray.rllib.utils.spaces.space_utils import get_base_struct_from_space +from ray.rllib.utils.torch_utils import flatten_inputs_to_1d_tensor, one_hot +from ray.rllib.utils.typing import ModelConfigDict, TensorType, List +from ray.rllib.utils.deprecation import deprecation_warning +from ray.util import log_once + +torch, nn = try_import_torch() + + +@OldAPIStack +class GTrXLNet(RecurrentNetwork, nn.Module): + """A GTrXL net Model described in [2]. + + This is still in an experimental phase. + Can be used as a drop-in replacement for LSTMs in PPO and IMPALA. + + To use this network as a replacement for an RNN, configure your Algorithm + as follows: + + Examples: + >> config["model"]["custom_model"] = GTrXLNet + >> config["model"]["max_seq_len"] = 10 + >> config["model"]["custom_model_config"] = { + >> num_transformer_units=1, + >> attention_dim=32, + >> num_heads=2, + >> memory_tau=50, + >> etc.. 
+ >> } + """ + + def __init__( + self, + observation_space: gym.spaces.Space, + action_space: gym.spaces.Space, + num_outputs: Optional[int], + model_config: ModelConfigDict, + name: str, + *, + num_transformer_units: int = 1, + attention_dim: int = 64, + num_heads: int = 2, + memory_inference: int = 50, + memory_training: int = 50, + head_dim: int = 32, + position_wise_mlp_dim: int = 32, + init_gru_gate_bias: float = 2.0 + ): + """Initializes a GTrXLNet. + + Args: + num_transformer_units: The number of Transformer repeats to + use (denoted L in [2]). + attention_dim: The input and output dimensions of one + Transformer unit. + num_heads: The number of attention heads to use in parallel. + Denoted as `H` in [3]. + memory_inference: The number of timesteps to concat (time + axis) and feed into the next transformer unit as inference + input. The first transformer unit will receive this number of + past observations (plus the current one), instead. + memory_training: The number of timesteps to concat (time + axis) and feed into the next transformer unit as training + input (plus the actual input sequence of len=max_seq_len). + The first transformer unit will receive this number of + past observations (plus the input sequence), instead. + head_dim: The dimension of a single(!) attention head within + a multi-head attention unit. Denoted as `d` in [3]. + position_wise_mlp_dim: The dimension of the hidden layer + within the position-wise MLP (after the multi-head attention + block within one Transformer unit). This is the size of the + first of the two layers within the PositionwiseFeedforward. The + second layer always has size=`attention_dim`. + init_gru_gate_bias: Initial bias values for the GRU gates + (two GRUs per Transformer unit, one after the MHA, one after + the position-wise MLP). 
+ """ + super().__init__( + observation_space, action_space, num_outputs, model_config, name + ) + + nn.Module.__init__(self) + + self.num_transformer_units = num_transformer_units + self.attention_dim = attention_dim + self.num_heads = num_heads + self.memory_inference = memory_inference + self.memory_training = memory_training + self.head_dim = head_dim + self.max_seq_len = model_config["max_seq_len"] + self.obs_dim = observation_space.shape[0] + + self.linear_layer = SlimFC(in_size=self.obs_dim, out_size=self.attention_dim) + + self.layers = [self.linear_layer] + + attention_layers = [] + # 2) Create L Transformer blocks according to [2]. + for i in range(self.num_transformer_units): + # RelativeMultiHeadAttention part. + MHA_layer = SkipConnection( + RelativeMultiHeadAttention( + in_dim=self.attention_dim, + out_dim=self.attention_dim, + num_heads=num_heads, + head_dim=head_dim, + input_layernorm=True, + output_activation=nn.ReLU, + ), + fan_in_layer=GRUGate(self.attention_dim, init_gru_gate_bias), + ) + + # Position-wise MultiLayerPerceptron part. + E_layer = SkipConnection( + nn.Sequential( + torch.nn.LayerNorm(self.attention_dim), + SlimFC( + in_size=self.attention_dim, + out_size=position_wise_mlp_dim, + use_bias=False, + activation_fn=nn.ReLU, + ), + SlimFC( + in_size=position_wise_mlp_dim, + out_size=self.attention_dim, + use_bias=False, + activation_fn=nn.ReLU, + ), + ), + fan_in_layer=GRUGate(self.attention_dim, init_gru_gate_bias), + ) + + # Build a list of all attanlayers in order. + attention_layers.extend([MHA_layer, E_layer]) + + # Create a Sequential such that all parameters inside the attention + # layers are automatically registered with this top-level model. + self.attention_layers = nn.Sequential(*attention_layers) + self.layers.extend(attention_layers) + + # Final layers if num_outputs not None. + self.logits = None + self.values_out = None + # Last value output. + self._value_out = None + # Postprocess GTrXL output with another hidden layer. 
+ if self.num_outputs is not None: + self.logits = SlimFC( + in_size=self.attention_dim, + out_size=self.num_outputs, + activation_fn=nn.ReLU, + ) + + # Value function used by all RLlib Torch RL implementations. + self.values_out = SlimFC( + in_size=self.attention_dim, out_size=1, activation_fn=None + ) + else: + self.num_outputs = self.attention_dim + + # Setup trajectory views (`memory-inference` x past memory outs). + for i in range(self.num_transformer_units): + space = Box(-1.0, 1.0, shape=(self.attention_dim,)) + self.view_requirements["state_in_{}".format(i)] = ViewRequirement( + "state_out_{}".format(i), + shift="-{}:-1".format(self.memory_inference), + # Repeat the incoming state every max-seq-len times. + batch_repeat_value=self.max_seq_len, + space=space, + ) + self.view_requirements["state_out_{}".format(i)] = ViewRequirement( + space=space, used_for_training=False + ) + + @override(ModelV2) + def forward( + self, input_dict, state: List[TensorType], seq_lens: TensorType + ) -> (TensorType, List[TensorType]): + assert seq_lens is not None + + # Add the needed batch rank (tf Models' Input requires this). + observations = input_dict[SampleBatch.OBS] + # Add the time dim to observations. + B = len(seq_lens) + T = observations.shape[0] // B + observations = torch.reshape( + observations, [-1, T] + list(observations.shape[1:]) + ) + + all_out = observations + memory_outs = [] + for i in range(len(self.layers)): + # MHA layers which need memory passed in. + if i % 2 == 1: + all_out = self.layers[i](all_out, memory=state[i // 2]) + # Either self.linear_layer (initial obs -> attn. dim layer) or + # MultiLayerPerceptrons. The output of these layers is always the + # memory for the next forward pass. + else: + all_out = self.layers[i](all_out) + memory_outs.append(all_out) + + # Discard last output (not needed as a memory since it's the last + # layer). 
+ memory_outs = memory_outs[:-1] + + if self.logits is not None: + out = self.logits(all_out) + self._value_out = self.values_out(all_out) + out_dim = self.num_outputs + else: + out = all_out + out_dim = self.attention_dim + + return torch.reshape(out, [-1, out_dim]), [ + torch.reshape(m, [-1, self.attention_dim]) for m in memory_outs + ] + + # TODO: (sven) Deprecate this once trajectory view API has fully matured. + @override(RecurrentNetwork) + def get_initial_state(self) -> List[np.ndarray]: + return [] + + @override(ModelV2) + def value_function(self) -> TensorType: + assert ( + self._value_out is not None + ), "Must call forward first AND must have value branch!" + return torch.reshape(self._value_out, [-1]) + + +class AttentionWrapper(TorchModelV2, nn.Module): + """GTrXL wrapper serving as interface for ModelV2s that set use_attention.""" + + def __init__( + self, + obs_space: gym.spaces.Space, + action_space: gym.spaces.Space, + num_outputs: int, + model_config: ModelConfigDict, + name: str, + ): + if log_once("deprecate_attention_wrapper_torch"): + deprecation_warning( + old="ray.rllib.models.torch.attention_net.AttentionWrapper" + ) + + nn.Module.__init__(self) + super().__init__(obs_space, action_space, None, model_config, name) + + self.use_n_prev_actions = model_config["attention_use_n_prev_actions"] + self.use_n_prev_rewards = model_config["attention_use_n_prev_rewards"] + + self.action_space_struct = get_base_struct_from_space(self.action_space) + self.action_dim = 0 + + for space in tree.flatten(self.action_space_struct): + if isinstance(space, Discrete): + self.action_dim += space.n + elif isinstance(space, MultiDiscrete): + self.action_dim += np.sum(space.nvec) + elif space.shape is not None: + self.action_dim += int(np.prod(space.shape)) + else: + self.action_dim += int(len(space)) + + # Add prev-action/reward nodes to input to LSTM. 
+ if self.use_n_prev_actions: + self.num_outputs += self.use_n_prev_actions * self.action_dim + if self.use_n_prev_rewards: + self.num_outputs += self.use_n_prev_rewards + + cfg = model_config + + self.attention_dim = cfg["attention_dim"] + + if self.num_outputs is not None: + in_space = gym.spaces.Box( + float("-inf"), float("inf"), shape=(self.num_outputs,), dtype=np.float32 + ) + else: + in_space = obs_space + + # Construct GTrXL sub-module w/ num_outputs=None (so it does not + # create a logits/value output; we'll do this ourselves in this wrapper + # here). + self.gtrxl = GTrXLNet( + in_space, + action_space, + None, + model_config, + "gtrxl", + num_transformer_units=cfg["attention_num_transformer_units"], + attention_dim=self.attention_dim, + num_heads=cfg["attention_num_heads"], + head_dim=cfg["attention_head_dim"], + memory_inference=cfg["attention_memory_inference"], + memory_training=cfg["attention_memory_training"], + position_wise_mlp_dim=cfg["attention_position_wise_mlp_dim"], + init_gru_gate_bias=cfg["attention_init_gru_gate_bias"], + ) + + # Set final num_outputs to correct value (depending on action space). + self.num_outputs = num_outputs + + # Postprocess GTrXL output with another hidden layer and compute + # values. + self._logits_branch = SlimFC( + in_size=self.attention_dim, + out_size=self.num_outputs, + activation_fn=None, + initializer=torch.nn.init.xavier_uniform_, + ) + self._value_branch = SlimFC( + in_size=self.attention_dim, + out_size=1, + activation_fn=None, + initializer=torch.nn.init.xavier_uniform_, + ) + + self.view_requirements = self.gtrxl.view_requirements + self.view_requirements["obs"].space = self.obs_space + + # Add prev-a/r to this model's view, if required. 
+ if self.use_n_prev_actions: + self.view_requirements[SampleBatch.PREV_ACTIONS] = ViewRequirement( + SampleBatch.ACTIONS, + space=self.action_space, + shift="-{}:-1".format(self.use_n_prev_actions), + ) + if self.use_n_prev_rewards: + self.view_requirements[SampleBatch.PREV_REWARDS] = ViewRequirement( + SampleBatch.REWARDS, shift="-{}:-1".format(self.use_n_prev_rewards) + ) + + @override(RecurrentNetwork) + def forward( + self, + input_dict: Dict[str, TensorType], + state: List[TensorType], + seq_lens: TensorType, + ) -> (TensorType, List[TensorType]): + assert seq_lens is not None + # Push obs through "unwrapped" net's `forward()` first. + wrapped_out, _ = self._wrapped_forward(input_dict, [], None) + + # Concat. prev-action/reward if required. + prev_a_r = [] + + # Prev actions. + if self.use_n_prev_actions: + prev_n_actions = input_dict[SampleBatch.PREV_ACTIONS] + # If actions are not processed yet (in their original form as + # have been sent to environment): + # Flatten/one-hot into 1D array. + if self.model_config["_disable_action_flattening"]: + # Merge prev n actions into flat tensor. + flat = flatten_inputs_to_1d_tensor( + prev_n_actions, + spaces_struct=self.action_space_struct, + time_axis=True, + ) + # Fold time-axis into flattened data. + flat = torch.reshape(flat, [flat.shape[0], -1]) + prev_a_r.append(flat) + # If actions are already flattened (but not one-hot'd yet!), + # one-hot discrete/multi-discrete actions here and concatenate the + # n most recent actions together. 
+ else: + if isinstance(self.action_space, Discrete): + for i in range(self.use_n_prev_actions): + prev_a_r.append( + one_hot( + prev_n_actions[:, i].float(), space=self.action_space + ) + ) + elif isinstance(self.action_space, MultiDiscrete): + for i in range( + 0, self.use_n_prev_actions, self.action_space.shape[0] + ): + prev_a_r.append( + one_hot( + prev_n_actions[ + :, i : i + self.action_space.shape[0] + ].float(), + space=self.action_space, + ) + ) + else: + prev_a_r.append( + torch.reshape( + prev_n_actions.float(), + [-1, self.use_n_prev_actions * self.action_dim], + ) + ) + # Prev rewards. + if self.use_n_prev_rewards: + prev_a_r.append( + torch.reshape( + input_dict[SampleBatch.PREV_REWARDS].float(), + [-1, self.use_n_prev_rewards], + ) + ) + + # Concat prev. actions + rewards to the "main" input. + if prev_a_r: + wrapped_out = torch.cat([wrapped_out] + prev_a_r, dim=1) + + # Then through our GTrXL. + input_dict["obs_flat"] = input_dict["obs"] = wrapped_out + + self._features, memory_outs = self.gtrxl(input_dict, state, seq_lens) + model_out = self._logits_branch(self._features) + return model_out, memory_outs + + @override(ModelV2) + def get_initial_state(self) -> Union[List[np.ndarray], List[TensorType]]: + return [ + torch.zeros( + self.gtrxl.view_requirements["state_in_{}".format(i)].space.shape + ) + for i in range(self.gtrxl.num_transformer_units) + ] + + @override(ModelV2) + def value_function(self) -> TensorType: + assert self._features is not None, "Must call forward() first!" 
+ return torch.reshape(self._value_branch(self._features), [-1]) diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/models/torch/complex_input_net.py b/.venv/lib/python3.11/site-packages/ray/rllib/models/torch/complex_input_net.py new file mode 100644 index 0000000000000000000000000000000000000000..c5c81dba790c4f97f2c6ab5d2b765af0691005c8 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/rllib/models/torch/complex_input_net.py @@ -0,0 +1,237 @@ +from gymnasium.spaces import Box, Discrete, MultiDiscrete +import numpy as np +import tree # pip install dm_tree + +from ray.rllib.models.torch.misc import ( + normc_initializer as torch_normc_initializer, + SlimFC, +) +from ray.rllib.models.catalog import ModelCatalog +from ray.rllib.models.modelv2 import ModelV2, restore_original_dimensions +from ray.rllib.models.torch.torch_modelv2 import TorchModelV2 +from ray.rllib.models.utils import get_filter_config +from ray.rllib.policy.sample_batch import SampleBatch +from ray.rllib.utils.annotations import OldAPIStack, override +from ray.rllib.utils.framework import try_import_torch +from ray.rllib.utils.spaces.space_utils import flatten_space +from ray.rllib.utils.torch_utils import one_hot + +torch, nn = try_import_torch() + + +@OldAPIStack +class ComplexInputNetwork(TorchModelV2, nn.Module): + """TorchModelV2 concat'ing CNN outputs to flat input(s), followed by FC(s). + + Note: This model should be used for complex (Dict or Tuple) observation + spaces that have one or more image components. + + The data flow is as follows: + + `obs` (e.g. Tuple[img0, img1, discrete0]) -> `CNN0 + CNN1 + ONE-HOT` + `CNN0 + CNN1 + ONE-HOT` -> concat all flat outputs -> `out` + `out` -> (optional) FC-stack -> `out2` + `out2` -> action (logits) and value heads. 
+ """ + + def __init__(self, obs_space, action_space, num_outputs, model_config, name): + self.original_space = ( + obs_space.original_space + if hasattr(obs_space, "original_space") + else obs_space + ) + + self.processed_obs_space = ( + self.original_space + if model_config.get("_disable_preprocessor_api") + else obs_space + ) + + nn.Module.__init__(self) + TorchModelV2.__init__( + self, self.original_space, action_space, num_outputs, model_config, name + ) + + self.flattened_input_space = flatten_space(self.original_space) + + # Atari type CNNs or IMPALA type CNNs (with residual layers)? + # self.cnn_type = self.model_config["custom_model_config"].get( + # "conv_type", "atari") + + # Build the CNN(s) given obs_space's image components. + self.cnns = nn.ModuleDict() + self.one_hot = nn.ModuleDict() + self.flatten_dims = {} + self.flatten = nn.ModuleDict() + concat_size = 0 + for i, component in enumerate(self.flattened_input_space): + i = str(i) + # Image space. + if len(component.shape) == 3 and isinstance(component, Box): + config = { + "conv_filters": model_config["conv_filters"] + if "conv_filters" in model_config + else get_filter_config(component.shape), + "conv_activation": model_config.get("conv_activation"), + "post_fcnet_hiddens": [], + } + # if self.cnn_type == "atari": + self.cnns[i] = ModelCatalog.get_model_v2( + component, + action_space, + num_outputs=None, + model_config=config, + framework="torch", + name="cnn_{}".format(i), + ) + # TODO (sven): add IMPALA-style option. + # else: + # cnn = TorchImpalaVisionNet( + # component, + # action_space, + # num_outputs=None, + # model_config=config, + # name="cnn_{}".format(i)) + + concat_size += self.cnns[i].num_outputs + self.add_module("cnn_{}".format(i), self.cnns[i]) + # Discrete|MultiDiscrete inputs -> One-hot encode. 
+ elif isinstance(component, (Discrete, MultiDiscrete)): + if isinstance(component, Discrete): + size = component.n + else: + size = np.sum(component.nvec) + config = { + "fcnet_hiddens": model_config["fcnet_hiddens"], + "fcnet_activation": model_config.get("fcnet_activation"), + "post_fcnet_hiddens": [], + } + self.one_hot[i] = ModelCatalog.get_model_v2( + Box(-1.0, 1.0, (size,), np.float32), + action_space, + num_outputs=None, + model_config=config, + framework="torch", + name="one_hot_{}".format(i), + ) + concat_size += self.one_hot[i].num_outputs + self.add_module("one_hot_{}".format(i), self.one_hot[i]) + # Everything else (1D Box). + else: + size = int(np.prod(component.shape)) + config = { + "fcnet_hiddens": model_config["fcnet_hiddens"], + "fcnet_activation": model_config.get("fcnet_activation"), + "post_fcnet_hiddens": [], + } + self.flatten[i] = ModelCatalog.get_model_v2( + Box(-1.0, 1.0, (size,), np.float32), + action_space, + num_outputs=None, + model_config=config, + framework="torch", + name="flatten_{}".format(i), + ) + self.flatten_dims[i] = size + concat_size += self.flatten[i].num_outputs + self.add_module("flatten_{}".format(i), self.flatten[i]) + + # Optional post-concat FC-stack. + post_fc_stack_config = { + "fcnet_hiddens": model_config.get("post_fcnet_hiddens", []), + "fcnet_activation": model_config.get("post_fcnet_activation", "relu"), + } + self.post_fc_stack = ModelCatalog.get_model_v2( + Box(float("-inf"), float("inf"), shape=(concat_size,), dtype=np.float32), + self.action_space, + None, + post_fc_stack_config, + framework="torch", + name="post_fc_stack", + ) + + # Actions and value heads. + self.logits_layer = None + self.value_layer = None + self._value_out = None + + if num_outputs: + # Action-distribution head. + self.logits_layer = SlimFC( + in_size=self.post_fc_stack.num_outputs, + out_size=num_outputs, + activation_fn=None, + initializer=torch_normc_initializer(0.01), + ) + # Create the value branch model. 
+ self.value_layer = SlimFC( + in_size=self.post_fc_stack.num_outputs, + out_size=1, + activation_fn=None, + initializer=torch_normc_initializer(0.01), + ) + else: + self.num_outputs = concat_size + + @override(ModelV2) + def forward(self, input_dict, state, seq_lens): + if SampleBatch.OBS in input_dict and "obs_flat" in input_dict: + orig_obs = input_dict[SampleBatch.OBS] + else: + orig_obs = restore_original_dimensions( + input_dict[SampleBatch.OBS], self.processed_obs_space, tensorlib="torch" + ) + # Push observations through the different components + # (CNNs, one-hot + FC, etc..). + outs = [] + for i, component in enumerate(tree.flatten(orig_obs)): + i = str(i) + if i in self.cnns: + cnn_out, _ = self.cnns[i](SampleBatch({SampleBatch.OBS: component})) + outs.append(cnn_out) + elif i in self.one_hot: + if component.dtype in [ + torch.int8, + torch.int16, + torch.int32, + torch.int64, + torch.uint8, + ]: + one_hot_in = { + SampleBatch.OBS: one_hot( + component, self.flattened_input_space[int(i)] + ) + } + else: + one_hot_in = {SampleBatch.OBS: component} + one_hot_out, _ = self.one_hot[i](SampleBatch(one_hot_in)) + outs.append(one_hot_out) + else: + nn_out, _ = self.flatten[i]( + SampleBatch( + { + SampleBatch.OBS: torch.reshape( + component, [-1, self.flatten_dims[i]] + ) + } + ) + ) + outs.append(nn_out) + + # Concat all outputs and the non-image inputs. + out = torch.cat(outs, dim=1) + # Push through (optional) FC-stack (this may be an empty stack). + out, _ = self.post_fc_stack(SampleBatch({SampleBatch.OBS: out})) + + # No logits/value branches. + if self.logits_layer is None: + return out, [] + + # Logits- and value branches. 
+ logits, values = self.logits_layer(out), self.value_layer(out) + self._value_out = torch.reshape(values, [-1]) + return logits, [] + + @override(ModelV2) + def value_function(self): + return self._value_out diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/models/torch/fcnet.py b/.venv/lib/python3.11/site-packages/ray/rllib/models/torch/fcnet.py new file mode 100644 index 0000000000000000000000000000000000000000..2ba907a54ed09b207c62a161ab54a27f89a281f4 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/rllib/models/torch/fcnet.py @@ -0,0 +1,160 @@ +import logging +import numpy as np +import gymnasium as gym + +from ray.rllib.models.torch.torch_modelv2 import TorchModelV2 +from ray.rllib.models.torch.misc import SlimFC, AppendBiasLayer, normc_initializer +from ray.rllib.utils.annotations import OldAPIStack, override +from ray.rllib.utils.framework import try_import_torch +from ray.rllib.utils.typing import Dict, TensorType, List, ModelConfigDict + +torch, nn = try_import_torch() + +logger = logging.getLogger(__name__) + + +@OldAPIStack +class FullyConnectedNetwork(TorchModelV2, nn.Module): + """Generic fully connected network.""" + + def __init__( + self, + obs_space: gym.spaces.Space, + action_space: gym.spaces.Space, + num_outputs: int, + model_config: ModelConfigDict, + name: str, + ): + TorchModelV2.__init__( + self, obs_space, action_space, num_outputs, model_config, name + ) + nn.Module.__init__(self) + + hiddens = list(model_config.get("fcnet_hiddens", [])) + list( + model_config.get("post_fcnet_hiddens", []) + ) + activation = model_config.get("fcnet_activation") + if not model_config.get("fcnet_hiddens", []): + activation = model_config.get("post_fcnet_activation") + no_final_linear = model_config.get("no_final_linear") + self.vf_share_layers = model_config.get("vf_share_layers") + self.free_log_std = model_config.get("free_log_std") + # Generate free-floating bias variables for the second half of + # the outputs. 
+ if self.free_log_std: + assert num_outputs % 2 == 0, ( + "num_outputs must be divisible by two", + num_outputs, + ) + num_outputs = num_outputs // 2 + + layers = [] + prev_layer_size = int(np.prod(obs_space.shape)) + self._logits = None + + # Create layers 0 to second-last. + for size in hiddens[:-1]: + layers.append( + SlimFC( + in_size=prev_layer_size, + out_size=size, + initializer=normc_initializer(1.0), + activation_fn=activation, + ) + ) + prev_layer_size = size + + # The last layer is adjusted to be of size num_outputs, but it's a + # layer with activation. + if no_final_linear and num_outputs: + layers.append( + SlimFC( + in_size=prev_layer_size, + out_size=num_outputs, + initializer=normc_initializer(1.0), + activation_fn=activation, + ) + ) + prev_layer_size = num_outputs + # Finish the layers with the provided sizes (`hiddens`), plus - + # iff num_outputs > 0 - a last linear layer of size num_outputs. + else: + if len(hiddens) > 0: + layers.append( + SlimFC( + in_size=prev_layer_size, + out_size=hiddens[-1], + initializer=normc_initializer(1.0), + activation_fn=activation, + ) + ) + prev_layer_size = hiddens[-1] + if num_outputs: + self._logits = SlimFC( + in_size=prev_layer_size, + out_size=num_outputs, + initializer=normc_initializer(0.01), + activation_fn=None, + ) + else: + self.num_outputs = ([int(np.prod(obs_space.shape))] + hiddens[-1:])[-1] + + # Layer to add the log std vars to the state-dependent means. + if self.free_log_std and self._logits: + self._append_free_log_std = AppendBiasLayer(num_outputs) + + self._hidden_layers = nn.Sequential(*layers) + + self._value_branch_separate = None + if not self.vf_share_layers: + # Build a parallel set of hidden layers for the value net. 
+ prev_vf_layer_size = int(np.prod(obs_space.shape)) + vf_layers = [] + for size in hiddens: + vf_layers.append( + SlimFC( + in_size=prev_vf_layer_size, + out_size=size, + activation_fn=activation, + initializer=normc_initializer(1.0), + ) + ) + prev_vf_layer_size = size + self._value_branch_separate = nn.Sequential(*vf_layers) + + self._value_branch = SlimFC( + in_size=prev_layer_size, + out_size=1, + initializer=normc_initializer(0.01), + activation_fn=None, + ) + # Holds the current "base" output (before logits layer). + self._features = None + # Holds the last input, in case value branch is separate. + self._last_flat_in = None + + @override(TorchModelV2) + def forward( + self, + input_dict: Dict[str, TensorType], + state: List[TensorType], + seq_lens: TensorType, + ) -> (TensorType, List[TensorType]): + obs = input_dict["obs_flat"].float() + self._last_flat_in = obs.reshape(obs.shape[0], -1) + self._features = self._hidden_layers(self._last_flat_in) + logits = self._logits(self._features) if self._logits else self._features + if self.free_log_std: + logits = self._append_free_log_std(logits) + return logits, state + + @override(TorchModelV2) + def value_function(self) -> TensorType: + assert self._features is not None, "must call forward() first" + if self._value_branch_separate: + out = self._value_branch( + self._value_branch_separate(self._last_flat_in) + ).squeeze(1) + else: + out = self._value_branch(self._features).squeeze(1) + return out diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/models/torch/mingpt.py b/.venv/lib/python3.11/site-packages/ray/rllib/models/torch/mingpt.py new file mode 100644 index 0000000000000000000000000000000000000000..4bf54aa2fe8e5c4a88c1dc9ba7ca700f9659405b --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/rllib/models/torch/mingpt.py @@ -0,0 +1,303 @@ +# LICENSE: MIT +""" +Adapted from https://github.com/karpathy/minGPT + +Full definition of a GPT Language Model, all of it in this single file. 
+References: +1) the official GPT-2 TensorFlow implementation released by OpenAI: +https://github.com/openai/gpt-2/blob/master/src/model.py +2) huggingface/transformers PyTorch implementation: +https://github.com/huggingface/transformers/blob/main/src/transformers + /models/gpt2/modeling_gpt2.py +""" + +import math +from dataclasses import dataclass +from typing import Tuple + +import torch +import torch.nn as nn +from torch.nn import functional as F + +from ray.rllib.utils.annotations import DeveloperAPI +from ray.rllib.utils.deprecation import Deprecated + + +@DeveloperAPI +@dataclass +class GPTConfig: + # block size must be provided + block_size: int + + # transformer config + n_layer: int = 12 + n_head: int = 12 + n_embed: int = 768 + + # dropout config + embed_pdrop: float = 0.1 + resid_pdrop: float = 0.1 + attn_pdrop: float = 0.1 + + +@Deprecated(error=False) +class NewGELU(nn.Module): + """ + Implementation of the GELU activation function currently in Google BERT + repo (identical to OpenAI GPT). + Reference: Gaussian Error Linear Units (GELU) paper: + https://arxiv.org/abs/1606.08415 + """ + + def forward(self, x): + return ( + 0.5 + * x + * ( + 1.0 + + torch.tanh( + math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0)) + ) + ) + ) + + +@Deprecated(error=False) +class CausalSelfAttention(nn.Module): + """ + Vanilla multi-head masked self-attention layer with a projection at the end. + It is possible to use torch.nn.MultiheadAttention here but I am including an + explicit implementation here to show that there is nothing too scary here. 
+ """ + + def __init__(self, config: GPTConfig): + super().__init__() + assert config.n_embed % config.n_head == 0 + # key, query, value projections for all heads, but in a batch + self.c_attn = nn.Linear(config.n_embed, 3 * config.n_embed) + # output projection + self.c_proj = nn.Linear(config.n_embed, config.n_embed) + # regularization + self.attn_dropout = nn.Dropout(config.attn_pdrop) + self.resid_dropout = nn.Dropout(config.resid_pdrop) + # causal mask to ensure that attention is only applied to the left + # in the input sequence + self.register_buffer( + "bias", + torch.tril(torch.ones(config.block_size, config.block_size)).view( + 1, 1, config.block_size, config.block_size + ), + ) + self.n_head = config.n_head + self.n_embed = config.n_embed + + def forward(self, x, attention_masks=None): + # batch size, sequence length, embedding dimensionality (n_embed) + B, T, C = x.size() + + # calculate query, key, values for all heads in batch and move head + # forward to be the batch dim + q, k, v = self.c_attn(x).split(self.n_embed, dim=2) + # (B, nh, T, hs) + k = k.view(B, T, self.n_head, C // self.n_head).transpose(1, 2) + # (B, nh, T, hs) + q = q.view(B, T, self.n_head, C // self.n_head).transpose(1, 2) + # (B, nh, T, hs) + v = v.view(B, T, self.n_head, C // self.n_head).transpose(1, 2) + + # causal self-attention; Self-attend: + # (B, nh, T, hs) x (B, nh, hs, T) -> (B, nh, T, T) + att = (q @ k.transpose(-2, -1)) * (1.0 / math.sqrt(k.size(-1))) + att = att.masked_fill(self.bias[:, :, :T, :T] == 0, float("-inf")) + if attention_masks is not None: + att = att + attention_masks + att = F.softmax(att, dim=-1) + att = self.attn_dropout(att) + y = att @ v # (B, nh, T, T) x (B, nh, T, hs) -> (B, nh, T, hs) + # re-assemble all head outputs side by side + y = y.transpose(1, 2).contiguous().view(B, T, C) + + # output projection + y = self.resid_dropout(self.c_proj(y)) + return y, att + + +@Deprecated(error=False) +class Block(nn.Module): + """an unassuming Transformer 
@Deprecated(error=False)
class Block(nn.Module):
    """A standard pre-LayerNorm Transformer block."""

    def __init__(self, config: GPTConfig):
        """Builds the attention and feed-forward sub-layers.

        Args:
            config: The GPTConfig carrying n_embed and dropout rates.
        """
        super().__init__()
        self.ln_1 = nn.LayerNorm(config.n_embed)
        self.attn = CausalSelfAttention(config)
        self.ln_2 = nn.LayerNorm(config.n_embed)
        # Position-wise feed-forward sub-layer; kept as a ModuleDict so
        # parameter names match the GPT-2 checkpoint layout.
        self.mlp = nn.ModuleDict(
            dict(
                c_fc=nn.Linear(config.n_embed, 4 * config.n_embed),
                c_proj=nn.Linear(4 * config.n_embed, config.n_embed),
                act=NewGELU(),
                dropout=nn.Dropout(config.resid_pdrop),
            )
        )

    def forward(self, x, attention_masks=None):
        """Returns (block output, attention weights of the attn sub-layer)."""
        # Multi-head attention sub-layer + residual.
        x_att, att = self.attn(self.ln_1(x), attention_masks=attention_masks)
        x = x + x_att

        # Position-wise FFN sub-layer (fc -> GELU -> fc -> dropout) + residual.
        # Fix: apply ln_2 before the FFN. It was constructed but never used,
        # deviating from the GPT-2 / minGPT pre-LayerNorm reference.
        h = self.ln_2(x)
        x_ffn = self.mlp.dropout(self.mlp.c_proj(self.mlp.act(self.mlp.c_fc(h))))
        x = x + x_ffn
        return x, att


@Deprecated(error=False)
def configure_gpt_optimizer(
    model: nn.Module,
    learning_rate: float,
    weight_decay: float,
    betas: Tuple[float, float] = (0.9, 0.95),
    **kwargs,
) -> torch.optim.Optimizer:
    """Builds an AdamW optimizer with selective weight decay.

    Separates all model parameters into two buckets: those that get weight
    decay (Linear weights) and those that don't (all biases, plus LayerNorm
    and Embedding weights), then returns an AdamW over both groups.

    Args:
        model: The model whose parameters to optimize.
        learning_rate: The AdamW learning rate.
        weight_decay: Weight-decay coefficient for the decayed group.
        betas: AdamW beta coefficients.
        **kwargs: Additional AdamW keyword arguments.

    Returns:
        The configured torch.optim.AdamW optimizer.
    """
    decay = set()
    no_decay = set()
    whitelist_w_modules = (torch.nn.Linear,)
    blacklist_w_modules = (torch.nn.LayerNorm, torch.nn.Embedding)
    for mn, m in model.named_modules():
        for pn, p in m.named_parameters():
            fpn = "%s.%s" % (mn, pn) if mn else pn  # full param name
            # named_modules/named_parameters are recursive, so each tensor is
            # visited many times - but this way we always know the parent
            # module any tensor belongs to.
            if pn.endswith("bias"):
                # All biases are excluded from weight decay.
                no_decay.add(fpn)
            elif pn.endswith("weight") and isinstance(m, whitelist_w_modules):
                # Weights of whitelisted modules get weight decay.
                decay.add(fpn)
            elif pn.endswith("weight") and isinstance(m, blacklist_w_modules):
                # Weights of blacklisted modules do NOT get weight decay.
                no_decay.add(fpn)

    # Validate that every parameter landed in exactly one bucket.
    param_dict = {pn: p for pn, p in model.named_parameters()}
    inter_params = decay & no_decay
    union_params = decay | no_decay
    assert (
        len(inter_params) == 0
    ), f"parameters {str(inter_params)} made it into both decay/no_decay sets!"
    assert len(param_dict.keys() - union_params) == 0, (
        f"parameters {str(param_dict.keys() - union_params)} were not "
        f"separated into either decay/no_decay set!"
    )

    # Create the PyTorch optimizer object over the two groups.
    optim_groups = [
        {
            "params": [param_dict[pn] for pn in sorted(decay)],
            "weight_decay": weight_decay,
        },
        {
            "params": [param_dict[pn] for pn in sorted(no_decay)],
            "weight_decay": 0.0,
        },
    ]
    return torch.optim.AdamW(optim_groups, lr=learning_rate, betas=betas, **kwargs)
@Deprecated(error=False)
class GPT(nn.Module):
    """GPT Transformer model operating directly on input embeddings."""

    def __init__(self, config: GPTConfig):
        """Builds the dropout, Transformer blocks, and final LayerNorm.

        Args:
            config: The GPTConfig with block_size, n_layer, n_embed, etc.
        """
        super().__init__()
        assert config.block_size is not None
        self.block_size = config.block_size

        self.transformer = nn.ModuleDict(
            dict(
                drop=nn.Dropout(config.embed_pdrop),
                h=nn.ModuleList([Block(config) for _ in range(config.n_layer)]),
                ln_f=nn.LayerNorm(config.n_embed),
            )
        )

        # Default init for all modules, plus the GPT-2 paper's scaled init
        # for residual projections.
        self.apply(self._init_weights)
        residual_std = 0.02 / math.sqrt(2 * config.n_layer)
        for name, param in self.named_parameters():
            if name.endswith("c_proj.weight"):
                torch.nn.init.normal_(param, mean=0.0, std=residual_std)

    def _init_weights(self, module):
        """Standard GPT-2 init for Linear / Embedding / LayerNorm modules."""
        if isinstance(module, nn.Linear):
            torch.nn.init.normal_(module.weight, mean=0.0, std=0.02)
            if module.bias is not None:
                torch.nn.init.zeros_(module.bias)
        elif isinstance(module, nn.Embedding):
            torch.nn.init.normal_(module.weight, mean=0.0, std=0.02)
        elif isinstance(module, nn.LayerNorm):
            torch.nn.init.zeros_(module.bias)
            torch.nn.init.ones_(module.weight)

    def forward(self, input_embeds, attention_masks=None, return_attentions=False):
        """Forward pass through all Transformer blocks.

        Args:
            input_embeds: [batch_size x seq_len x n_embed] input embeddings.
            attention_masks: Optional [batch_size x seq_len] mask;
                0 = don't attend, 1 = attend.
            return_attentions: If True, also return per-block attention
                weights.

        Returns:
            The final hidden states [B, T, n_embed]; or a tuple
            (hidden states, list of attention weights) when
            `return_attentions` is True.
        """
        B, T, _ = input_embeds.size()
        assert T <= self.block_size, (
            f"Cannot forward sequence of length {T}, "
            f"block size is only {self.block_size}"
        )

        if attention_masks is not None:
            _B, _T = attention_masks.size()
            assert _B == B and _T == T
            # Broadcast the 2D mask to [B, 1, 1, T] so it applies across all
            # heads and query positions; the causal triangle is handled
            # inside each attention layer.
            attention_masks = attention_masks[:, None, None, :]
            # Convert {0, 1} to additive {-1e9, 0} pre-softmax biases, which
            # effectively removes masked positions from the softmax.
            attention_masks = attention_masks.to(dtype=input_embeds.dtype)
            attention_masks = (1.0 - attention_masks) * -1e9

        # Forward the GPT model itself.
        x = self.transformer.drop(input_embeds)

        atts = []
        for block in self.transformer.h:
            x, att = block(x, attention_masks=attention_masks)
            atts.append(att)
        x = self.transformer.ln_f(x)

        return (x, atts) if return_attentions else x
@DeveloperAPI
def same_padding_transpose_after_stride(
    strided_size: Tuple[int, int],
    kernel: Tuple[int, int],
    stride: Union[int, Tuple[int, int]],
) -> (Union[int, Tuple[int, int]], Tuple[int, int]):
    """Computes padding/output size matching TF Conv2DTranspose `same` padding.

    With padding="same", TensorFlow's Conv2DTranspose zero-pads the already
    strided image such that the output has exactly `input size * stride`
    pixels, no matter the kernel size. For example, a 4x4 input with stride=2
    becomes a 7x7 strided image; with kernel=5 this function returns padding
    (left=3, right=2, top=3, bottom=2) and an output size of 8x8.

    Args:
        strided_size: The size (width x height) of the already strided image.
        kernel: Kernel width x height (tuple of ints), or a single int if the
            kernel is square.
        stride: Stride width x height (tuple of ints), or a single int if the
            striding is square.

    Returns:
        Tuple consisting of 1) `padding`: a 4-tuple (left, right, top,
        bottom) to pad the input after(!) striding, usable with
        torch.nn.ZeroPad2d, and 2) the output shape after striding, padding,
        and the conv transpose layer.
    """
    # Normalize int shorthands into (width, height) pairs.
    if isinstance(kernel, int):
        k_w = k_h = kernel
    else:
        k_w, k_h = kernel
    if isinstance(stride, int):
        s_w = s_h = stride
    else:
        s_w, s_h = stride

    # Total 0-padding per axis; for odd totals the extra row/column goes on
    # the left/top side (matching TF).
    pad_total_w = k_w + s_w - 2
    pad_total_h = k_h + s_h - 2
    pad_right = pad_total_w // 2
    pad_left = pad_total_w - pad_right
    pad_bottom = pad_total_h // 2
    pad_top = pad_total_h - pad_bottom

    # Output size after padding and the transpose convolution.
    output_shape = (
        strided_size[0] + pad_total_w - k_w + 1,
        strided_size[1] + pad_total_h - k_h + 1,
    )

    return (pad_left, pad_right, pad_top, pad_bottom), output_shape
@DeveloperAPI
def valid_padding(
    in_size: Tuple[int, int],
    filter_size: Union[int, Tuple[int, int]],
    stride_size: Union[int, Tuple[int, int]],
) -> Tuple[int, int]:
    """Emulates TF Conv2D "valid" padding (no padding); returns output dims.

    Analogous to the "same" counterpart, but only the output image size is
    computed, since valid padding is always (0, 0, 0, 0).

    See www.tensorflow.org/versions/r0.12/api_docs/python/nn/convolution

    Args:
        in_size: Rows (height), columns (width) of the input.
        filter_size: Rows (height), columns (width) of the filter; an int
            means height == width.
        stride_size: Rows (height), columns (width) of the stride; an int
            means height == width.

    Returns:
        The output shape after padding and convolution.
    """
    in_height, in_width = in_size
    if isinstance(filter_size, int):
        filter_height = filter_width = filter_size
    else:
        filter_height, filter_width = filter_size
    if isinstance(stride_size, (int, float)):
        stride_height = stride_width = int(stride_size)
    else:
        stride_height, stride_width = int(stride_size[0]), int(stride_size[1])

    out_height = int(np.ceil((in_height - filter_height + 1) / float(stride_height)))
    out_width = int(np.ceil((in_width - filter_width + 1) / float(stride_width)))
    return (out_height, out_width)
@DeveloperAPI
class SlimConv2d(nn.Module):
    """Simple mock of tf.slim Conv2d."""

    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        kernel: Union[int, Tuple[int, int]],
        stride: Union[int, Tuple[int, int]],
        padding: Union[int, Tuple[int, int]],
        # Defaulting these to nn.[..] would break soft torch import.
        initializer: Any = "default",
        activation_fn: Any = "default",
        bias_init: float = 0,
    ):
        """Creates a standard Conv2d layer, similar to torch.nn.Conv2d.

        Args:
            in_channels: Number of input channels.
            out_channels: Number of output channels.
            kernel: Kernel size; an int means a square (x, x) kernel.
            stride: Cross-correlation stride; an int means equal strides.
            padding: Implicit zero-padding applied before the convolution;
                falsy values skip the padding layer entirely.
            initializer: Initializer for the kernel weights; "default"
                selects Xavier-uniform.
            activation_fn: Activation appended after the conv; "default"
                selects ReLU, None means no activation.
            bias_init: Constant used to initialize the conv bias.
        """
        super(SlimConv2d, self).__init__()
        seq = []
        # Optional zero-padding layer.
        if padding:
            seq.append(nn.ZeroPad2d(padding))
        # The actual Conv2D layer (with correct initialization logic).
        conv = nn.Conv2d(in_channels, out_channels, kernel, stride)
        if initializer:
            if initializer == "default":
                initializer = nn.init.xavier_uniform_
            initializer(conv.weight)
        nn.init.constant_(conv.bias, bias_init)
        seq.append(conv)
        # Resolve and append the activation (if any; default=ReLU).
        if isinstance(activation_fn, str):
            if activation_fn == "default":
                activation_fn = nn.ReLU
            else:
                activation_fn = get_activation_fn(activation_fn, "torch")
        if activation_fn is not None:
            seq.append(activation_fn())
        # Put everything in sequence.
        self._model = nn.Sequential(*seq)

    def forward(self, x: TensorType) -> TensorType:
        return self._model(x)
@DeveloperAPI
class SlimFC(nn.Module):
    """Simple PyTorch version of the `linear` function."""

    def __init__(
        self,
        in_size: int,
        out_size: int,
        initializer: Any = None,
        activation_fn: Any = None,
        use_bias: bool = True,
        bias_init: float = 0.0,
    ):
        """Creates a standard FC layer, similar to torch.nn.Linear.

        Args:
            in_size: Input size for the FC layer.
            out_size: Output size for the FC layer.
            initializer: Initializer for the layer weights (defaults to
                Xavier-uniform when None).
            activation_fn: Activation appended after the linear layer; may be
                a callable or an activation-name string.
            use_bias: Whether to add bias weights.
            bias_init: Constant used to initialize the bias weights.
        """
        super(SlimFC, self).__init__()
        seq = []
        # The actual nn.Linear layer (with correct initialization logic).
        linear = nn.Linear(in_size, out_size, bias=use_bias)
        if initializer is None:
            initializer = nn.init.xavier_uniform_
        initializer(linear.weight)
        if use_bias is True:
            nn.init.constant_(linear.bias, bias_init)
        seq.append(linear)
        # Resolve and append the activation (if any; default=None/linear).
        if isinstance(activation_fn, str):
            activation_fn = get_activation_fn(activation_fn, "torch")
        if activation_fn is not None:
            seq.append(activation_fn())
        # Put everything in sequence.
        self._model = nn.Sequential(*seq)

    def forward(self, x: TensorType) -> TensorType:
        return self._model(x)


@DeveloperAPI
class AppendBiasLayer(nn.Module):
    """Appends trainable bias columns to its input (for free_log_std)."""

    def __init__(self, num_bias_vars: int):
        super().__init__()
        # Assigning an nn.Parameter attribute registers it automatically; the
        # explicit register_parameter call mirrors the original code.
        self.log_std = torch.nn.Parameter(torch.as_tensor([0.0] * num_bias_vars))
        self.register_parameter("log_std", self.log_std)

    def forward(self, x: TensorType) -> TensorType:
        # Broadcast the bias row across the batch and append it column-wise.
        bias = self.log_std.unsqueeze(0).repeat([len(x), 1])
        return torch.cat([x, bias], axis=1)


@DeveloperAPI
class Reshape(nn.Module):
    """Standard module that reshapes/views a tensor."""

    def __init__(self, shape: List):
        super().__init__()
        self.shape = shape

    def forward(self, x):
        return x.view(*self.shape)
@OldAPIStack
class GRUGate(nn.Module):
    """Implements a gated recurrent unit for use in AttentionNet (GTrXL).

    Computes g = (1 - z) * h + z * h_next, where the update gate z has a
    constant `init_bias` subtracted from its pre-activation (this biases the
    gate toward passing the state through, stabilizing early training).
    """

    def __init__(self, dim: int, init_bias: float = 0.0, **kwargs):
        """Initializes a GRUGate instance.

        Args:
            dim: Dimension of the input (and internal state).
            init_bias: Constant subtracted from the update-gate (z)
                pre-activation to stabilize training.
                Fix: annotated as float (was int, but defaulted to 0.0).
        """
        super().__init__(**kwargs)
        # Input-to-gate weight matrices (Xavier-initialized). Assigning
        # nn.Parameter attributes registers them with the module
        # automatically, so no explicit register_parameter calls are needed.
        self._w_r = nn.Parameter(torch.zeros(dim, dim))
        self._w_z = nn.Parameter(torch.zeros(dim, dim))
        self._w_h = nn.Parameter(torch.zeros(dim, dim))
        nn.init.xavier_uniform_(self._w_r)
        nn.init.xavier_uniform_(self._w_z)
        nn.init.xavier_uniform_(self._w_h)

        # State-to-gate weight matrices.
        self._u_r = nn.Parameter(torch.zeros(dim, dim))
        self._u_z = nn.Parameter(torch.zeros(dim, dim))
        self._u_h = nn.Parameter(torch.zeros(dim, dim))
        nn.init.xavier_uniform_(self._u_r)
        nn.init.xavier_uniform_(self._u_z)
        nn.init.xavier_uniform_(self._u_h)

        # Trainable update-gate bias, initialized to `init_bias`.
        self._bias_z = nn.Parameter(torch.zeros(dim).fill_(init_bias))

    def forward(self, inputs: TensorType, **kwargs) -> TensorType:
        """Applies the GRU gate; `inputs` is a (state h, input X) tuple."""
        h, X = inputs

        # Reset gate.
        r = torch.sigmoid(
            torch.tensordot(X, self._w_r, dims=1)
            + torch.tensordot(h, self._u_r, dims=1)
        )

        # Update gate (note the subtracted bias).
        z = torch.sigmoid(
            torch.tensordot(X, self._w_z, dims=1)
            + torch.tensordot(h, self._u_z, dims=1)
            - self._bias_z
        )

        # Candidate state; the reset gate modulates the previous state.
        h_next = torch.tanh(
            torch.tensordot(X, self._w_h, dims=1)
            + torch.tensordot(h * r, self._u_h, dims=1)
        )

        return (1 - z) * h + z * h_next
+ https://arxiv.org/pdf/1706.03762.pdf +""" +from ray.rllib.utils.framework import try_import_torch +from ray.rllib.models.torch.misc import SlimFC +from ray.rllib.utils.annotations import OldAPIStack +from ray.rllib.utils.torch_utils import sequence_mask +from ray.rllib.utils.framework import TensorType + +torch, nn = try_import_torch() + + +@OldAPIStack +class MultiHeadAttention(nn.Module): + """A multi-head attention layer described in [1].""" + + def __init__( + self, in_dim: int, out_dim: int, num_heads: int, head_dim: int, **kwargs + ): + """ + in_dim: Dimension of input + out_dim: Dimension of output + num_heads: Number of attention heads + head_dim: Output dimension of each attention head + """ + super().__init__(**kwargs) + + # No bias or non-linearity. + self._num_heads = num_heads + self._head_dim = head_dim + self._qkv_layer = SlimFC( + in_size=in_dim, out_size=3 * num_heads * head_dim, use_bias=False + ) + + self._linear_layer = SlimFC( + in_size=num_heads * head_dim, out_size=out_dim, use_bias=False + ) + + def forward(self, inputs: TensorType) -> TensorType: + L = list(inputs.size())[1] # length of segment + H = self._num_heads # number of attention heads + D = self._head_dim # attention head dimension + + qkv = self._qkv_layer(inputs) + + queries, keys, values = torch.chunk(input=qkv, chunks=3, dim=-1) + queries = queries[:, -L:] # only query based on the segment + + queries = torch.reshape(queries, [-1, L, H, D]) + keys = torch.reshape(keys, [-1, L, H, D]) + values = torch.reshape(values, [-1, L, H, D]) + + score = torch.einsum("bihd,bjhd->bijh", queries, keys) + score = score / D**0.5 + + # causal mask of the same length as the sequence + mask = sequence_mask(torch.arange(1, L + 1), dtype=score.dtype) + mask = mask[None, :, :, None] + mask = mask.float() + + masked_score = score * mask + 1e30 * (mask - 1.0) + wmat = nn.functional.softmax(masked_score, dim=2) + + out = torch.einsum("bijh,bjhd->bihd", wmat, values) + shape = list(out.size())[:2] + 
class NoisyLayer(nn.Module):
    r"""A layer that adds learnable noise to some previous layer's outputs.

    Consists of:
    - a common dense layer: y = w^{T}x + b
    - a noisy layer: y = (w + \epsilon_w*\sigma_w)^{T}x +
      (b+\epsilon_b*\sigma_b)
    , where \epsilon are random variables sampled from factorized normal
    distributions and \sigma are trainable variables which are expected to
    vanish along the training procedure.
    """

    def __init__(
        self, in_size: int, out_size: int, sigma0: float, activation: str = "relu"
    ):
        """Initializes a NoisyLayer object.

        Args:
            in_size: Input size for the Noisy Layer.
            out_size: Output size for the Noisy Layer.
            sigma0: Initialization value for sigma_b (bias noise).
            activation: Non-linear activation (name) for the Noisy Layer.
        """
        super().__init__()

        self.in_size = in_size
        self.out_size = out_size
        self.sigma0 = sigma0
        self.activation = get_activation_fn(activation, framework="torch")
        if self.activation is not None:
            self.activation = self.activation()

        # Trainable noise scales: uniform init for the weight noise, a
        # constant sigma0-derived init for the bias noise.
        bound = 1.0 / np.sqrt(float(in_size))
        self.register_parameter(
            "sigma_w",
            nn.Parameter(
                torch.from_numpy(
                    np.random.uniform(
                        low=-bound, high=bound, size=[in_size, out_size]
                    )
                ).float()
            ),
        )
        self.register_parameter(
            "sigma_b",
            nn.Parameter(
                torch.from_numpy(
                    np.full(
                        shape=[out_size],
                        fill_value=sigma0 / np.sqrt(float(in_size)),
                    )
                ).float()
            ),
        )

        # The underlying dense-layer weights/bias.
        self.register_parameter(
            "w",
            nn.Parameter(
                torch.from_numpy(
                    np.full(
                        shape=[in_size, out_size],
                        fill_value=6 / np.sqrt(float(in_size) + float(out_size)),
                    )
                ).float()
            ),
        )
        self.register_parameter(
            "b", nn.Parameter(torch.from_numpy(np.zeros([out_size])).float())
        )

    def forward(self, inputs: TensorType) -> TensorType:
        """Computes the noisy linear transform (plus optional activation)."""
        # Factorized Gaussian noise: one vector per side; the weight noise is
        # their outer product.
        epsilon_in = self._f_epsilon(
            torch.normal(
                mean=torch.zeros([self.in_size]), std=torch.ones([self.in_size])
            ).to(inputs.device)
        )
        epsilon_out = self._f_epsilon(
            torch.normal(
                mean=torch.zeros([self.out_size]), std=torch.ones([self.out_size])
            ).to(inputs.device)
        )
        epsilon_w = torch.matmul(
            epsilon_in.unsqueeze(-1), other=epsilon_out.unsqueeze(0)
        )

        action_activation = (
            torch.matmul(inputs, self.w + self.sigma_w * epsilon_w)
            + self.b
            + self.sigma_b * epsilon_out
        )

        if self.activation is not None:
            action_activation = self.activation(action_activation)
        return action_activation

    def _f_epsilon(self, x: TensorType) -> TensorType:
        # f(x) = sign(x) * sqrt(|x|), per the NoisyNet factorized-noise paper.
        return torch.sign(x) * torch.sqrt(torch.abs(x))
@OldAPIStack
class RelativePositionEmbedding(nn.Module):
    """Creates a [seq_length x seq_length] matrix for rel. pos encoding.

    Denoted as Phi in [2] and [3]. Phi is the standard sinusoid encoding
    matrix.

    Args:
        seq_length: The max. sequence length (time axis).
        out_dim: The number of nodes to go into the first Tranformer
            layer with.

    Returns:
        torch.Tensor: The encoding matrix Phi.
    """

    def __init__(self, out_dim, **kwargs):
        super().__init__()
        self.out_dim = out_dim
        # Inverse frequencies for the sinusoid pairs; registered as a buffer
        # so it travels with the module across devices.
        freq_range = torch.arange(0, self.out_dim, 2.0)
        self.register_buffer(
            "inverse_freq", 1 / (10000 ** (freq_range / self.out_dim))
        )

    def forward(self, seq_length):
        # Positions counted backwards: seq_length-1 ... 0.
        positions = torch.arange(seq_length - 1, -1, -1.0, dtype=torch.float).to(
            self.inverse_freq.device
        )
        sinusoid = torch.einsum("i,j->ij", positions, self.inverse_freq)
        embeddings = torch.cat(
            [torch.sin(sinusoid), torch.cos(sinusoid)], dim=-1
        )
        # Insert a broadcastable batch axis in the middle.
        return embeddings[:, None, :]
class RelativeMultiHeadAttention(nn.Module):
    """A RelativeMultiHeadAttention layer as described in [3].

    Uses segment level recurrence with state reuse.
    """

    def __init__(
        self,
        in_dim: int,
        out_dim: int,
        num_heads: int,
        head_dim: int,
        input_layernorm: bool = False,
        output_activation: Union[str, callable] = None,
        **kwargs
    ):
        """Initializes a RelativeMultiHeadAttention nn.Module object.

        Args:
            in_dim: The input dimension.
            out_dim: The output dimension of this module. Also known as
                "attention dim".
            num_heads: The number of attention heads to use.
                Denoted `H` in [2].
            head_dim: The dimension of a single(!) attention head.
                Denoted `D` in [2].
            input_layernorm: Whether to prepend a LayerNorm before
                everything else. Should be True for building a GTrXL.
            output_activation (Union[str, callable]): Optional activation
                function or activation function specifier (str).
                Should be "relu" for GTrXL.
            **kwargs: Forwarded to nn.Module.
        """
        super().__init__(**kwargs)

        # No bias or non-linearity.
        self._num_heads = num_heads
        self._head_dim = head_dim

        # 3 = query, key, and value inputs, projected jointly.
        self._qkv_layer = SlimFC(
            in_size=in_dim, out_size=3 * num_heads * head_dim, use_bias=False
        )

        self._linear_layer = SlimFC(
            in_size=num_heads * head_dim,
            out_size=out_dim,
            use_bias=False,
            activation_fn=output_activation,
        )

        # Global content/positional bias vectors u and v from [3].
        self._uvar = nn.Parameter(torch.zeros(num_heads, head_dim))
        self._vvar = nn.Parameter(torch.zeros(num_heads, head_dim))
        nn.init.xavier_uniform_(self._uvar)
        nn.init.xavier_uniform_(self._vvar)
        self.register_parameter("_uvar", self._uvar)
        self.register_parameter("_vvar", self._vvar)

        self._pos_proj = SlimFC(
            in_size=in_dim, out_size=num_heads * head_dim, use_bias=False
        )
        self._rel_pos_embedding = RelativePositionEmbedding(out_dim)

        self._input_layernorm = None
        if input_layernorm:
            self._input_layernorm = torch.nn.LayerNorm(in_dim)

    def forward(self, inputs: TensorType, memory: TensorType = None) -> TensorType:
        """Runs relative multi-head attention over `inputs` (+ memory).

        Fix: `memory` defaulted to None but was dereferenced unconditionally
        (`memory.shape`), crashing when omitted. None now simply means "no
        memory chunk" (Tau = 0).
        """
        T = list(inputs.size())[1]  # length of segment (time)
        H = self._num_heads  # number of attention heads
        d = self._head_dim  # attention head dimension

        # Add previous memory chunk (as const, w/o gradient) to input.
        # Tau = number of (prev) time slices in each memory chunk.
        if memory is not None:
            Tau = list(memory.shape)[1]
            inputs = torch.cat((memory.detach(), inputs), dim=1)
        else:
            Tau = 0

        # Apply the Layer-Norm.
        if self._input_layernorm is not None:
            inputs = self._input_layernorm(inputs)

        qkv = self._qkv_layer(inputs)

        queries, keys, values = torch.chunk(input=qkv, chunks=3, dim=-1)
        # Cut out Tau memory timesteps from query.
        queries = queries[:, -T:]

        queries = torch.reshape(queries, [-1, T, H, d])
        keys = torch.reshape(keys, [-1, Tau + T, H, d])
        values = torch.reshape(values, [-1, Tau + T, H, d])

        # Projected relative position encodings.
        R = self._pos_proj(self._rel_pos_embedding(Tau + T))
        R = torch.reshape(R, [Tau + T, H, d])

        # b=batch
        # i and j=time indices (i=max-timesteps (inputs); j=Tau memory space)
        # h=head
        # d=head-dim (over which we will reduce-sum)
        score = torch.einsum("bihd,bjhd->bijh", queries + self._uvar, keys)
        pos_score = torch.einsum("bihd,jhd->bijh", queries + self._vvar, R)
        score = score + self.rel_shift(pos_score)
        score = score / d**0.5

        # Causal mask of the same length as the sequence.
        mask = sequence_mask(torch.arange(Tau + 1, Tau + T + 1), dtype=score.dtype).to(
            score.device
        )
        mask = mask[None, :, :, None]

        # Push masked-out positions to very large negatives pre-softmax.
        masked_score = score * mask + 1e30 * (mask.float() - 1.0)
        wmat = nn.functional.softmax(masked_score, dim=2)

        out = torch.einsum("bijh,bjhd->bihd", wmat, values)
        shape = list(out.shape)[:2] + [H * d]
        out = torch.reshape(out, shape)

        return self._linear_layer(out)

    @staticmethod
    def rel_shift(x: TensorType) -> TensorType:
        # Transposed version of the shift approach described in [3].
        # https://github.com/kimiyoung/transformer-xl/blob/
        # 44781ed21dbaec88b280f74d9ae2877f52b492a5/tf/model.py#L31
        x_size = list(x.shape)

        # Pad one zero column at the front of axis 2, fold it over axis 1,
        # drop the first row, and restore the original shape - this realizes
        # the relative-position shift without an explicit gather.
        x = torch.nn.functional.pad(x, (0, 0, 1, 0, 0, 0, 0, 0))
        x = torch.reshape(x, [x_size[0], x_size[2] + 1, x_size[1], x_size[3]])
        x = x[:, 1:, :, :]
        x = torch.reshape(x, x_size)

        return x
@override(ModelV2)
def forward(
    self,
    input_dict: Dict[str, TensorType],
    state: List[TensorType],
    seq_lens: TensorType,
) -> Tuple[TensorType, List[TensorType]]:
    """Adds a time dimension to the batch, then delegates to `forward_rnn()`.

    You should implement forward_rnn() in your subclass.

    Args:
        input_dict: Batch dict; only `"obs_flat"` is read here.
        state: List of RNN state tensors (one entry per state component).
        seq_lens: 1D tensor of true sequence lengths (len == batch size).

    Returns:
        Tuple of (model output reshaped to [B*T, num_outputs], new state list).
    """
    # Creating a __init__ function that acts as a passthrough and adding the
    # warning there led to errors, probably due to the multiple inheritance.
    # We encountered the same error if we add the Deprecated decorator. We
    # therefore add the deprecation warning here (emitted only once).
    if log_once("recurrent_network_tf"):
        deprecation_warning(
            old="ray.rllib.models.torch.recurrent_net.RecurrentNetwork"
        )
    flat_inputs = input_dict["obs_flat"].float()
    # Note that max_seq_len != input_dict.max_seq_len != seq_lens.max(),
    # as input_dict may have extra zero-padding beyond seq_lens.max().
    # `add_time_dimension` handles this.
    self.time_major = self.model_config.get("_time_major", False)
    inputs = add_time_dimension(
        flat_inputs,
        seq_lens=seq_lens,
        framework="torch",
        time_major=self.time_major,
    )
    output, new_state = self.forward_rnn(inputs, state, seq_lens)
    # Collapse the time dimension again: [B, T, num_outputs] -> [B*T, num_outputs].
    output = torch.reshape(output, [-1, self.num_outputs])
    return output, new_state
@OldAPIStack
class LSTMWrapper(RecurrentNetwork, nn.Module):
    """An LSTM wrapper serving as an interface for ModelV2s that set use_lstm."""

    def __init__(
        self,
        obs_space: gym.spaces.Space,
        action_space: gym.spaces.Space,
        num_outputs: int,
        model_config: ModelConfigDict,
        name: str,
    ):
        nn.Module.__init__(self)
        # Pass num_outputs=None to the wrapped model so it exposes its raw
        # feature size via self.num_outputs (read below as the LSTM input size).
        super(LSTMWrapper, self).__init__(
            obs_space, action_space, None, model_config, name
        )

        # At this point, self.num_outputs is the number of nodes coming
        # from the wrapped (underlying) model. In other words, self.num_outputs
        # is the input size for the LSTM layer.
        # If None, set it to the observation space.
        if self.num_outputs is None:
            self.num_outputs = int(np.prod(self.obs_space.shape))

        self.cell_size = model_config["lstm_cell_size"]
        self.time_major = model_config.get("_time_major", False)
        self.use_prev_action = model_config["lstm_use_prev_action"]
        self.use_prev_reward = model_config["lstm_use_prev_reward"]

        self.action_space_struct = get_base_struct_from_space(self.action_space)
        # Total flattened size of one (possibly nested) action.
        self.action_dim = 0

        for space in tree.flatten(self.action_space_struct):
            if isinstance(space, Discrete):
                self.action_dim += space.n
            elif isinstance(space, MultiDiscrete):
                self.action_dim += np.sum(space.nvec)
            elif space.shape is not None:
                self.action_dim += int(np.prod(space.shape))
            else:
                self.action_dim += int(len(space))

        # Add prev-action/reward nodes to input to LSTM.
        if self.use_prev_action:
            self.num_outputs += self.action_dim
        if self.use_prev_reward:
            self.num_outputs += 1

        # Define actual LSTM layer (with num_outputs being the nodes coming
        # from the wrapped (underlying) layer).
        self.lstm = nn.LSTM(
            self.num_outputs, self.cell_size, batch_first=not self.time_major
        )

        # Set self.num_outputs to the number of output nodes desired by the
        # caller of this constructor (it held the LSTM input size until here).
        self.num_outputs = num_outputs

        # Postprocess LSTM output with another hidden layer and compute values.
        self._logits_branch = SlimFC(
            in_size=self.cell_size,
            out_size=self.num_outputs,
            activation_fn=None,
            initializer=torch.nn.init.xavier_uniform_,
        )
        self._value_branch = SlimFC(
            in_size=self.cell_size,
            out_size=1,
            activation_fn=None,
            initializer=torch.nn.init.xavier_uniform_,
        )

        # __sphinx_doc_begin__
        # Add prev-a/r to this model's view, if required.
        if model_config["lstm_use_prev_action"]:
            self.view_requirements[SampleBatch.PREV_ACTIONS] = ViewRequirement(
                SampleBatch.ACTIONS, space=self.action_space, shift=-1
            )
        if model_config["lstm_use_prev_reward"]:
            self.view_requirements[SampleBatch.PREV_REWARDS] = ViewRequirement(
                SampleBatch.REWARDS, shift=-1
            )
        # __sphinx_doc_end__

    @override(RecurrentNetwork)
    def forward(
        self,
        input_dict: Dict[str, TensorType],
        state: List[TensorType],
        seq_lens: TensorType,
    ) -> Tuple[TensorType, List[TensorType]]:
        """Runs the wrapped model, appends prev-a/r, then runs the LSTM stack."""
        assert seq_lens is not None
        # Push obs through "unwrapped" net's `forward()` first.
        wrapped_out, _ = self._wrapped_forward(input_dict, [], None)

        # Concat. prev-action/reward if required.
        prev_a_r = []

        # Prev actions.
        if self.model_config["lstm_use_prev_action"]:
            prev_a = input_dict[SampleBatch.PREV_ACTIONS]
            # If actions are not processed yet (in their original form as
            # have been sent to environment):
            # Flatten/one-hot into 1D array.
            if self.model_config["_disable_action_flattening"]:
                prev_a_r.append(
                    flatten_inputs_to_1d_tensor(
                        prev_a, spaces_struct=self.action_space_struct, time_axis=False
                    )
                )
            # If actions are already flattened (but not one-hot'd yet!),
            # one-hot discrete/multi-discrete actions here.
            else:
                if isinstance(self.action_space, (Discrete, MultiDiscrete)):
                    prev_a = one_hot(prev_a.float(), self.action_space)
                else:
                    prev_a = prev_a.float()
                prev_a_r.append(torch.reshape(prev_a, [-1, self.action_dim]))
        # Prev rewards.
        if self.model_config["lstm_use_prev_reward"]:
            prev_a_r.append(
                torch.reshape(input_dict[SampleBatch.PREV_REWARDS].float(), [-1, 1])
            )

        # Concat prev. actions + rewards to the "main" input.
        if prev_a_r:
            wrapped_out = torch.cat([wrapped_out] + prev_a_r, dim=1)

        # Push everything through our LSTM (via RecurrentNetwork.forward()).
        input_dict["obs_flat"] = wrapped_out
        return super().forward(input_dict, state, seq_lens)

    @override(RecurrentNetwork)
    def forward_rnn(
        self, inputs: TensorType, state: List[TensorType], seq_lens: TensorType
    ) -> Tuple[TensorType, List[TensorType]]:
        # Don't show paddings to RNN(?)
        # TODO: (sven) For now, only allow, iff time_major=True to not break
        # anything retrospectively (time_major not supported previously).
        # max_seq_len = inputs.shape[0]
        # time_major = self.model_config["_time_major"]
        # if time_major and max_seq_len > 1:
        #     inputs = torch.nn.utils.rnn.pack_padded_sequence(
        #         inputs, seq_lens,
        #         batch_first=not time_major, enforce_sorted=False)
        # nn.LSTM expects state tensors with a leading num-layers dim of 1.
        self._features, [h, c] = self.lstm(
            inputs, [torch.unsqueeze(state[0], 0), torch.unsqueeze(state[1], 0)]
        )
        # Re-apply paddings.
        # if time_major and max_seq_len > 1:
        #     self._features, _ = torch.nn.utils.rnn.pad_packed_sequence(
        #         self._features,
        #         batch_first=not time_major)
        model_out = self._logits_branch(self._features)
        # Strip the num-layers dim again for the returned state.
        return model_out, [torch.squeeze(h, 0), torch.squeeze(c, 0)]

    @override(ModelV2)
    def get_initial_state(self) -> Union[List[np.ndarray], List[TensorType]]:
        # Place hidden states on same device as model (derive it from one of
        # our own weight tensors).
        linear = next(self._logits_branch._model.children())
        h = [
            linear.weight.new(1, self.cell_size).zero_().squeeze(0),
            linear.weight.new(1, self.cell_size).zero_().squeeze(0),
        ]
        return h

    @override(ModelV2)
    def value_function(self) -> TensorType:
        assert self._features is not None, "must call forward() first"
        return torch.reshape(self._value_branch(self._features), [-1])
@OldAPIStack
class TorchCategorical(TorchDistributionWrapper):
    """Wrapper class for PyTorch Categorical distribution.

    `inputs` are interpreted as unnormalized logits. An optional
    `temperature` flattens (>1.0) or sharpens (<1.0) the distribution via
    `softmax(logits / temperature)`.
    """

    @override(ActionDistribution)
    def __init__(
        self,
        inputs: List[TensorType],
        model: TorchModelV2 = None,
        temperature: float = 1.0,
    ):
        if temperature != 1.0:
            assert temperature > 0.0, "Categorical `temperature` must be > 0.0!"
            # Bug fix: use out-of-place division here. The previous in-place
            # `inputs /= temperature` mutated the caller's logits tensor
            # (a visible side effect) and raised on integer-typed inputs.
            inputs = inputs / temperature
        super().__init__(inputs, model)
        self.dist = torch.distributions.categorical.Categorical(logits=self.inputs)

    @override(ActionDistribution)
    def deterministic_sample(self) -> TensorType:
        # Greedy action: per-row argmax over class probabilities.
        self.last_sample = self.dist.probs.argmax(dim=1)
        return self.last_sample

    @staticmethod
    @override(ActionDistribution)
    def required_model_output_shape(
        action_space: gym.Space, model_config: ModelConfigDict
    ) -> Union[int, np.ndarray]:
        # One logit per discrete action.
        return action_space.n


@OldAPIStack
def get_torch_categorical_class_with_temperature(t: float):
    """TorchCategorical distribution class that has customized default temperature.

    Args:
        t: The default temperature baked into the returned class.

    Returns:
        A TorchCategorical subclass whose `temperature` defaults to `t`.
    """

    class TorchCategoricalWithTemperature(TorchCategorical):
        def __init__(self, inputs, model=None, temperature=t):
            super().__init__(inputs, model, temperature)

    return TorchCategoricalWithTemperature
@override(TorchDistributionWrapper)
def sample(self) -> TensorType:
    # Sample each sub-categorical independently; stack to [B, num-components].
    arr = [cat.sample() for cat in self.cats]
    sample_ = torch.stack(arr, dim=1)
    # Int-Box action space: restore the Box's original shape.
    if isinstance(self.action_space, gym.spaces.Box):
        sample_ = torch.reshape(sample_, [-1] + list(self.action_space.shape))
    self.last_sample = sample_
    return sample_

@override(ActionDistribution)
def deterministic_sample(self) -> TensorType:
    # Greedy: per-component argmax over class probabilities.
    arr = [torch.argmax(cat.probs, -1) for cat in self.cats]
    sample_ = torch.stack(arr, dim=1)
    if isinstance(self.action_space, gym.spaces.Box):
        sample_ = torch.reshape(sample_, [-1] + list(self.action_space.shape))
    self.last_sample = sample_
    return sample_

@override(TorchDistributionWrapper)
def logp(self, actions: TensorType) -> TensorType:
    # If a single (merged) tensor is provided, unstack it into one column
    # per sub-categorical.
    if isinstance(actions, torch.Tensor):
        if isinstance(self.action_space, gym.spaces.Box):
            actions = torch.reshape(
                actions, [-1, int(np.prod(self.action_space.shape))]
            )
        actions = torch.unbind(actions, dim=1)
    # Joint log-prob = sum of per-component log-probs (independence).
    logps = torch.stack([cat.log_prob(act) for cat, act in zip(self.cats, actions)])
    return torch.sum(logps, dim=0)

@override(ActionDistribution)
def multi_entropy(self) -> TensorType:
    # Per-component entropies, shape [B, num-components].
    return torch.stack([cat.entropy() for cat in self.cats], dim=1)

@override(TorchDistributionWrapper)
def entropy(self) -> TensorType:
    # Total entropy = sum over independent components.
    return torch.sum(self.multi_entropy(), dim=1)

@override(ActionDistribution)
def multi_kl(self, other: ActionDistribution) -> TensorType:
    # Per-component KL vs. the matching component of `other`.
    return torch.stack(
        [
            torch.distributions.kl.kl_divergence(cat, oth_cat)
            for cat, oth_cat in zip(self.cats, other.cats)
        ],
        dim=1,
    )

@override(TorchDistributionWrapper)
def kl(self, other: ActionDistribution) -> TensorType:
    # Total KL = sum over independent components.
    return torch.sum(self.multi_kl(other), dim=1)
@OldAPIStack
class TorchSlateMultiCategorical(TorchCategorical):
    """MultiCategorical distribution for MultiDiscrete action spaces.

    The action space must be uniform, meaning all nvec items have the same size,
    e.g. MultiDiscrete([10, 10, 10]), where 10 is the number of candidates to
    pick from and 3 is the slate size (pick 3 out of 10). When picking
    candidates, no candidate must be picked more than once.
    """

    def __init__(
        self,
        inputs: List[TensorType],
        model: TorchModelV2 = None,
        temperature: float = 1.0,
        action_space: Optional[gym.spaces.MultiDiscrete] = None,
        all_slates=None,
    ):
        assert temperature > 0.0, "Categorical `temperature` must be > 0.0!"
        # Allow softmax formula w/ temperature != 1.0:
        # Divide inputs by temperature.
        super().__init__(inputs / temperature, model)
        self.action_space = action_space
        # Assert uniformness of the action space (all discrete buckets have the
        # same size).
        assert isinstance(self.action_space, gym.spaces.MultiDiscrete) and all(
            n == self.action_space.nvec[0] for n in self.action_space.nvec
        )
        # Pre-enumerated candidate slates; indexed by the categorical sample.
        self.all_slates = all_slates

    @override(ActionDistribution)
    def deterministic_sample(self) -> TensorType:
        # Get a sample from the underlying Categorical (batch of ints).
        sample = super().deterministic_sample()
        # Use the sampled ints to pick the actual slates.
        return torch.take_along_dim(self.all_slates, sample.long(), dim=-1)

    @override(ActionDistribution)
    def logp(self, x: TensorType) -> TensorType:
        # TODO: Implement.
        # NOTE(review): placeholder — returns a constant 1.0 per batch row,
        # not a real log-prob; callers relying on logp get a dummy value.
        return torch.ones_like(self.inputs[:, 0])


@OldAPIStack
class TorchDiagGaussian(TorchDistributionWrapper):
    """Wrapper class for PyTorch Normal distribution (diagonal covariance).

    `inputs` is split along dim 1 into mean and log-std halves.
    """

    @override(ActionDistribution)
    def __init__(
        self,
        inputs: List[TensorType],
        model: TorchModelV2,
        *,
        action_space: Optional[gym.spaces.Space] = None
    ):
        super().__init__(inputs, model)
        mean, log_std = torch.chunk(self.inputs, 2, dim=1)
        self.log_std = log_std
        self.dist = torch.distributions.normal.Normal(mean, torch.exp(log_std))
        # Remember to squeeze action samples in case action space is Box(shape=()).
        self.zero_action_dim = action_space and action_space.shape == ()

    @override(TorchDistributionWrapper)
    def sample(self) -> TensorType:
        sample = super().sample()
        if self.zero_action_dim:
            return torch.squeeze(sample, dim=-1)
        return sample

    @override(ActionDistribution)
    def deterministic_sample(self) -> TensorType:
        # Greedy: the distribution's mean.
        self.last_sample = self.dist.mean
        return self.last_sample

    @override(TorchDistributionWrapper)
    def logp(self, actions: TensorType) -> TensorType:
        # Sum per-dimension log-probs -> joint log-prob of the diagonal Gaussian.
        return super().logp(actions).sum(-1)

    @override(TorchDistributionWrapper)
    def entropy(self) -> TensorType:
        return super().entropy().sum(-1)

    @override(TorchDistributionWrapper)
    def kl(self, other: ActionDistribution) -> TensorType:
        return super().kl(other).sum(-1)

    @staticmethod
    @override(ActionDistribution)
    def required_model_output_shape(
        action_space: gym.Space, model_config: ModelConfigDict
    ) -> Union[int, np.ndarray]:
        # Mean + log-std per action dimension.
        return np.prod(action_space.shape, dtype=np.int32) * 2
def __init__(
    self,
    inputs: List[TensorType],
    model: TorchModelV2,
    low: float = -1.0,
    high: float = 1.0,
):
    """Parameterizes the distribution via `inputs`.

    Args:
        low: The lowest possible sampling value
            (excluding this value).
        high: The highest possible sampling value
            (excluding this value).
    """
    super().__init__(inputs, model)
    # Split inputs into mean and log(std).
    mean, log_std = torch.chunk(self.inputs, 2, dim=-1)
    # Clip `scale` values (coming from NN) to reasonable values.
    log_std = torch.clamp(log_std, MIN_LOG_NN_OUTPUT, MAX_LOG_NN_OUTPUT)
    std = torch.exp(log_std)
    self.dist = torch.distributions.normal.Normal(mean, std)
    assert np.all(np.less(low, high))
    self.low = low
    self.high = high
    self.mean = mean
    self.std = std

@override(ActionDistribution)
def deterministic_sample(self) -> TensorType:
    # Greedy: squash the underlying Normal's mean into [low, high].
    self.last_sample = self._squash(self.dist.mean)
    return self.last_sample

@override(TorchDistributionWrapper)
def sample(self) -> TensorType:
    # Use the reparameterization version of `dist.sample` to allow for
    # the results to be backprop'able e.g. in a loss term.
    normal_sample = self.dist.rsample()
    self.last_sample = self._squash(normal_sample)
    return self.last_sample

@override(ActionDistribution)
def logp(self, x: TensorType) -> TensorType:
    # Unsquash values (from [low,high] to ]-inf,inf[)
    unsquashed_values = self._unsquash(x)
    # Get log prob of unsquashed values from our Normal.
    log_prob_gaussian = self.dist.log_prob(unsquashed_values)
    # For safety reasons, clamp somehow, only then sum up.
    log_prob_gaussian = torch.clamp(log_prob_gaussian, -100, 100)
    log_prob_gaussian = torch.sum(log_prob_gaussian, dim=-1)
    # Get log-prob for squashed Gaussian: subtract the tanh
    # change-of-variables correction term.
    unsquashed_values_tanhd = torch.tanh(unsquashed_values)
    log_prob = log_prob_gaussian - torch.sum(
        torch.log(1 - unsquashed_values_tanhd**2 + SMALL_NUMBER), dim=-1
    )
    return log_prob

def sample_logp(self):
    """Samples (reparameterized) and returns (actions, their log-probs)."""
    z = self.dist.rsample()
    actions = self._squash(z)
    return actions, torch.sum(
        self.dist.log_prob(z) - torch.log(1 - actions * actions + SMALL_NUMBER),
        dim=-1,
    )

@override(TorchDistributionWrapper)
def entropy(self) -> TensorType:
    # No closed form for the squashed distribution's entropy.
    raise ValueError("Entropy not defined for SquashedGaussian!")

@override(TorchDistributionWrapper)
def kl(self, other: ActionDistribution) -> TensorType:
    # No closed form for the squashed distribution's KL.
    raise ValueError("KL not defined for SquashedGaussian!")

def _squash(self, raw_values: TensorType) -> TensorType:
    # Map ]-inf, inf[ via tanh into [low, high].
    # Returned values are within [low, high] (including `low` and `high`).
    squashed = ((torch.tanh(raw_values) + 1.0) / 2.0) * (
        self.high - self.low
    ) + self.low
    return torch.clamp(squashed, self.low, self.high)

def _unsquash(self, values: TensorType) -> TensorType:
    # Inverse of `_squash`: [low, high] -> ]-inf, inf[ via atanh.
    normed_values = (values - self.low) / (self.high - self.low) * 2.0 - 1.0
    # Stabilize input to atanh.
    save_normed_values = torch.clamp(
        normed_values, -1.0 + SMALL_NUMBER, 1.0 - SMALL_NUMBER
    )
    unsquashed = torch.atanh(save_normed_values)
    return unsquashed

@staticmethod
@override(ActionDistribution)
def required_model_output_shape(
    action_space: gym.Space, model_config: ModelConfigDict
) -> Union[int, np.ndarray]:
    # Mean + log-std per action dimension.
    return np.prod(action_space.shape, dtype=np.int32) * 2
+ """ + + def __init__( + self, + inputs: List[TensorType], + model: TorchModelV2, + low: float = 0.0, + high: float = 1.0, + ): + super().__init__(inputs, model) + # Stabilize input parameters (possibly coming from a linear layer). + self.inputs = torch.clamp(self.inputs, log(SMALL_NUMBER), -log(SMALL_NUMBER)) + self.inputs = torch.log(torch.exp(self.inputs) + 1.0) + 1.0 + self.low = low + self.high = high + alpha, beta = torch.chunk(self.inputs, 2, dim=-1) + # Note: concentration0==beta, concentration1=alpha (!) + self.dist = torch.distributions.Beta(concentration1=alpha, concentration0=beta) + + @override(ActionDistribution) + def deterministic_sample(self) -> TensorType: + self.last_sample = self._squash(self.dist.mean) + return self.last_sample + + @override(TorchDistributionWrapper) + def sample(self) -> TensorType: + # Use the reparameterization version of `dist.sample` to allow for + # the results to be backprop'able e.g. in a loss term. + normal_sample = self.dist.rsample() + self.last_sample = self._squash(normal_sample) + return self.last_sample + + @override(ActionDistribution) + def logp(self, x: TensorType) -> TensorType: + unsquashed_values = self._unsquash(x) + return torch.sum(self.dist.log_prob(unsquashed_values), dim=-1) + + def _squash(self, raw_values: TensorType) -> TensorType: + return raw_values * (self.high - self.low) + self.low + + def _unsquash(self, values: TensorType) -> TensorType: + return (values - self.low) / (self.high - self.low) + + @staticmethod + @override(ActionDistribution) + def required_model_output_shape( + action_space: gym.Space, model_config: ModelConfigDict + ) -> Union[int, np.ndarray]: + return np.prod(action_space.shape, dtype=np.int32) * 2 + + +@OldAPIStack +class TorchDeterministic(TorchDistributionWrapper): + """Action distribution that returns the input values directly. + + This is similar to DiagGaussian with standard deviation zero (thus only + requiring the "mean" values as NN output). 
+ """ + + @override(ActionDistribution) + def deterministic_sample(self) -> TensorType: + return self.inputs + + @override(TorchDistributionWrapper) + def sampled_action_logp(self) -> TensorType: + return torch.zeros((self.inputs.size()[0],), dtype=torch.float32) + + @override(TorchDistributionWrapper) + def sample(self) -> TensorType: + return self.deterministic_sample() + + @staticmethod + @override(ActionDistribution) + def required_model_output_shape( + action_space: gym.Space, model_config: ModelConfigDict + ) -> Union[int, np.ndarray]: + return np.prod(action_space.shape, dtype=np.int32) + + +@OldAPIStack +class TorchMultiActionDistribution(TorchDistributionWrapper): + """Action distribution that operates on multiple, possibly nested actions.""" + + def __init__(self, inputs, model, *, child_distributions, input_lens, action_space): + """Initializes a TorchMultiActionDistribution object. + + Args: + inputs (torch.Tensor): A single tensor of shape [BATCH, size]. + model (TorchModelV2): The TorchModelV2 object used to produce + inputs for this distribution. + child_distributions (any[torch.Tensor]): Any struct + that contains the child distribution classes to use to + instantiate the child distributions from `inputs`. This could + be an already flattened list or a struct according to + `action_space`. + input_lens (any[int]): A flat list or a nested struct of input + split lengths used to split `inputs`. + action_space (Union[gym.spaces.Dict,gym.spaces.Tuple]): The complex + and possibly nested action space. 
+ """ + if not isinstance(inputs, torch.Tensor): + inputs = torch.from_numpy(inputs) + if isinstance(model, TorchModelV2): + inputs = inputs.to(next(model.parameters()).device) + super().__init__(inputs, model) + + self.action_space_struct = get_base_struct_from_space(action_space) + + self.input_lens = tree.flatten(input_lens) + flat_child_distributions = tree.flatten(child_distributions) + split_inputs = torch.split(inputs, self.input_lens, dim=1) + self.flat_child_distributions = tree.map_structure( + lambda dist, input_: dist(input_, model), + flat_child_distributions, + list(split_inputs), + ) + + @override(ActionDistribution) + def logp(self, x): + if isinstance(x, np.ndarray): + x = torch.Tensor(x) + # Single tensor input (all merged). + if isinstance(x, torch.Tensor): + split_indices = [] + for dist in self.flat_child_distributions: + if isinstance(dist, TorchCategorical): + split_indices.append(1) + elif ( + isinstance(dist, TorchMultiCategorical) + and dist.action_space is not None + ): + split_indices.append(int(np.prod(dist.action_space.shape))) + else: + sample = dist.sample() + # Cover Box(shape=()) case. + if len(sample.shape) == 1: + split_indices.append(1) + else: + split_indices.append(sample.size()[1]) + split_x = list(torch.split(x, split_indices, dim=1)) + # Structured or flattened (by single action component) input. + else: + split_x = tree.flatten(x) + + def map_(val, dist): + # Remove extra categorical dimension. + if isinstance(dist, TorchCategorical): + val = (torch.squeeze(val, dim=-1) if len(val.shape) > 1 else val).int() + return dist.logp(val) + + # Remove extra categorical dimension and take the logp of each + # component. 
+ flat_logps = tree.map_structure(map_, split_x, self.flat_child_distributions) + + return functools.reduce(lambda a, b: a + b, flat_logps) + + @override(ActionDistribution) + def kl(self, other): + kl_list = [ + d.kl(o) + for d, o in zip( + self.flat_child_distributions, other.flat_child_distributions + ) + ] + return functools.reduce(lambda a, b: a + b, kl_list) + + @override(ActionDistribution) + def entropy(self): + entropy_list = [d.entropy() for d in self.flat_child_distributions] + return functools.reduce(lambda a, b: a + b, entropy_list) + + @override(ActionDistribution) + def sample(self): + child_distributions = tree.unflatten_as( + self.action_space_struct, self.flat_child_distributions + ) + return tree.map_structure(lambda s: s.sample(), child_distributions) + + @override(ActionDistribution) + def deterministic_sample(self): + child_distributions = tree.unflatten_as( + self.action_space_struct, self.flat_child_distributions + ) + return tree.map_structure( + lambda s: s.deterministic_sample(), child_distributions + ) + + @override(TorchDistributionWrapper) + def sampled_action_logp(self): + p = self.flat_child_distributions[0].sampled_action_logp() + for c in self.flat_child_distributions[1:]: + p += c.sampled_action_logp() + return p + + @override(ActionDistribution) + def required_model_output_shape(self, action_space, model_config): + return np.sum(self.input_lens, dtype=np.int32) + + +@OldAPIStack +class TorchDirichlet(TorchDistributionWrapper): + """Dirichlet distribution for continuous actions that are between + [0,1] and sum to 1. + + e.g. actions that represent resource allocation.""" + + def __init__(self, inputs, model): + """Input is a tensor of logits. The exponential of logits is used to + parametrize the Dirichlet distribution as all parameters need to be + positive. An arbitrary small epsilon is added to the concentration + parameters to be zero due to numerical error. + + See issue #4440 for more details. 
+ """ + self.epsilon = torch.tensor(1e-7).to(inputs.device) + concentration = torch.exp(inputs) + self.epsilon + self.dist = torch.distributions.dirichlet.Dirichlet( + concentration=concentration, + validate_args=True, + ) + super().__init__(concentration, model) + + @override(ActionDistribution) + def deterministic_sample(self) -> TensorType: + self.last_sample = nn.functional.softmax(self.dist.concentration, dim=-1) + return self.last_sample + + @override(ActionDistribution) + def logp(self, x): + # Support of Dirichlet are positive real numbers. x is already + # an array of positive numbers, but we clip to avoid zeros due to + # numerical errors. + x = torch.max(x, self.epsilon) + x = x / torch.sum(x, dim=-1, keepdim=True) + return self.dist.log_prob(x) + + @override(ActionDistribution) + def entropy(self): + return self.dist.entropy() + + @staticmethod + @override(ActionDistribution) + def required_model_output_shape(action_space, model_config): + return np.prod(action_space.shape, dtype=np.int32) diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/models/torch/torch_distributions.py b/.venv/lib/python3.11/site-packages/ray/rllib/models/torch/torch_distributions.py new file mode 100644 index 0000000000000000000000000000000000000000..f2165f1bca65dae9c6edcfb459ca7e5c15402578 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/rllib/models/torch/torch_distributions.py @@ -0,0 +1,682 @@ +"""The main difference between this and the old ActionDistribution is that this one +has more explicit input args. So that the input format does not have to be guessed from +the code. This matches the design pattern of torch distribution which developers may +already be familiar with. 
+""" +import gymnasium as gym +import numpy as np +from typing import Dict, Iterable, List, Optional +import tree +import abc + + +from ray.rllib.models.distributions import Distribution +from ray.rllib.utils.annotations import override, DeveloperAPI +from ray.rllib.utils.framework import try_import_torch +from ray.rllib.utils.numpy import MAX_LOG_NN_OUTPUT, MIN_LOG_NN_OUTPUT, SMALL_NUMBER +from ray.rllib.utils.typing import TensorType, Union, Tuple + +torch, nn = try_import_torch() + + +@DeveloperAPI +class TorchDistribution(Distribution, abc.ABC): + """Wrapper class for torch.distributions.""" + + def __init__(self, *args, **kwargs): + super().__init__() + self._dist = self._get_torch_distribution(*args, **kwargs) + + @abc.abstractmethod + def _get_torch_distribution( + self, *args, **kwargs + ) -> "torch.distributions.Distribution": + """Returns the torch.distributions.Distribution object to use.""" + + @override(Distribution) + def logp(self, value: TensorType, **kwargs) -> TensorType: + return self._dist.log_prob(value, **kwargs) + + @override(Distribution) + def entropy(self) -> TensorType: + return self._dist.entropy() + + @override(Distribution) + def kl(self, other: "Distribution") -> TensorType: + return torch.distributions.kl.kl_divergence(self._dist, other._dist) + + @override(Distribution) + def sample( + self, + *, + sample_shape=None, + ) -> Union[TensorType, Tuple[TensorType, TensorType]]: + sample = self._dist.sample( + sample_shape if sample_shape is not None else torch.Size() + ) + return sample + + @override(Distribution) + def rsample( + self, + *, + sample_shape=None, + ) -> Union[TensorType, Tuple[TensorType, TensorType]]: + rsample = self._dist.rsample( + sample_shape if sample_shape is not None else torch.Size() + ) + return rsample + + +@DeveloperAPI +class TorchCategorical(TorchDistribution): + """Wrapper class for PyTorch Categorical distribution. 
+ + Creates a categorical distribution parameterized by either :attr:`probs` or + :attr:`logits` (but not both). + + Samples are integers from :math:`\{0, \ldots, K-1\}` where `K` is + ``probs.size(-1)``. + + If `probs` is 1-dimensional with length-`K`, each element is the relative + probability of sampling the class at that index. + + If `probs` is N-dimensional, the first N-1 dimensions are treated as a batch of + relative probability vectors. + + .. testcode:: + :skipif: True + + m = TorchCategorical(torch.tensor([ 0.25, 0.25, 0.25, 0.25 ])) + m.sample(sample_shape=(2,)) # equal probability of 0, 1, 2, 3 + + .. testoutput:: + + tensor([3, 4]) + + Args: + logits: Event log probabilities (unnormalized) + probs: The probablities of each event. + temperature: In case of using logits, this parameter can be used to determine + the sharpness of the distribution. i.e. + ``probs = softmax(logits / temperature)``. The temperature must be strictly + positive. A low value (e.g. 1e-10) will result in argmax sampling while a + larger value will result in uniform sampling. + """ + + @override(TorchDistribution) + def __init__( + self, + logits: "torch.Tensor" = None, + probs: "torch.Tensor" = None, + ) -> None: + # We assert this here because to_deterministic makes this assumption. + assert (probs is None) != ( + logits is None + ), "Exactly one out of `probs` and `logits` must be set!" + + self.probs = probs + self.logits = logits + super().__init__(logits=logits, probs=probs) + + # Build this distribution only if really needed (in `self.rsample()`). It's + # quite expensive according to cProfile. 
+ self._one_hot = None + + @override(TorchDistribution) + def _get_torch_distribution( + self, + logits: "torch.Tensor" = None, + probs: "torch.Tensor" = None, + ) -> "torch.distributions.Distribution": + return torch.distributions.categorical.Categorical(logits=logits, probs=probs) + + @staticmethod + @override(Distribution) + def required_input_dim(space: gym.Space, **kwargs) -> int: + assert isinstance(space, gym.spaces.Discrete) + return int(space.n) + + @override(Distribution) + def rsample(self, sample_shape=()): + if self._one_hot is None: + self._one_hot = torch.distributions.one_hot_categorical.OneHotCategorical( + logits=self.logits, probs=self.probs + ) + one_hot_sample = self._one_hot.sample(sample_shape) + return (one_hot_sample - self.probs).detach() + self.probs + + @classmethod + @override(Distribution) + def from_logits(cls, logits: TensorType, **kwargs) -> "TorchCategorical": + return TorchCategorical(logits=logits, **kwargs) + + def to_deterministic(self) -> "TorchDeterministic": + if self.probs is not None: + probs_or_logits = self.probs + else: + probs_or_logits = self.logits + + return TorchDeterministic(loc=torch.argmax(probs_or_logits, dim=-1)) + + +@DeveloperAPI +class TorchDiagGaussian(TorchDistribution): + """Wrapper class for PyTorch Normal distribution. + + Creates a normal distribution parameterized by :attr:`loc` and :attr:`scale`. In + case of multi-dimensional distribution, the variance is assumed to be diagonal. + + .. testcode:: + :skipif: True + + loc, scale = torch.tensor([0.0, 0.0]), torch.tensor([1.0, 1.0]) + m = TorchDiagGaussian(loc=loc, scale=scale) + m.sample(sample_shape=(2,)) # 2d normal dist with loc=0 and scale=1 + + .. testoutput:: + + tensor([[ 0.1046, -0.6120], [ 0.234, 0.556]]) + + .. testcode:: + :skipif: True + + # scale is None + m = TorchDiagGaussian(loc=torch.tensor([0.0, 1.0])) + m.sample(sample_shape=(2,)) # normally distributed with loc=0 and scale=1 + + .. 
testoutput:: + + tensor([0.1046, 0.6120]) + + + Args: + loc: mean of the distribution (often referred to as mu). If scale is None, the + second half of the `loc` will be used as the log of scale. + scale: standard deviation of the distribution (often referred to as sigma). + Has to be positive. + """ + + @override(TorchDistribution) + def __init__( + self, + loc: Union[float, "torch.Tensor"], + scale: Optional[Union[float, "torch.Tensor"]], + ): + self.loc = loc + super().__init__(loc=loc, scale=scale) + + def _get_torch_distribution(self, loc, scale) -> "torch.distributions.Distribution": + return torch.distributions.normal.Normal(loc, scale) + + @override(TorchDistribution) + def logp(self, value: TensorType) -> TensorType: + return super().logp(value).sum(-1) + + @override(TorchDistribution) + def entropy(self) -> TensorType: + return super().entropy().sum(-1) + + @override(TorchDistribution) + def kl(self, other: "TorchDistribution") -> TensorType: + return super().kl(other).sum(-1) + + @staticmethod + @override(Distribution) + def required_input_dim(space: gym.Space, **kwargs) -> int: + assert isinstance(space, gym.spaces.Box) + return int(np.prod(space.shape, dtype=np.int32) * 2) + + @classmethod + @override(Distribution) + def from_logits(cls, logits: TensorType, **kwargs) -> "TorchDiagGaussian": + loc, log_std = logits.chunk(2, dim=-1) + scale = log_std.exp() + return TorchDiagGaussian(loc=loc, scale=scale) + + def to_deterministic(self) -> "TorchDeterministic": + return TorchDeterministic(loc=self.loc) + + +@DeveloperAPI +class TorchSquashedGaussian(TorchDistribution): + @override(TorchDistribution) + def __init__( + self, + loc: Union[float, "torch.Tensor"], + scale: Optional[Union[float, "torch.Tensor"]] = 1.0, + low: float = -1.0, + high: float = 1.0, + ): + self.loc = loc + self.low = low + self.high = high + + super().__init__(loc=loc, scale=scale) + + def _get_torch_distribution(self, loc, scale) -> "torch.distributions.Distribution": + return 
torch.distributions.normal.Normal(loc, scale) + + @override(TorchDistribution) + def sample( + self, *, sample_shape=None + ) -> Union[TensorType, Tuple[TensorType, TensorType]]: + # Sample from the Normal distribution. + sample = super().sample( + sample_shape=sample_shape if sample_shape is not None else torch.Size() + ) + # Return the squashed sample. + return self._squash(sample) + + @override(TorchDistribution) + def rsample( + self, *, sample_shape=None + ) -> Union[TensorType, Tuple[TensorType, TensorType]]: + # Sample from the Normal distribution. + sample = super().rsample( + sample_shape=sample_shape if sample_shape is not None else torch.Size() + ) + # Return the squashed sample. + return self._squash(sample) + + @override(TorchDistribution) + def logp(self, value: TensorType, **kwargs) -> TensorType: + # Unsquash value. + value = self._unsquash(value) + # Get log-probabilities from Normal distribution. + logp = super().logp(value, **kwargs) + # Clip the log probabilities as a safeguard and sum. + logp = torch.clamp(logp, -100, 100).sum(-1) + # Return the log probabilities for squashed Normal. + value = torch.tanh(value) + return logp - torch.log(1 - value**2 + SMALL_NUMBER).sum(-1) + + @override(TorchDistribution) + def entropy(self) -> TensorType: + raise ValueError("ENtropy not defined for `TorchSquashedGaussian`.") + + @override(TorchDistribution) + def kl(self, other: Distribution) -> TensorType: + raise ValueError("KL not defined for `TorchSquashedGaussian`.") + + def _squash(self, sample: TensorType) -> TensorType: + # Rescale the sample to interval given by the bounds (including the bounds). + sample = ((torch.tanh(sample) + 1.0) / 2.0) * (self.high - self.low) + self.low + # Return a clipped sample to comply with the bounds. + return torch.clamp(sample, self.low, self.high) + + def _unsquash(self, sample: TensorType) -> TensorType: + # Rescale to [-1.0, 1.0]. 
+ sample = (sample - self.low) / (self.high - self.low) * 2.0 - 1.0 + # Stabilize input to atanh function. + sample = torch.clamp(sample, -1.0 + SMALL_NUMBER, 1.0 - SMALL_NUMBER) + return torch.atanh(sample) + + @staticmethod + @override(Distribution) + def required_input_dim(space: gym.Space, **kwargs) -> int: + assert isinstance(space, gym.spaces.Box), space + return int(np.prod(space.shape, dtype=np.int32) * 2) + + @classmethod + @override(TorchDistribution) + def from_logits( + cls, logits: TensorType, low: float = -1.0, high: float = 1.0, **kwargs + ) -> "TorchSquashedGaussian": + loc, log_std = logits.chunk(2, dim=-1) + # Clip the `scale` values (coming from the `RLModule.forward()`) to + # reasonable values. + log_std = torch.clamp(log_std, MIN_LOG_NN_OUTPUT, MAX_LOG_NN_OUTPUT) + scale = log_std.exp() + + # Assert that `low` is smaller than `high`. + assert np.all(np.less(low, high)) + # Return class instance. + return TorchSquashedGaussian(loc=loc, scale=scale, low=low, high=high) + + def to_deterministic(self) -> Distribution: + return TorchDeterministic(loc=self.loc) + + +@DeveloperAPI +class TorchDeterministic(Distribution): + """The distribution that returns the input values directly. + + This is similar to DiagGaussian with standard deviation zero (thus only + requiring the "mean" values as NN output). + + Note: entropy is always zero, ang logp and kl are not implemented. + + .. testcode:: + :skipif: True + + m = TorchDeterministic(loc=torch.tensor([0.0, 0.0])) + m.sample(sample_shape=(2,)) + + .. 
testoutput:: + + tensor([[ 0.0, 0.0], [ 0.0, 0.0]]) + + Args: + loc: the determinsitic value to return + """ + + @override(Distribution) + def __init__(self, loc: "torch.Tensor") -> None: + super().__init__() + self.loc = loc + + @override(Distribution) + def sample( + self, + *, + sample_shape=None, + **kwargs, + ) -> Union[TensorType, Tuple[TensorType, TensorType]]: + device = self.loc.device + dtype = self.loc.dtype + shape = ( + sample_shape if sample_shape is not None else torch.Size() + ) + self.loc.shape + return torch.ones(shape, device=device, dtype=dtype) * self.loc + + def rsample( + self, + *, + sample_shape: Tuple[int, ...] = None, + **kwargs, + ) -> Union[TensorType, Tuple[TensorType, TensorType]]: + raise NotImplementedError + + @override(Distribution) + def logp(self, value: TensorType, **kwargs) -> TensorType: + return torch.zeros_like(self.loc) + + @override(Distribution) + def entropy(self, **kwargs) -> TensorType: + raise RuntimeError(f"`entropy()` not supported for {self.__class__.__name__}.") + + @override(Distribution) + def kl(self, other: "Distribution", **kwargs) -> TensorType: + raise RuntimeError(f"`kl()` not supported for {self.__class__.__name__}.") + + @staticmethod + @override(Distribution) + def required_input_dim(space: gym.Space, **kwargs) -> int: + assert isinstance(space, gym.spaces.Box) + return int(np.prod(space.shape, dtype=np.int32)) + + @classmethod + @override(Distribution) + def from_logits(cls, logits: TensorType, **kwargs) -> "TorchDeterministic": + return TorchDeterministic(loc=logits) + + def to_deterministic(self) -> "TorchDeterministic": + return self + + +@DeveloperAPI +class TorchMultiCategorical(Distribution): + """MultiCategorical distribution for MultiDiscrete action spaces.""" + + @override(Distribution) + def __init__( + self, + categoricals: List[TorchCategorical], + ): + super().__init__() + self._cats = categoricals + + @override(Distribution) + def sample(self) -> TensorType: + arr = [cat.sample() for cat 
in self._cats] + sample_ = torch.stack(arr, dim=-1) + return sample_ + + @override(Distribution) + def rsample(self, sample_shape=()): + arr = [cat.rsample() for cat in self._cats] + sample_ = torch.stack(arr, dim=-1) + return sample_ + + @override(Distribution) + def logp(self, value: "torch.Tensor") -> TensorType: + value = torch.unbind(value, dim=-1) + logps = torch.stack([cat.logp(act) for cat, act in zip(self._cats, value)]) + return torch.sum(logps, dim=0) + + @override(Distribution) + def entropy(self) -> TensorType: + return torch.sum( + torch.stack([cat.entropy() for cat in self._cats], dim=-1), dim=-1 + ) + + @override(Distribution) + def kl(self, other: Distribution) -> TensorType: + kls = torch.stack( + [cat.kl(oth_cat) for cat, oth_cat in zip(self._cats, other._cats)], + dim=-1, + ) + return torch.sum(kls, dim=-1) + + @staticmethod + @override(Distribution) + def required_input_dim(space: gym.Space, **kwargs) -> int: + assert isinstance(space, gym.spaces.MultiDiscrete) + return int(np.sum(space.nvec)) + + @classmethod + @override(Distribution) + def from_logits( + cls, + logits: "torch.Tensor", + input_lens: List[int], + temperatures: List[float] = None, + **kwargs, + ) -> "TorchMultiCategorical": + """Creates this Distribution from logits (and additional arguments). + + If you wish to create this distribution from logits only, please refer to + `Distribution.get_partial_dist_cls()`. + + Args: + logits: The tensor containing logits to be separated by logit_lens. + child_distribution_cls_struct: A struct of Distribution classes that can + be instantiated from the given logits. + input_lens: A list of integers that indicate the length of the logits + vectors to be passed into each child distribution. + temperatures: A list of floats representing the temperature to use for + each Categorical distribution. If not provided, 1.0 is used for all. + **kwargs: Forward compatibility kwargs. 
+ """ + if not temperatures: + # If temperatures are not provided, use 1.0 for all actions. + temperatures = [1.0] * len(input_lens) + + assert ( + sum(input_lens) == logits.shape[-1] + ), "input_lens must sum to logits.shape[-1]" + assert len(input_lens) == len( + temperatures + ), "input_lens and temperatures must be same length" + + categoricals = [ + TorchCategorical(logits=logits) + for logits in torch.split(logits, input_lens, dim=-1) + ] + + return TorchMultiCategorical(categoricals=categoricals) + + def to_deterministic(self) -> "TorchDeterministic": + if self._cats[0].probs is not None: + probs_or_logits = nn.utils.rnn.pad_sequence( + [cat.logits.t() for cat in self._cats], padding_value=-torch.inf + ) + else: + probs_or_logits = nn.utils.rnn.pad_sequence( + [cat.logits.t() for cat in self._cats], padding_value=-torch.inf + ) + + return TorchDeterministic(loc=torch.argmax(probs_or_logits, dim=0)) + + +@DeveloperAPI +class TorchMultiDistribution(Distribution): + """Action distribution that operates on multiple, possibly nested actions.""" + + def __init__( + self, + child_distribution_struct: Union[Tuple, List, Dict], + ): + """Initializes a TorchMultiActionDistribution object. + + Args: + child_distribution_struct: A complex struct that contains the child + distribution instances that make up this multi-distribution. + """ + super().__init__() + self._original_struct = child_distribution_struct + self._flat_child_distributions = tree.flatten(child_distribution_struct) + + @override(Distribution) + def rsample( + self, + *, + sample_shape: Tuple[int, ...] 
= None, + **kwargs, + ) -> Union[TensorType, Tuple[TensorType, TensorType]]: + rsamples = [] + for dist in self._flat_child_distributions: + rsample = dist.rsample(sample_shape=sample_shape, **kwargs) + rsamples.append(rsample) + + rsamples = tree.unflatten_as(self._original_struct, rsamples) + + return rsamples + + @override(Distribution) + def logp(self, value: TensorType) -> TensorType: + # Different places in RLlib use this method with different inputs. + # We therefore need to handle a flattened and concatenated input, as well as + # a nested one. + # TODO(Artur): Deprecate tensor inputs, only allow nested structures. + if isinstance(value, torch.Tensor): + split_indices = [] + for dist in self._flat_child_distributions: + if isinstance(dist, TorchCategorical): + split_indices.append(1) + elif isinstance(dist, TorchMultiCategorical): + split_indices.append(len(dist._cats)) + else: + sample = dist.sample() + # Cover Box(shape=()) case. + if len(sample.shape) == 1: + split_indices.append(1) + else: + split_indices.append(sample.size()[1]) + split_value = list(torch.split(value, split_indices, dim=1)) + else: + split_value = tree.flatten(value) + + def map_(val, dist): + # Remove extra dimension if present. 
+ if ( + isinstance(dist, TorchCategorical) + and val.shape[-1] == 1 + and len(val.shape) > 1 + ): + val = torch.squeeze(val, dim=-1) + return dist.logp(val) + + flat_logps = tree.map_structure( + map_, split_value, self._flat_child_distributions + ) + + return sum(flat_logps) + + @override(Distribution) + def kl(self, other: Distribution) -> TensorType: + kl_list = [ + d.kl(o) + for d, o in zip( + self._flat_child_distributions, other._flat_child_distributions + ) + ] + return sum(kl_list) + + @override(Distribution) + def entropy(self): + entropy_list = [d.entropy() for d in self._flat_child_distributions] + return sum(entropy_list) + + @override(Distribution) + def sample(self): + child_distributions_struct = tree.unflatten_as( + self._original_struct, self._flat_child_distributions + ) + return tree.map_structure(lambda s: s.sample(), child_distributions_struct) + + @staticmethod + @override(Distribution) + def required_input_dim( + space: gym.Space, input_lens: List[int], as_list: bool = False, **kwargs + ) -> int: + if as_list: + return input_lens + else: + return sum(input_lens) + + @classmethod + @override(Distribution) + def from_logits( + cls, + logits: "torch.Tensor", + child_distribution_cls_struct: Union[Dict, Iterable], + input_lens: Union[Dict, List[int]], + **kwargs, + ) -> "TorchMultiDistribution": + """Creates this Distribution from logits (and additional arguments). + + If you wish to create this distribution from logits only, please refer to + `Distribution.get_partial_dist_cls()`. + + Args: + logits: The tensor containing logits to be separated by `input_lens`. + child_distribution_cls_struct: A struct of Distribution classes that can + be instantiated from the given logits. + child_distribution_cls_struct: A struct of Distribution classes that can + be instantiated from the given logits. + input_lens: A list or dict of integers that indicate the length of each + logit. 
If this is given as a dict, the structure should match the + structure of child_distribution_cls_struct. + **kwargs: Forward compatibility kwargs. + + Returns: + A TorchMultiActionDistribution object. + """ + logit_lens = tree.flatten(input_lens) + child_distribution_cls_list = tree.flatten(child_distribution_cls_struct) + split_logits = torch.split(logits, logit_lens, dim=-1) + + child_distribution_list = tree.map_structure( + lambda dist, input_: dist.from_logits(input_), + child_distribution_cls_list, + list(split_logits), + ) + + child_distribution_struct = tree.unflatten_as( + child_distribution_cls_struct, child_distribution_list + ) + + return TorchMultiDistribution( + child_distribution_struct=child_distribution_struct, + ) + + def to_deterministic(self) -> "TorchMultiDistribution": + flat_deterministic_dists = [ + dist.to_deterministic() for dist in self._flat_child_distributions + ] + deterministic_dists = tree.unflatten_as( + self._original_struct, flat_deterministic_dists + ) + return TorchMultiDistribution(deterministic_dists) diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/models/torch/torch_modelv2.py b/.venv/lib/python3.11/site-packages/ray/rllib/models/torch/torch_modelv2.py new file mode 100644 index 0000000000000000000000000000000000000000..dd473c70de3ef91384306ef7c23e58314179ba27 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/rllib/models/torch/torch_modelv2.py @@ -0,0 +1,80 @@ +import gymnasium as gym +from typing import Dict, List, Union + +from ray.rllib.models.modelv2 import ModelV2 +from ray.rllib.utils.annotations import OldAPIStack, override +from ray.rllib.utils.framework import try_import_torch +from ray.rllib.utils.typing import ModelConfigDict, TensorType + +_, nn = try_import_torch() + + +@OldAPIStack +class TorchModelV2(ModelV2): + """Torch version of ModelV2. 
+ + Note that this class by itself is not a valid model unless you + inherit from nn.Module and implement forward() in a subclass.""" + + def __init__( + self, + obs_space: gym.spaces.Space, + action_space: gym.spaces.Space, + num_outputs: int, + model_config: ModelConfigDict, + name: str, + ): + """Initialize a TorchModelV2. + + Here is an example implementation for a subclass + ``MyModelClass(TorchModelV2, nn.Module)``:: + + def __init__(self, *args, **kwargs): + TorchModelV2.__init__(self, *args, **kwargs) + nn.Module.__init__(self) + self._hidden_layers = nn.Sequential(...) + self._logits = ... + self._value_branch = ... + """ + if not isinstance(self, nn.Module): + raise ValueError( + "Subclasses of TorchModelV2 must also inherit from " + "nn.Module, e.g., MyModel(TorchModelV2, nn.Module)" + ) + + ModelV2.__init__( + self, + obs_space, + action_space, + num_outputs, + model_config, + name, + framework="torch", + ) + + # Dict to store per multi-gpu tower stats into. + # In PyTorch multi-GPU, we use a single TorchPolicy and copy + # it's Model(s) n times (1 copy for each GPU). When computing the loss + # on each tower, we cannot store the stats (e.g. `entropy`) inside the + # policy object as this would lead to race conditions between the + # different towers all accessing the same property at the same time. 
+ self.tower_stats = {} + + @override(ModelV2) + def variables( + self, as_dict: bool = False + ) -> Union[List[TensorType], Dict[str, TensorType]]: + p = list(self.parameters()) + if as_dict: + return {k: p[i] for i, k in enumerate(self.state_dict().keys())} + return p + + @override(ModelV2) + def trainable_variables( + self, as_dict: bool = False + ) -> Union[List[TensorType], Dict[str, TensorType]]: + if as_dict: + return { + k: v for k, v in self.variables(as_dict=True).items() if v.requires_grad + } + return [v for v in self.variables() if v.requires_grad] diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/models/torch/visionnet.py b/.venv/lib/python3.11/site-packages/ray/rllib/models/torch/visionnet.py new file mode 100644 index 0000000000000000000000000000000000000000..748ba5796e3bfb76cc0c04a4a2d14cffaede48ab --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/rllib/models/torch/visionnet.py @@ -0,0 +1,293 @@ +import numpy as np +from typing import Dict, List +import gymnasium as gym + +from ray.rllib.models.torch.torch_modelv2 import TorchModelV2 +from ray.rllib.models.torch.misc import ( + normc_initializer, + same_padding, + SlimConv2d, + SlimFC, +) +from ray.rllib.models.utils import get_activation_fn, get_filter_config +from ray.rllib.utils.annotations import OldAPIStack, override +from ray.rllib.utils.framework import try_import_torch +from ray.rllib.utils.typing import ModelConfigDict, TensorType + +torch, nn = try_import_torch() + + +@OldAPIStack +class VisionNetwork(TorchModelV2, nn.Module): + """Generic vision network.""" + + def __init__( + self, + obs_space: gym.spaces.Space, + action_space: gym.spaces.Space, + num_outputs: int, + model_config: ModelConfigDict, + name: str, + ): + if not model_config.get("conv_filters"): + model_config["conv_filters"] = get_filter_config(obs_space.shape) + + TorchModelV2.__init__( + self, obs_space, action_space, num_outputs, model_config, name + ) + nn.Module.__init__(self) + + activation = 
        # Conv-stack config (activation + filter specs [out_channels, kernel,
        # stride] per layer).
        activation = self.model_config.get("conv_activation")
        filters = self.model_config["conv_filters"]
        assert len(filters) > 0, "Must provide at least 1 entry in `conv_filters`!"

        # Post FC net config.
        post_fcnet_hiddens = model_config.get("post_fcnet_hiddens", [])
        post_fcnet_activation = get_activation_fn(
            model_config.get("post_fcnet_activation"), framework="torch"
        )

        no_final_linear = self.model_config.get("no_final_linear")
        vf_share_layers = self.model_config.get("vf_share_layers")

        # Whether the last layer is the output of a Flattened (rather than
        # a n x (1,1) Conv2D).
        self.last_layer_is_flattened = False
        self._logits = None

        layers = []
        # NOTE(review): assumes channels-last (H, W, C)-shaped observation
        # space (NHWC); inputs are permuted to NCHW in `forward()`.
        (w, h, in_channels) = obs_space.shape

        # Build all but the last conv layer with "same"-style padding, tracking
        # the spatial output size layer by layer.
        in_size = [w, h]
        for out_channels, kernel, stride in filters[:-1]:
            padding, out_size = same_padding(in_size, kernel, stride)
            layers.append(
                SlimConv2d(
                    in_channels,
                    out_channels,
                    kernel,
                    stride,
                    padding,
                    activation_fn=activation,
                )
            )
            in_channels = out_channels
            in_size = out_size

        out_channels, kernel, stride = filters[-1]

        # No final linear: Last layer has activation function and exits with
        # num_outputs nodes (this could be a 1x1 conv or a FC layer, depending
        # on `post_fcnet_...` settings).
        if no_final_linear and num_outputs:
            # If a post-FC stack follows, keep the conv's own channel count;
            # otherwise the conv itself must emit `num_outputs` channels.
            out_channels = out_channels if post_fcnet_hiddens else num_outputs
            layers.append(
                SlimConv2d(
                    in_channels,
                    out_channels,
                    kernel,
                    stride,
                    None,  # padding=valid
                    activation_fn=activation,
                )
            )

            # Add (optional) post-fc-stack after last Conv2D layer.
            layer_sizes = post_fcnet_hiddens[:-1] + (
                [num_outputs] if post_fcnet_hiddens else []
            )
            for i, out_size in enumerate(layer_sizes):
                layers.append(
                    SlimFC(
                        in_size=out_channels,
                        out_size=out_size,
                        activation_fn=post_fcnet_activation,
                        initializer=normc_initializer(1.0),
                    )
                )
                out_channels = out_size

        # Finish network normally (w/o overriding last layer size with
        # `num_outputs`), then add another linear one of size `num_outputs`.
        else:
            layers.append(
                SlimConv2d(
                    in_channels,
                    out_channels,
                    kernel,
                    stride,
                    None,  # padding=valid
                    activation_fn=activation,
                )
            )

            # num_outputs defined. Use that to create an exact
            # `num_output`-sized (1,1)-Conv2D.
            if num_outputs:
                # Spatial size after the final "valid"-padded conv above.
                in_size = [
                    np.ceil((in_size[0] - kernel[0]) / stride),
                    np.ceil((in_size[1] - kernel[1]) / stride),
                ]
                padding, _ = same_padding(in_size, [1, 1], [1, 1])
                if post_fcnet_hiddens:
                    layers.append(nn.Flatten())
                    in_size = out_channels
                    # Add (optional) post-fc-stack after last Conv2D layer.
                    for i, out_size in enumerate(post_fcnet_hiddens + [num_outputs]):
                        layers.append(
                            SlimFC(
                                in_size=in_size,
                                out_size=out_size,
                                activation_fn=post_fcnet_activation
                                if i < len(post_fcnet_hiddens) - 1
                                else None,
                                initializer=normc_initializer(1.0),
                            )
                        )
                        in_size = out_size
                    # Last layer is logits layer.
                    self._logits = layers.pop()

                else:
                    # Final (1,1)-conv that maps to exactly `num_outputs`
                    # channels; applied separately in `forward()`.
                    self._logits = SlimConv2d(
                        out_channels,
                        num_outputs,
                        [1, 1],
                        1,
                        padding,
                        activation_fn=None,
                    )

            # num_outputs not known -> Flatten, then set self.num_outputs
            # to the resulting number of nodes.
            else:
                self.last_layer_is_flattened = True
                layers.append(nn.Flatten())

        self._convs = nn.Sequential(*layers)

        # If our num_outputs still unknown, we need to do a test pass to
        # figure out the output dimensions. This could be the case, if we have
        # the Flatten layer at the end.
        if self.num_outputs is None:
            # Create a B=1 dummy sample and push it through out conv-net.
            dummy_in = (
                torch.from_numpy(self.obs_space.sample())
                .permute(2, 0, 1)
                .unsqueeze(0)
                .float()
            )
            dummy_out = self._convs(dummy_in)
            self.num_outputs = dummy_out.shape[1]

        # Build the value layers
        self._value_branch_separate = self._value_branch = None
        if vf_share_layers:
            # Shared trunk: a single FC head on top of the conv features.
            self._value_branch = SlimFC(
                out_channels, 1, initializer=normc_initializer(0.01), activation_fn=None
            )
        else:
            # Separate value conv-stack mirroring the policy conv-stack,
            # topped by a 1-channel (1,1)-conv.
            vf_layers = []
            (w, h, in_channels) = obs_space.shape
            in_size = [w, h]
            for out_channels, kernel, stride in filters[:-1]:
                padding, out_size = same_padding(in_size, kernel, stride)
                vf_layers.append(
                    SlimConv2d(
                        in_channels,
                        out_channels,
                        kernel,
                        stride,
                        padding,
                        activation_fn=activation,
                    )
                )
                in_channels = out_channels
                in_size = out_size

            out_channels, kernel, stride = filters[-1]
            vf_layers.append(
                SlimConv2d(
                    in_channels,
                    out_channels,
                    kernel,
                    stride,
                    None,
                    activation_fn=activation,
                )
            )

            vf_layers.append(
                SlimConv2d(
                    in_channels=out_channels,
                    out_channels=1,
                    kernel=1,
                    stride=1,
                    padding=None,
                    activation_fn=None,
                )
            )
            self._value_branch_separate = nn.Sequential(*vf_layers)

        # Holds the current "base" output (before logits layer).
        self._features = None

    @override(TorchModelV2)
    def forward(
        self,
        input_dict: Dict[str, TensorType],
        state: List[TensorType],
        seq_lens: TensorType,
    ) -> (TensorType, List[TensorType]):
        """Runs the conv stack; returns (logits, state).

        Caches features for a subsequent `value_function()` call.
        """
        self._features = input_dict["obs"].float()
        # Permute b/c data comes in as [B, dim, dim, channels]:
        self._features = self._features.permute(0, 3, 1, 2)
        conv_out = self._convs(self._features)
        # Store features to save forward pass when getting value_function out.
        if not self._value_branch_separate:
            self._features = conv_out

        if not self.last_layer_is_flattened:
            if self._logits:
                conv_out = self._logits(conv_out)
            if len(conv_out.shape) == 4:
                # The conv stack must have reduced spatial dims to 1x1 so the
                # output can be squeezed into [B, num_outputs].
                if conv_out.shape[2] != 1 or conv_out.shape[3] != 1:
                    raise ValueError(
                        "Given `conv_filters` ({}) do not result in a [B, {} "
                        "(`num_outputs`), 1, 1] shape (but in {})! Please "
                        "adjust your Conv2D stack such that the last 2 dims "
                        "are both 1.".format(
                            self.model_config["conv_filters"],
                            self.num_outputs,
                            list(conv_out.shape),
                        )
                    )
                logits = conv_out.squeeze(3)
                logits = logits.squeeze(2)
            else:
                logits = conv_out
            return logits, state
        else:
            return conv_out, state

    @override(TorchModelV2)
    def value_function(self) -> TensorType:
        """Returns a [B]-shaped value estimate from the cached features."""
        assert self._features is not None, "must call forward() first"
        if self._value_branch_separate:
            # `self._features` still holds the raw (permuted) observations here.
            value = self._value_branch_separate(self._features)
            value = value.squeeze(3)
            value = value.squeeze(2)
            return value.squeeze(1)
        else:
            if not self.last_layer_is_flattened:
                features = self._features.squeeze(3)
                features = features.squeeze(2)
            else:
                features = self._features
            return self._value_branch(features).squeeze(1)

    def _hidden_layers(self, obs: TensorType) -> TensorType:
        # Run the conv stack on a channels-last obs batch and squeeze the
        # (expected 1x1) spatial dims away.
        res = self._convs(obs.permute(0, 3, 1, 2))  # switch to channel-major
        res = res.squeeze(3)
        res = res.squeeze(2)
        return res