diff --git a/.gitattributes b/.gitattributes index e27c55413e2c90c83fe5c168d4b812c6a13a95d7..666a47cc600b18661deb3997e62e07250eb2b034 100644 --- a/.gitattributes +++ b/.gitattributes @@ -171,3 +171,5 @@ tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/_inductor/_ .venv/lib/python3.11/site-packages/ray/_private/runtime_env/agent/thirdparty_files/aiohttp/_websocket/reader_c.cpython-311-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text .venv/lib/python3.11/site-packages/ray/_private/thirdparty/tabulate/__pycache__/tabulate.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text .venv/lib/python3.11/site-packages/ray/_private/runtime_env/agent/thirdparty_files/idna/__pycache__/idnadata.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text +.venv/lib/python3.11/site-packages/ray/_private/runtime_env/agent/thirdparty_files/propcache/_helpers_c.cpython-311-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +.venv/lib/python3.11/site-packages/ray/jars/ray_dist.jar filter=lfs diff=lfs merge=lfs -text diff --git a/.venv/lib/python3.11/site-packages/ray/_private/__pycache__/process_watcher.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/_private/__pycache__/process_watcher.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5fcd03baf12046b8bd73d4832965c70eb296c0d3 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/_private/__pycache__/process_watcher.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/_private/accelerators/__init__.py b/.venv/lib/python3.11/site-packages/ray/_private/accelerators/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..71550bc43b45e21c88029f1012b4ed5973468f9e --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/_private/accelerators/__init__.py @@ -0,0 +1,77 @@ +from typing import Set, Optional + +from ray._private.accelerators.accelerator import AcceleratorManager +from ray._private.accelerators.nvidia_gpu 
import NvidiaGPUAcceleratorManager +from ray._private.accelerators.intel_gpu import IntelGPUAcceleratorManager +from ray._private.accelerators.amd_gpu import AMDGPUAcceleratorManager +from ray._private.accelerators.tpu import TPUAcceleratorManager +from ray._private.accelerators.neuron import NeuronAcceleratorManager +from ray._private.accelerators.hpu import HPUAcceleratorManager +from ray._private.accelerators.npu import NPUAcceleratorManager + + +def get_all_accelerator_managers() -> Set[AcceleratorManager]: + """Get all accelerator managers supported by Ray.""" + return { + NvidiaGPUAcceleratorManager, + IntelGPUAcceleratorManager, + AMDGPUAcceleratorManager, + TPUAcceleratorManager, + NeuronAcceleratorManager, + HPUAcceleratorManager, + NPUAcceleratorManager, + } + + +def get_all_accelerator_resource_names() -> Set[str]: + """Get all resource names for accelerators.""" + return { + accelerator_manager.get_resource_name() + for accelerator_manager in get_all_accelerator_managers() + } + + +def get_accelerator_manager_for_resource( + resource_name: str, +) -> Optional[AcceleratorManager]: + """Get the corresponding accelerator manager for the given + accelerator resource name + + E.g., TPUAcceleratorManager is returned if resource name is "TPU" + """ + try: + return get_accelerator_manager_for_resource._resource_name_to_accelerator_manager.get( # noqa: E501 + resource_name, None + ) + except AttributeError: + # Lazy initialization. + resource_name_to_accelerator_manager = { + accelerator_manager.get_resource_name(): accelerator_manager + for accelerator_manager in get_all_accelerator_managers() + } + # Special handling for GPU resource name since multiple accelerator managers + # have the same GPU resource name. 
+ if AMDGPUAcceleratorManager.get_current_node_num_accelerators() > 0: + resource_name_to_accelerator_manager["GPU"] = AMDGPUAcceleratorManager + elif IntelGPUAcceleratorManager.get_current_node_num_accelerators() > 0: + resource_name_to_accelerator_manager["GPU"] = IntelGPUAcceleratorManager + else: + resource_name_to_accelerator_manager["GPU"] = NvidiaGPUAcceleratorManager + get_accelerator_manager_for_resource._resource_name_to_accelerator_manager = ( + resource_name_to_accelerator_manager + ) + return resource_name_to_accelerator_manager.get(resource_name, None) + + +__all__ = [ + "NvidiaGPUAcceleratorManager", + "IntelGPUAcceleratorManager", + "AMDGPUAcceleratorManager", + "TPUAcceleratorManager", + "NeuronAcceleratorManager", + "HPUAcceleratorManager", + "NPUAcceleratorManager", + "get_all_accelerator_managers", + "get_all_accelerator_resource_names", + "get_accelerator_manager_for_resource", +] diff --git a/.venv/lib/python3.11/site-packages/ray/_private/accelerators/__pycache__/__init__.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/_private/accelerators/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3d28398b3a37fe9a4214daad6eb7f06800100ff3 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/_private/accelerators/__pycache__/__init__.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/_private/accelerators/__pycache__/accelerator.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/_private/accelerators/__pycache__/accelerator.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3c7509aa6ef7b8b1b346222a33588eccb43ab4ad Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/_private/accelerators/__pycache__/accelerator.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/_private/accelerators/__pycache__/amd_gpu.cpython-311.pyc 
b/.venv/lib/python3.11/site-packages/ray/_private/accelerators/__pycache__/amd_gpu.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..74e33de0cd25ef8dac1529c7f9ce909b9f8f621b Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/_private/accelerators/__pycache__/amd_gpu.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/_private/accelerators/__pycache__/hpu.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/_private/accelerators/__pycache__/hpu.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..90a2403b357688e627428139a6e71352c6a5b68e Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/_private/accelerators/__pycache__/hpu.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/_private/accelerators/__pycache__/intel_gpu.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/_private/accelerators/__pycache__/intel_gpu.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..fd79a52d55e666c6810afa52075586ff58c764b9 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/_private/accelerators/__pycache__/intel_gpu.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/_private/accelerators/__pycache__/neuron.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/_private/accelerators/__pycache__/neuron.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e096da36830076ac2a457276daa27491efc5dc65 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/_private/accelerators/__pycache__/neuron.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/_private/accelerators/__pycache__/npu.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/_private/accelerators/__pycache__/npu.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0449ec02e6a2d984a2969320ec5172ea130559fb 
Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/_private/accelerators/__pycache__/npu.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/_private/accelerators/__pycache__/nvidia_gpu.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/_private/accelerators/__pycache__/nvidia_gpu.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..857265915ba3b8f06580e772efbabc5b5541e36c Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/_private/accelerators/__pycache__/nvidia_gpu.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/_private/accelerators/__pycache__/tpu.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/_private/accelerators/__pycache__/tpu.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f001939716c2d66e41f8cdce861fbfb8bef7bda4 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/_private/accelerators/__pycache__/tpu.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/_private/accelerators/accelerator.py b/.venv/lib/python3.11/site-packages/ray/_private/accelerators/accelerator.py new file mode 100644 index 0000000000000000000000000000000000000000..70178094e14cd0cccdd35fe8013fc50f877345b9 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/_private/accelerators/accelerator.py @@ -0,0 +1,138 @@ +from abc import ABC, abstractmethod +from typing import Dict, Optional, List, Tuple + + +class AcceleratorManager(ABC): + """This class contains all the functions needed for supporting + an accelerator family in Ray.""" + + @staticmethod + @abstractmethod + def get_resource_name() -> str: + """Get the name of the resource representing this accelerator family. 
+ + Returns: + The resource name: e.g., the resource name for Nvidia GPUs is "GPU" + """ + + @staticmethod + @abstractmethod + def get_visible_accelerator_ids_env_var() -> str: + """Get the env var that sets the ids of visible accelerators of this family. + + Returns: + The env var for setting visible accelerator ids: e.g., + CUDA_VISIBLE_DEVICES for Nvidia GPUs. + """ + + @staticmethod + @abstractmethod + def get_current_node_num_accelerators() -> int: + """Get the total number of accelerators of this family on the current node. + + Returns: + The detected total number of accelerators of this family. + Return 0 if the current node doesn't contain accelerators of this family. + """ + + @staticmethod + @abstractmethod + def get_current_node_accelerator_type() -> Optional[str]: + """Get the type of the accelerator of this family on the current node. + + Currently Ray only supports single accelerator type of + an accelerator family on each node. + + The result should only be used when get_current_node_num_accelerators() > 0. + + Returns: + The detected accelerator type of this family: e.g., H100 for Nvidia GPU. + Return None if it's unknown or the node doesn't have + accelerators of this family. + """ + + @staticmethod + @abstractmethod + def get_current_node_additional_resources() -> Optional[Dict[str, float]]: + """Get any additional resources required for the current node. + + In case a particular accelerator type requires considerations for + additional resources (e.g. for TPUs, providing the TPU pod type and + TPU name), this function can be used to provide the + additional logical resources. + + Returns: + A dictionary representing additional resources that may be + necessary for a particular accelerator type. + """ + + @staticmethod + @abstractmethod + def validate_resource_request_quantity( + quantity: float, + ) -> Tuple[bool, Optional[str]]: + """Validate the resource request quantity of this accelerator resource. 
+ + Args: + quantity: The resource request quantity to be validated. + + Returns: + (valid, error_message) tuple: the first element of the tuple + indicates whether the given quantity is valid or not, + the second element is the error message + if the given quantity is invalid. + """ + + @staticmethod + @abstractmethod + def get_current_process_visible_accelerator_ids() -> Optional[List[str]]: + """Get the ids of accelerators of this family that are visible to the current process. + + Returns: + The list of visiable accelerator ids. + Return None if all accelerators are visible. + """ + + @staticmethod + @abstractmethod + def set_current_process_visible_accelerator_ids(ids: List[str]) -> None: + """Set the ids of accelerators of this family that are visible to the current process. + + Args: + ids: The ids of visible accelerators of this family. + """ + + @staticmethod + def get_ec2_instance_num_accelerators( + instance_type: str, instances: dict + ) -> Optional[int]: + """Get the number of accelerators of this family on ec2 instance with given type. + + Args: + instance_type: The ec2 instance type. + instances: Map from ec2 instance type to instance metadata returned by + ec2 `describe-instance-types`. + + Returns: + The number of accelerators of this family on the ec2 instance + with given type. + Return None if it's unknown. + """ + return None + + @staticmethod + def get_ec2_instance_accelerator_type( + instance_type: str, instances: dict + ) -> Optional[str]: + """Get the accelerator type of this family on ec2 instance with given type. + + Args: + instance_type: The ec2 instance type. + instances: Map from ec2 instance type to instance metadata returned by + ec2 `describe-instance-types`. + + Returns: + The accelerator type of this family on the ec2 instance with given type. + Return None if it's unknown. 
+ """ + return None diff --git a/.venv/lib/python3.11/site-packages/ray/_private/accelerators/hpu.py b/.venv/lib/python3.11/site-packages/ray/_private/accelerators/hpu.py new file mode 100644 index 0000000000000000000000000000000000000000..87bae0a9267ee0b4f69cf58f79af251eb4693b61 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/_private/accelerators/hpu.py @@ -0,0 +1,121 @@ +import os +import logging +from typing import Optional, List, Tuple +from functools import lru_cache +from importlib.util import find_spec + +from ray._private.accelerators.accelerator import AcceleratorManager + +logger = logging.getLogger(__name__) + +HABANA_VISIBLE_DEVICES_ENV_VAR = "HABANA_VISIBLE_MODULES" +NOSET_HABANA_VISIBLE_MODULES_ENV_VAR = "RAY_EXPERIMENTAL_NOSET_HABANA_VISIBLE_MODULES" + + +@lru_cache() +def is_package_present(package_name: str) -> bool: + try: + return find_spec(package_name) is not None + except ModuleNotFoundError: + return False + + +HPU_PACKAGE_AVAILABLE = is_package_present("habana_frameworks") + + +class HPUAcceleratorManager(AcceleratorManager): + """Intel Habana(HPU) accelerators.""" + + @staticmethod + def get_resource_name() -> str: + return "HPU" + + @staticmethod + def get_visible_accelerator_ids_env_var() -> str: + return HABANA_VISIBLE_DEVICES_ENV_VAR + + @staticmethod + def get_current_process_visible_accelerator_ids() -> Optional[List[str]]: + hpu_visible_devices = os.environ.get( + HPUAcceleratorManager.get_visible_accelerator_ids_env_var(), None + ) + + if hpu_visible_devices is None: + return None + + if hpu_visible_devices == "": + return [] + + return list(hpu_visible_devices.split(",")) + + @staticmethod + def get_current_node_num_accelerators() -> int: + """Attempt to detect the number of HPUs on this machine. + Returns: + The number of HPUs if any were detected, otherwise 0. 
+ """ + if HPU_PACKAGE_AVAILABLE: + import habana_frameworks.torch.hpu as torch_hpu + + if torch_hpu.is_available(): + return torch_hpu.device_count() + else: + logging.info("HPU devices not available") + return 0 + else: + return 0 + + @staticmethod + def is_initialized() -> bool: + """Attempt to check if HPU backend is initialized. + Returns: + True if backend initialized else False. + """ + if HPU_PACKAGE_AVAILABLE: + import habana_frameworks.torch.hpu as torch_hpu + + if torch_hpu.is_available() and torch_hpu.is_initialized(): + return True + else: + return False + else: + return False + + @staticmethod + def get_current_node_accelerator_type() -> Optional[str]: + """Attempt to detect the HPU family type. + Returns: + The device name (GAUDI, GAUDI2) if detected else None. + """ + if HPUAcceleratorManager.is_initialized(): + import habana_frameworks.torch.hpu as torch_hpu + + return f"Intel-{torch_hpu.get_device_name()}" + else: + logging.info("HPU type cannot be detected") + return None + + @staticmethod + def validate_resource_request_quantity( + quantity: float, + ) -> Tuple[bool, Optional[str]]: + if isinstance(quantity, float) and not quantity.is_integer(): + return ( + False, + f"{HPUAcceleratorManager.get_resource_name()} resource quantity" + " must be whole numbers. 
" + f"The specified quantity {quantity} is invalid.", + ) + else: + return (True, None) + + @staticmethod + def set_current_process_visible_accelerator_ids( + visible_hpu_devices: List[str], + ) -> None: + if os.environ.get(NOSET_HABANA_VISIBLE_MODULES_ENV_VAR): + return + + os.environ[ + HPUAcceleratorManager.get_visible_accelerator_ids_env_var() + ] = ",".join([str(i) for i in visible_hpu_devices]) diff --git a/.venv/lib/python3.11/site-packages/ray/_private/accelerators/intel_gpu.py b/.venv/lib/python3.11/site-packages/ray/_private/accelerators/intel_gpu.py new file mode 100644 index 0000000000000000000000000000000000000000..bd6f1c0fcbb1426f761ab407a92a188bb185dcec --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/_private/accelerators/intel_gpu.py @@ -0,0 +1,103 @@ +import os +import logging +from typing import Optional, List, Tuple + +from ray._private.accelerators.accelerator import AcceleratorManager + +logger = logging.getLogger(__name__) + +ONEAPI_DEVICE_SELECTOR_ENV_VAR = "ONEAPI_DEVICE_SELECTOR" +NOSET_ONEAPI_DEVICE_SELECTOR_ENV_VAR = "RAY_EXPERIMENTAL_NOSET_ONEAPI_DEVICE_SELECTOR" +ONEAPI_DEVICE_BACKEND_TYPE = "level_zero" +ONEAPI_DEVICE_TYPE = "gpu" + + +class IntelGPUAcceleratorManager(AcceleratorManager): + """Intel GPU accelerators.""" + + @staticmethod + def get_resource_name() -> str: + return "GPU" + + @staticmethod + def get_visible_accelerator_ids_env_var() -> str: + return ONEAPI_DEVICE_SELECTOR_ENV_VAR + + @staticmethod + def get_current_process_visible_accelerator_ids() -> Optional[List[str]]: + oneapi_visible_devices = os.environ.get( + IntelGPUAcceleratorManager.get_visible_accelerator_ids_env_var(), None + ) + if oneapi_visible_devices is None: + return None + + if oneapi_visible_devices == "": + return [] + + if oneapi_visible_devices == "NoDevFiles": + return [] + + prefix = ONEAPI_DEVICE_BACKEND_TYPE + ":" + + return list(oneapi_visible_devices.split(prefix)[1].split(",")) + + @staticmethod + def 
get_current_node_num_accelerators() -> int: + try: + import dpctl + except ImportError: + dpctl = None + if dpctl is None: + return 0 + + num_gpus = 0 + try: + dev_info = ONEAPI_DEVICE_BACKEND_TYPE + ":" + ONEAPI_DEVICE_TYPE + context = dpctl.SyclContext(dev_info) + num_gpus = context.device_count + except Exception: + num_gpus = 0 + return num_gpus + + @staticmethod + def get_current_node_accelerator_type() -> Optional[str]: + """Get the name of first Intel GPU. (supposed only one GPU type on a node) + Example: + name: 'Intel(R) Data Center GPU Max 1550' + return name: 'Intel-GPU-Max-1550' + Returns: + A string representing the name of Intel GPU type. + """ + try: + import dpctl + except ImportError: + dpctl = None + if dpctl is None: + return None + + accelerator_type = None + try: + dev_info = ONEAPI_DEVICE_BACKEND_TYPE + ":" + ONEAPI_DEVICE_TYPE + ":0" + dev = dpctl.SyclDevice(dev_info) + accelerator_type = "Intel-GPU-" + "-".join(dev.name.split(" ")[-2:]) + except Exception: + accelerator_type = None + return accelerator_type + + @staticmethod + def validate_resource_request_quantity( + quantity: float, + ) -> Tuple[bool, Optional[str]]: + return (True, None) + + @staticmethod + def set_current_process_visible_accelerator_ids( + visible_xpu_devices: List[str], + ) -> None: + if os.environ.get(NOSET_ONEAPI_DEVICE_SELECTOR_ENV_VAR): + return + + prefix = ONEAPI_DEVICE_BACKEND_TYPE + ":" + os.environ[ + IntelGPUAcceleratorManager.get_visible_accelerator_ids_env_var() + ] = prefix + ",".join([str(i) for i in visible_xpu_devices]) diff --git a/.venv/lib/python3.11/site-packages/ray/_private/accelerators/neuron.py b/.venv/lib/python3.11/site-packages/ray/_private/accelerators/neuron.py new file mode 100644 index 0000000000000000000000000000000000000000..7ba9eeb0666b0c0c20c7c6ae89f27cde6011450d --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/_private/accelerators/neuron.py @@ -0,0 +1,132 @@ +import os +import sys +import json +import logging +import 
subprocess +from typing import Optional, List, Tuple + +from ray._private.accelerators.accelerator import AcceleratorManager + +logger = logging.getLogger(__name__) + +NEURON_RT_VISIBLE_CORES_ENV_VAR = "NEURON_RT_VISIBLE_CORES" +NOSET_AWS_NEURON_RT_VISIBLE_CORES_ENV_VAR = ( + "RAY_EXPERIMENTAL_NOSET_NEURON_RT_VISIBLE_CORES" +) + +# https://awsdocs-neuron.readthedocs-hosted.com/en/latest/general/arch/neuron-hardware/inf2-arch.html#aws-inf2-arch +# https://awsdocs-neuron.readthedocs-hosted.com/en/latest/general/arch/neuron-hardware/trn1-arch.html#aws-trn1-arch +# Subject to removal after the information is available via public API +AWS_NEURON_INSTANCE_MAP = { + "trn1.2xlarge": 2, + "trn1.32xlarge": 32, + "trn1n.32xlarge": 32, + "inf2.xlarge": 2, + "inf2.8xlarge": 2, + "inf2.24xlarge": 12, + "inf2.48xlarge": 24, +} + + +class NeuronAcceleratorManager(AcceleratorManager): + """AWS Inferentia and Trainium accelerators.""" + + @staticmethod + def get_resource_name() -> str: + return "neuron_cores" + + @staticmethod + def get_visible_accelerator_ids_env_var() -> str: + return NEURON_RT_VISIBLE_CORES_ENV_VAR + + @staticmethod + def get_current_process_visible_accelerator_ids() -> Optional[List[str]]: + neuron_visible_cores = os.environ.get( + NeuronAcceleratorManager.get_visible_accelerator_ids_env_var(), None + ) + + if neuron_visible_cores is None: + return None + + if neuron_visible_cores == "": + return [] + + return list(neuron_visible_cores.split(",")) + + @staticmethod + def get_current_node_num_accelerators() -> int: + """ + Attempt to detect the number of Neuron cores on this machine. + + Returns: + The number of Neuron cores if any were detected, otherwise 0. 
+ """ + nc_count: int = 0 + neuron_path = "/opt/aws/neuron/bin/" + if sys.platform.startswith("linux") and os.path.isdir(neuron_path): + result = subprocess.run( + [os.path.join(neuron_path, "neuron-ls"), "--json-output"], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + if result.returncode == 0 and result.stdout: + neuron_devices = json.loads(result.stdout) + for neuron_device in neuron_devices: + nc_count += neuron_device.get("nc_count", 0) + return nc_count + + @staticmethod + def get_current_node_accelerator_type() -> Optional[str]: + from ray.util.accelerators import AWS_NEURON_CORE + + return AWS_NEURON_CORE + + @staticmethod + def validate_resource_request_quantity( + quantity: float, + ) -> Tuple[bool, Optional[str]]: + if isinstance(quantity, float) and not quantity.is_integer(): + return ( + False, + f"{NeuronAcceleratorManager.get_resource_name()} resource quantity" + " must be whole numbers. " + f"The specified quantity {quantity} is invalid.", + ) + else: + return (True, None) + + @staticmethod + def set_current_process_visible_accelerator_ids( + visible_neuron_core_ids: List[str], + ) -> None: + """Set the NEURON_RT_VISIBLE_CORES environment variable based on + given visible_neuron_core_ids. + + Args: + visible_neuron_core_ids (List[str]): List of int representing core IDs. + """ + if os.environ.get(NOSET_AWS_NEURON_RT_VISIBLE_CORES_ENV_VAR): + return + + os.environ[ + NeuronAcceleratorManager.get_visible_accelerator_ids_env_var() + ] = ",".join([str(i) for i in visible_neuron_core_ids]) + + @staticmethod + def get_ec2_instance_num_accelerators( + instance_type: str, instances: dict + ) -> Optional[int]: + # TODO: AWS SDK (public API) doesn't yet expose the NeuronCore + # information. It will be available (work-in-progress) + # as xxAcceleratorInfo in InstanceTypeInfo. 
+ # https://docs.aws.amazon.com/AWSEC2/latest/APIReference/API_InstanceTypeInfo.html + # See https://github.com/ray-project/ray/issues/38473 + return AWS_NEURON_INSTANCE_MAP.get(instance_type.lower(), None) + + @staticmethod + def get_ec2_instance_accelerator_type( + instance_type: str, instances: dict + ) -> Optional[str]: + from ray.util.accelerators import AWS_NEURON_CORE + + return AWS_NEURON_CORE diff --git a/.venv/lib/python3.11/site-packages/ray/_private/accelerators/npu.py b/.venv/lib/python3.11/site-packages/ray/_private/accelerators/npu.py new file mode 100644 index 0000000000000000000000000000000000000000..d98434cd302ae64c6cfc0df328580fc4d549855a --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/_private/accelerators/npu.py @@ -0,0 +1,99 @@ +import os +import glob +import logging +from typing import Optional, List, Tuple + +from ray._private.accelerators.accelerator import AcceleratorManager + +logger = logging.getLogger(__name__) + +ASCEND_RT_VISIBLE_DEVICES_ENV_VAR = "ASCEND_RT_VISIBLE_DEVICES" +NOSET_ASCEND_RT_VISIBLE_DEVICES_ENV_VAR = ( + "RAY_EXPERIMENTAL_NOSET_ASCEND_RT_VISIBLE_DEVICES" +) + + +class NPUAcceleratorManager(AcceleratorManager): + """Ascend NPU accelerators.""" + + @staticmethod + def get_resource_name() -> str: + return "NPU" + + @staticmethod + def get_visible_accelerator_ids_env_var() -> str: + return ASCEND_RT_VISIBLE_DEVICES_ENV_VAR + + @staticmethod + def get_current_process_visible_accelerator_ids() -> Optional[List[str]]: + ascend_visible_devices = os.environ.get( + NPUAcceleratorManager.get_visible_accelerator_ids_env_var(), None + ) + + if ascend_visible_devices is None: + return None + + if ascend_visible_devices == "": + return [] + + if ascend_visible_devices == "NoDevFiles": + return [] + + return list(ascend_visible_devices.split(",")) + + @staticmethod + def get_current_node_num_accelerators() -> int: + """Attempt to detect the number of NPUs on this machine. 
+ + NPU chips are represented as devices within `/dev/`, either as `/dev/davinci?`. + + Returns: + The number of NPUs if any were detected, otherwise 0. + """ + try: + import acl + + device_count, ret = acl.rt.get_device_count() + if ret == 0: + return device_count + except Exception as e: + logger.debug("Could not import AscendCL: %s", e) + + try: + npu_files = glob.glob("/dev/davinci[0-9]*") + return len(npu_files) + except Exception as e: + logger.debug("Failed to detect number of NPUs: %s", e) + return 0 + + @staticmethod + def get_current_node_accelerator_type() -> Optional[str]: + """Get the type of the Ascend NPU on the current node. + + Returns: + A string of the type, such as "Ascend910A", "Ascend910B", "Ascend310P1". + """ + try: + import acl + + return acl.get_soc_name() + except Exception: + logger.exception("Failed to detect NPU type.") + return None + + @staticmethod + def validate_resource_request_quantity( + quantity: float, + ) -> Tuple[bool, Optional[str]]: + return (True, None) + + @staticmethod + def set_current_process_visible_accelerator_ids( + visible_npu_devices: List[str], + ) -> None: + if os.environ.get(NOSET_ASCEND_RT_VISIBLE_DEVICES_ENV_VAR): + return + + os.environ[ + NPUAcceleratorManager.get_visible_accelerator_ids_env_var() + ] = ",".join([str(i) for i in visible_npu_devices]) diff --git a/.venv/lib/python3.11/site-packages/ray/_private/accelerators/nvidia_gpu.py b/.venv/lib/python3.11/site-packages/ray/_private/accelerators/nvidia_gpu.py new file mode 100644 index 0000000000000000000000000000000000000000..2eaafb5a6e06e006cdd950f0f033c2c2724ac0de --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/_private/accelerators/nvidia_gpu.py @@ -0,0 +1,128 @@ +import re +import os +import logging +from typing import Optional, List, Tuple + +from ray._private.accelerators.accelerator import AcceleratorManager + +logger = logging.getLogger(__name__) + +CUDA_VISIBLE_DEVICES_ENV_VAR = "CUDA_VISIBLE_DEVICES" 
+NOSET_CUDA_VISIBLE_DEVICES_ENV_VAR = "RAY_EXPERIMENTAL_NOSET_CUDA_VISIBLE_DEVICES" + +# TODO(Alex): This pattern may not work for non NVIDIA Tesla GPUs (which have +# the form "Tesla V100-SXM2-16GB" or "Tesla K80"). +NVIDIA_GPU_NAME_PATTERN = re.compile(r"\w+\s+([A-Z0-9]+)") + + +class NvidiaGPUAcceleratorManager(AcceleratorManager): + """Nvidia GPU accelerators.""" + + @staticmethod + def get_resource_name() -> str: + return "GPU" + + @staticmethod + def get_visible_accelerator_ids_env_var() -> str: + return CUDA_VISIBLE_DEVICES_ENV_VAR + + @staticmethod + def get_current_process_visible_accelerator_ids() -> Optional[List[str]]: + cuda_visible_devices = os.environ.get( + NvidiaGPUAcceleratorManager.get_visible_accelerator_ids_env_var(), None + ) + if cuda_visible_devices is None: + return None + + if cuda_visible_devices == "": + return [] + + if cuda_visible_devices == "NoDevFiles": + return [] + + return list(cuda_visible_devices.split(",")) + + @staticmethod + def get_current_node_num_accelerators() -> int: + import ray._private.thirdparty.pynvml as pynvml + + try: + pynvml.nvmlInit() + except pynvml.NVMLError: + return 0 # pynvml init failed + device_count = pynvml.nvmlDeviceGetCount() + pynvml.nvmlShutdown() + return device_count + + @staticmethod + def get_current_node_accelerator_type() -> Optional[str]: + import ray._private.thirdparty.pynvml as pynvml + + try: + pynvml.nvmlInit() + except pynvml.NVMLError: + return None # pynvml init failed + device_count = pynvml.nvmlDeviceGetCount() + cuda_device_type = None + if device_count > 0: + handle = pynvml.nvmlDeviceGetHandleByIndex(0) + device_name = pynvml.nvmlDeviceGetName(handle) + if isinstance(device_name, bytes): + device_name = device_name.decode("utf-8") + cuda_device_type = ( + NvidiaGPUAcceleratorManager._gpu_name_to_accelerator_type(device_name) + ) + pynvml.nvmlShutdown() + return cuda_device_type + + @staticmethod + def _gpu_name_to_accelerator_type(name): + if name is None: + return None + match 
= NVIDIA_GPU_NAME_PATTERN.match(name) + return match.group(1) if match else None + + @staticmethod + def validate_resource_request_quantity( + quantity: float, + ) -> Tuple[bool, Optional[str]]: + return (True, None) + + @staticmethod + def set_current_process_visible_accelerator_ids( + visible_cuda_devices: List[str], + ) -> None: + if os.environ.get(NOSET_CUDA_VISIBLE_DEVICES_ENV_VAR): + return + + os.environ[ + NvidiaGPUAcceleratorManager.get_visible_accelerator_ids_env_var() + ] = ",".join([str(i) for i in visible_cuda_devices]) + + @staticmethod + def get_ec2_instance_num_accelerators( + instance_type: str, instances: dict + ) -> Optional[int]: + if instance_type not in instances: + return None + + gpus = instances[instance_type].get("GpuInfo", {}).get("Gpus") + if gpus is not None: + # TODO(ameer): currently we support one gpu type per node. + assert len(gpus) == 1 + return gpus[0]["Count"] + return None + + @staticmethod + def get_ec2_instance_accelerator_type( + instance_type: str, instances: dict + ) -> Optional[str]: + if instance_type not in instances: + return None + + gpus = instances[instance_type].get("GpuInfo", {}).get("Gpus") + if gpus is not None: + # TODO(ameer): currently we support one gpu type per node. 
+ assert len(gpus) == 1 + return gpus[0]["Name"] + return None diff --git a/.venv/lib/python3.11/site-packages/ray/_private/accelerators/tpu.py b/.venv/lib/python3.11/site-packages/ray/_private/accelerators/tpu.py new file mode 100644 index 0000000000000000000000000000000000000000..1349606e8ad3f6bb91057cd5af033aa133e2bcc3 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/_private/accelerators/tpu.py @@ -0,0 +1,393 @@ +import os +import re +import glob +import requests +import logging +from functools import lru_cache +from typing import Dict, Optional, List, Tuple + +from ray._private.accelerators.accelerator import AcceleratorManager + +logger = logging.getLogger(__name__) + + +TPU_VALID_CHIP_OPTIONS = (1, 2, 4, 8) +GKE_TPU_ACCELERATOR_TYPE_ENV_VAR = "TPU_ACCELERATOR_TYPE" +GKE_TPU_WORKER_ID_ENV_VAR = "TPU_WORKER_ID" +GKE_TPU_NAME_ENV_VAR = "TPU_NAME" + +# Constants for accessing the `accelerator-type` from TPU VM +# instance metadata. +# See https://cloud.google.com/compute/docs/metadata/overview +# for more details about VM instance metadata. +GCE_TPU_ACCELERATOR_ENDPOINT = ( + "http://metadata.google.internal/computeMetadata/v1/instance/attributes/" +) +GCE_TPU_HEADERS = {"Metadata-Flavor": "Google"} +GCE_TPU_ACCELERATOR_KEY = "accelerator-type" +GCE_TPU_INSTANCE_ID_KEY = "instance-id" +GCE_TPU_WORKER_ID_KEY = "agent-worker-number" + +TPU_VISIBLE_CHIPS_ENV_VAR = "TPU_VISIBLE_CHIPS" + +NOSET_TPU_VISIBLE_CHIPS_ENV_VAR = "RAY_EXPERIMENTAL_NOSET_TPU_VISIBLE_CHIPS" + +# The following defines environment variables that allow +# us to access a subset of TPU visible chips. +# +# See: https://github.com/google/jax/issues/14977 for an example/more details. 
+TPU_CHIPS_PER_HOST_BOUNDS_ENV_VAR = "TPU_CHIPS_PER_HOST_BOUNDS" +TPU_CHIPS_PER_HOST_BOUNDS_1_CHIP_CONFIG = "1,1,1" +TPU_CHIPS_PER_HOST_BOUNDS_2_CHIP_CONFIG = "1,2,1" + +TPU_HOST_BOUNDS_ENV_VAR = "TPU_HOST_BOUNDS" +TPU_SINGLE_HOST_BOUNDS = "1,1,1" + + +def _get_tpu_metadata(key: str) -> Optional[str]: + """Poll and get TPU metadata.""" + try: + accelerator_type_request = requests.get( + os.path.join(GCE_TPU_ACCELERATOR_ENDPOINT, key), + headers=GCE_TPU_HEADERS, + ) + if ( + accelerator_type_request.status_code == 200 + and accelerator_type_request.text + ): + return accelerator_type_request.text + else: + logging.debug( + "Unable to poll TPU GCE Metadata. Got " + f"status code: {accelerator_type_request.status_code} and " + f"content: {accelerator_type_request.text}" + ) + except requests.RequestException as e: + logging.debug("Unable to poll the TPU GCE Metadata: %s", e) + return None + + +class TPUAcceleratorManager(AcceleratorManager): + """Google TPU accelerators.""" + + @staticmethod + def get_resource_name() -> str: + return "TPU" + + @staticmethod + def get_visible_accelerator_ids_env_var() -> str: + return TPU_VISIBLE_CHIPS_ENV_VAR + + @staticmethod + def get_current_process_visible_accelerator_ids() -> Optional[List[str]]: + tpu_visible_chips = os.environ.get( + TPUAcceleratorManager.get_visible_accelerator_ids_env_var(), None + ) + + if tpu_visible_chips is None: + return None + + if tpu_visible_chips == "": + return [] + + return list(tpu_visible_chips.split(",")) + + @staticmethod + @lru_cache() + def get_current_node_num_accelerators() -> int: + """Attempt to detect the number of TPUs on this machine. + + TPU chips are represented as devices within `/dev/`, either as + `/dev/accel*` or `/dev/vfio/*`. + + Returns: + The number of TPUs if any were detected, otherwise 0. 
+ """ + accel_files = glob.glob("/dev/accel*") + if accel_files: + return len(accel_files) + + try: + vfio_entries = os.listdir("/dev/vfio") + numeric_entries = [int(entry) for entry in vfio_entries if entry.isdigit()] + return len(numeric_entries) + except FileNotFoundError as e: + logger.debug("Failed to detect number of TPUs: %s", e) + return 0 + + @staticmethod + def is_valid_tpu_accelerator_type(tpu_accelerator_type: str) -> bool: + """Check whether the tpu accelerator_type is formatted correctly. + + The accelerator_type field follows a form of v{generation}-{cores/chips}. + + See the following for more information: + https://cloud.google.com/sdk/gcloud/reference/compute/tpus/tpu-vm/accelerator-types/describe + + Args: + tpu_accelerator_type: The string representation of the accelerator type + to be checked for validity. + + Returns: + True if it's valid, false otherwise. + """ + expected_pattern = re.compile(r"^v\d+[a-zA-Z]*-\d+$") + if not expected_pattern.match(tpu_accelerator_type): + return False + return True + + @staticmethod + def validate_resource_request_quantity( + quantity: float, + ) -> Tuple[bool, Optional[str]]: + if quantity not in TPU_VALID_CHIP_OPTIONS: + return ( + False, + f"The number of requested 'TPU' was set to {quantity} which " + "is not a supported chip configuration. Supported configs: " + f"{TPU_VALID_CHIP_OPTIONS}", + ) + else: + return (True, None) + + @staticmethod + def set_current_process_visible_accelerator_ids( + visible_tpu_chips: List[str], + ) -> None: + """Set TPU environment variables based on the provided visible_tpu_chips. + + To access a subset of the TPU visible chips, we must use a combination of + environment variables that tells the compiler (via ML framework) the: + - Visible chips + - The physical bounds of chips per host + - The host bounds within the context of a TPU pod. + + See: https://github.com/google/jax/issues/14977 for an example/more details. 
+ + Args: + visible_tpu_chips (List[str]): List of int representing TPU chips. + """ + if os.environ.get(NOSET_TPU_VISIBLE_CHIPS_ENV_VAR): + return + + num_visible_tpu_chips = len(visible_tpu_chips) + num_accelerators_on_node = ( + TPUAcceleratorManager.get_current_node_num_accelerators() + ) + if num_visible_tpu_chips == num_accelerators_on_node: + # Let the ML framework use the defaults + os.environ.pop(TPU_CHIPS_PER_HOST_BOUNDS_ENV_VAR, None) + os.environ.pop(TPU_HOST_BOUNDS_ENV_VAR, None) + return + os.environ[ + TPUAcceleratorManager.get_visible_accelerator_ids_env_var() + ] = ",".join([str(i) for i in visible_tpu_chips]) + if num_visible_tpu_chips == 1: + os.environ[ + TPU_CHIPS_PER_HOST_BOUNDS_ENV_VAR + ] = TPU_CHIPS_PER_HOST_BOUNDS_1_CHIP_CONFIG + os.environ[TPU_HOST_BOUNDS_ENV_VAR] = TPU_SINGLE_HOST_BOUNDS + elif num_visible_tpu_chips == 2: + os.environ[ + TPU_CHIPS_PER_HOST_BOUNDS_ENV_VAR + ] = TPU_CHIPS_PER_HOST_BOUNDS_2_CHIP_CONFIG + os.environ[TPU_HOST_BOUNDS_ENV_VAR] = TPU_SINGLE_HOST_BOUNDS + + @staticmethod + def _get_current_node_tpu_pod_type() -> Optional[str]: + """Get the TPU pod type of the current node if applicable. + + Individual TPU VMs within a TPU pod must know what type + of pod it is a part of. This is necessary for the + ML framework to work properly. + + The logic is different if the TPU was provisioned via: + ``` + gcloud tpus tpu-vm create ... + ``` + (i.e. a GCE VM), vs through GKE: + - GCE VMs will always have a metadata server to poll this info + - GKE VMS will have environment variables preset. + + Returns: + A string representing the current TPU pod type, e.g. + v4-16. 
+ + """ + # Start with GKE-based check + accelerator_type = os.getenv(GKE_TPU_ACCELERATOR_TYPE_ENV_VAR, "") + if not accelerator_type: + # GCE-based VM check + accelerator_type = _get_tpu_metadata(key=GCE_TPU_ACCELERATOR_KEY) + if accelerator_type and TPUAcceleratorManager.is_valid_tpu_accelerator_type( + tpu_accelerator_type=accelerator_type + ): + return accelerator_type + logging.debug("Failed to get a valid accelerator type.") + return None + + @staticmethod + def get_current_node_tpu_name() -> Optional[str]: + """Return the name of the TPU pod that this worker node is a part of. + + For instance, if the TPU was created with name "my-tpu", this function + will return "my-tpu". + + If created through the Ray cluster launcher, the + name will typically be something like "ray-my-tpu-cluster-worker-aa946781-tpu". + + In case the TPU was created through KubeRay, we currently expect that the + environment variable TPU_NAME is set per TPU pod slice, in which case + this function will return the value of that environment variable. 
+ + """ + try: + # Start with GKE-based check + tpu_name = os.getenv(GKE_TPU_NAME_ENV_VAR, None) + if not tpu_name: + # GCE-based VM check + tpu_name = _get_tpu_metadata(key=GCE_TPU_INSTANCE_ID_KEY) + return tpu_name + except ValueError as e: + logging.debug("Could not get TPU name: %s", e) + return None + + @staticmethod + def _get_current_node_tpu_worker_id() -> Optional[int]: + """Return the worker index of the TPU pod.""" + try: + # Start with GKE-based check + worker_id = os.getenv(GKE_TPU_WORKER_ID_ENV_VAR, None) + if not worker_id: + # GCE-based VM check + worker_id = _get_tpu_metadata(key=GCE_TPU_WORKER_ID_KEY) + if worker_id: + return int(worker_id) + else: + return None + except ValueError as e: + logging.debug("Could not get TPU worker id: %s", e) + return None + + @staticmethod + def get_num_workers_in_current_tpu_pod() -> Optional[int]: + """Return the total number of workers in a TPU pod.""" + tpu_pod_type = TPUAcceleratorManager._get_current_node_tpu_pod_type() + cores_per_host = TPUAcceleratorManager.get_current_node_num_accelerators() + if tpu_pod_type and cores_per_host > 0: + num_chips_or_cores = int(tpu_pod_type.split("-")[1]) + return num_chips_or_cores // cores_per_host + else: + logging.debug("Could not get num workers in TPU pod.") + return None + + @staticmethod + def get_current_node_accelerator_type() -> Optional[str]: + """Attempt to detect the TPU accelerator type. + + The output of this function will return the "ray accelerator type" + resource (e.g. TPU-V4) that indicates the TPU version. + + We also expect that our TPU nodes contain a "TPU pod type" + resource, which indicates information about the topology of + the TPU pod slice. + + We expect that the "TPU pod type" resource to be used when + running multi host workers, i.e. when TPU units are pod slices. + + We expect that the "ray accelerator type" resource to be used when + running single host workers, i.e. when TPU units are single hosts. 
+ + Returns: + A string representing the TPU accelerator type, + e.g. "TPU-V2", "TPU-V3", "TPU-V4" if applicable, else None. + + """ + + def tpu_pod_type_to_ray_accelerator_type( + tpu_pod_type: str, + ) -> Optional[str]: + return "TPU-" + str(tpu_pod_type.split("-")[0].upper()) + + ray_accelerator_type = None + tpu_pod_type = TPUAcceleratorManager._get_current_node_tpu_pod_type() + + if tpu_pod_type is not None: + ray_accelerator_type = tpu_pod_type_to_ray_accelerator_type( + tpu_pod_type=tpu_pod_type + ) + if ray_accelerator_type is None: + logger.info( + "While trying to autodetect a TPU type, " + f"received malformed accelerator_type: {tpu_pod_type}" + ) + + if ray_accelerator_type is None: + logging.info("Failed to auto-detect TPU type.") + + return ray_accelerator_type + + def get_current_node_additional_resources() -> Optional[Dict[str, float]]: + """Get additional resources required for TPU nodes. + + This will populate the TPU pod type and the TPU name which + is used for TPU pod execution. + + When running workloads on a TPU pod, we need a way to run + the same binary on every worker in the TPU pod. + + See https://jax.readthedocs.io/en/latest/multi_process.html + for more information. + + To do this in ray, we take advantage of custom resources. We + mark worker 0 of the TPU pod as a "coordinator" that identifies + the other workers in the TPU pod. We therefore need: + - worker 0 to be targetable. + - all workers in the TPU pod to have a unique identifier consistent + within a TPU pod. + + So assuming we want to run the following workload: + + @ray.remote + def my_jax_fn(): + import jax + return jax.device_count() + + We could broadcast this on a TPU pod (e.g. 
a v4-16) as follows: + + @ray.remote(resources={"TPU-v4-16-head"}) + def run_jax_fn(executable): + # Note this will execute on worker 0 + tpu_name = ray.util.accelerators.tpu.get_tpu_pod_name() + num_workers = ray.util.accelerators.tpu.get_tpu_num_workers() + tpu_executable = executable.options(resources={"TPU": 4, tpu_name: 1}) + return [tpu_executable.remote() for _ in range(num_workers)] + + Returns: + A dictionary representing additional resources that may be + necessary for a particular accelerator type. + + """ + resources = {} + tpu_name = TPUAcceleratorManager.get_current_node_tpu_name() + worker_id = TPUAcceleratorManager._get_current_node_tpu_worker_id() + tpu_pod_type = TPUAcceleratorManager._get_current_node_tpu_pod_type() + + if tpu_name and worker_id is not None and tpu_pod_type: + pod_head_resource_name = f"TPU-{tpu_pod_type}-head" + # Add the name of the TPU to the resource. + resources[tpu_name] = 1 + # Only add in the TPU pod type resource to worker 0. + if worker_id == 0: + resources[pod_head_resource_name] = 1 + else: + logging.info( + "Failed to configure TPU pod. 
Got: " + "tpu_name: %s, worker_id: %s, accelerator_type: %s", + tpu_name, + worker_id, + tpu_pod_type, + ) + if resources: + return resources + return None diff --git a/.venv/lib/python3.11/site-packages/ray/_private/runtime_env/agent/thirdparty_files/propcache/_helpers_c.cpython-311-x86_64-linux-gnu.so b/.venv/lib/python3.11/site-packages/ray/_private/runtime_env/agent/thirdparty_files/propcache/_helpers_c.cpython-311-x86_64-linux-gnu.so new file mode 100644 index 0000000000000000000000000000000000000000..9e372717ed4022ee51bc656029c205dc4ce03877 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/_private/runtime_env/agent/thirdparty_files/propcache/_helpers_c.cpython-311-x86_64-linux-gnu.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a87371c20cf73e0fe5df7f255ec4523368eff6d0a6e61a6fd6a730892a134935 +size 800728 diff --git a/.venv/lib/python3.11/site-packages/ray/_private/usage/__init__.py b/.venv/lib/python3.11/site-packages/ray/_private/usage/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/.venv/lib/python3.11/site-packages/ray/_private/usage/__pycache__/__init__.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/_private/usage/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8a65778f0645edb1b0c0eb785caf7c7f296d47ed Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/_private/usage/__pycache__/__init__.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/_private/usage/__pycache__/usage_constants.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/_private/usage/__pycache__/usage_constants.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d93038f3703ab6e1231d9ceaa48c9a202bdbd1c7 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/_private/usage/__pycache__/usage_constants.cpython-311.pyc 
differ diff --git a/.venv/lib/python3.11/site-packages/ray/_private/usage/__pycache__/usage_lib.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/_private/usage/__pycache__/usage_lib.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d68388d3f02fe25f47263f9ae8cefef12b578f65 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/_private/usage/__pycache__/usage_lib.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/_private/usage/usage_constants.py b/.venv/lib/python3.11/site-packages/ray/_private/usage/usage_constants.py new file mode 100644 index 0000000000000000000000000000000000000000..2b5b97ad175e334aa963e2f02629329830c57ff8 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/_private/usage/usage_constants.py @@ -0,0 +1,63 @@ +SCHEMA_VERSION = "0.1" + +# The key to store / obtain cluster metadata. +CLUSTER_METADATA_KEY = b"CLUSTER_METADATA" + +# The name of a json file where usage stats will be written. +USAGE_STATS_FILE = "usage_stats.json" + +USAGE_STATS_ENABLED_ENV_VAR = "RAY_USAGE_STATS_ENABLED" + +USAGE_STATS_SOURCE_ENV_VAR = "RAY_USAGE_STATS_SOURCE" + +USAGE_STATS_SOURCE_OSS = "OSS" + +USAGE_STATS_ENABLED_FOR_CLI_MESSAGE = ( + "Usage stats collection is enabled. To disable this, add `--disable-usage-stats` " + "to the command that starts the cluster, or run the following command:" + " `ray disable-usage-stats` before starting the cluster. " + "See https://docs.ray.io/en/master/cluster/usage-stats.html for more details." +) + +USAGE_STATS_ENABLED_FOR_RAY_INIT_MESSAGE = ( + "Usage stats collection is enabled. To disable this, run the following command:" + " `ray disable-usage-stats` before starting Ray. " + "See https://docs.ray.io/en/master/cluster/usage-stats.html for more details." +) + +USAGE_STATS_DISABLED_MESSAGE = "Usage stats collection is disabled." 
+ +USAGE_STATS_ENABLED_BY_DEFAULT_FOR_CLI_MESSAGE = ( + "Usage stats collection is enabled by default without user confirmation " + "because this terminal is detected to be non-interactive. " + "To disable this, add `--disable-usage-stats` to the command that starts " + "the cluster, or run the following command:" + " `ray disable-usage-stats` before starting the cluster. " + "See https://docs.ray.io/en/master/cluster/usage-stats.html for more details." +) + +USAGE_STATS_ENABLED_BY_DEFAULT_FOR_RAY_INIT_MESSAGE = ( + "Usage stats collection is enabled by default for nightly wheels. " + "To disable this, run the following command:" + " `ray disable-usage-stats` before starting Ray. " + "See https://docs.ray.io/en/master/cluster/usage-stats.html for more details." +) + +USAGE_STATS_CONFIRMATION_MESSAGE = ( + "Enable usage stats collection? " + "This prompt will auto-proceed in 10 seconds to avoid blocking cluster startup." +) + +LIBRARY_USAGE_SET_NAME = "library_usage_" + +HARDWARE_USAGE_SET_NAME = "hardware_usage_" + +# Keep in-sync with the same constants defined in usage_stats_client.h +EXTRA_USAGE_TAG_PREFIX = "extra_usage_tag_" +USAGE_STATS_NAMESPACE = "usage_stats" + +KUBERNETES_SERVICE_HOST_ENV = "KUBERNETES_SERVICE_HOST" +KUBERAY_ENV = "RAY_USAGE_STATS_KUBERAY_IN_USE" + +PROVIDER_KUBERNETES_GENERIC = "kubernetes" +PROVIDER_KUBERAY = "kuberay" diff --git a/.venv/lib/python3.11/site-packages/ray/_private/usage/usage_lib.py b/.venv/lib/python3.11/site-packages/ray/_private/usage/usage_lib.py new file mode 100644 index 0000000000000000000000000000000000000000..558f56c602ef0b39e6d084f60ffbfbb026ff9175 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/_private/usage/usage_lib.py @@ -0,0 +1,964 @@ +"""This is the module that is in charge of Ray usage report (telemetry) APIs. + +NOTE: Ray's usage report is currently "on by default". + One could opt-out, see details at https://docs.ray.io/en/master/cluster/usage-stats.html. 
# noqa + +Ray usage report follows the specification from +https://docs.google.com/document/d/1ZT-l9YbGHh-iWRUC91jS-ssQ5Qe2UQ43Lsoc1edCalc/edit#heading=h.17dss3b9evbj. # noqa + +# Module + +The module consists of 2 parts. + +## Public API +It contains public APIs to obtain usage report information. +APIs will be added before the usage report becomes opt-in by default. + +## Internal APIs for usage processing/report +The telemetry report consists of 5 components. This module is in charge of the top 2 layers. + +Report -> usage_lib +--------------------- +Usage data processing -> usage_lib +--------------------- +Data storage -> Ray API server +--------------------- +Aggregation -> Ray API server (currently a dashboard server) +--------------------- +Usage data collection -> Various components (Ray agent, GCS, etc.) + usage_lib (cluster metadata). + +Usage report is currently "off by default". You can enable the report by setting an environment variable +RAY_USAGE_STATS_ENABLED=1. For example, `RAY_USAGE_STATS_ENABLED=1 ray start --head`. +Or `RAY_USAGE_STATS_ENABLED=1 python [drivers with ray.init()]`. + +"Ray API server (currently a dashboard server)" reports the usage data to https://usage-stats.ray.io/. + +Data is reported every hour by default. + +Note that it is also possible to configure the interval using the environment variable, +`RAY_USAGE_STATS_REPORT_INTERVAL_S`. + +To see collected/reported data, see `usage_stats.json` inside a temp +folder (e.g., /tmp/ray/session_[id]/*). 
+""" +import json +import logging +import threading +import os +import platform +import sys +import time +from dataclasses import asdict, dataclass +from enum import Enum, auto +from pathlib import Path +from typing import Dict, List, Optional, Set + +import requests +import yaml + +import ray +from ray._raylet import GcsClient +import ray._private.ray_constants as ray_constants +import ray._private.usage.usage_constants as usage_constant +from ray.experimental.internal_kv import ( + _internal_kv_initialized, + _internal_kv_put, +) +from ray.core.generated import usage_pb2, gcs_pb2 + +logger = logging.getLogger(__name__) +TagKey = usage_pb2.TagKey + +################# +# Internal APIs # +################# + + +@dataclass(init=True) +class ClusterConfigToReport: + cloud_provider: Optional[str] = None + min_workers: Optional[int] = None + max_workers: Optional[int] = None + head_node_instance_type: Optional[str] = None + worker_node_instance_types: Optional[List[str]] = None + + +@dataclass(init=True) +class ClusterStatusToReport: + total_num_cpus: Optional[int] = None + total_num_gpus: Optional[int] = None + total_memory_gb: Optional[float] = None + total_object_store_memory_gb: Optional[float] = None + + +@dataclass(init=True) +class UsageStatsToReport: + """Usage stats to report""" + + #: The schema version of the report. + schema_version: str + #: The source of the data (i.e. OSS). + source: str + #: When the data is collected and reported. + collect_timestamp_ms: int + #: The total number of successful reports for the lifetime of the cluster. + total_success: Optional[int] = None + #: The total number of failed reports for the lifetime of the cluster. + total_failed: Optional[int] = None + #: The sequence number of the report. + seq_number: Optional[int] = None + #: The Ray version in use. + ray_version: Optional[str] = None + #: The Python version in use. + python_version: Optional[str] = None + #: A random id of the cluster session. 
+ session_id: Optional[str] = None + #: The git commit hash of Ray (i.e. ray.__commit__). + git_commit: Optional[str] = None + #: The operating system in use. + os: Optional[str] = None + #: When the cluster is started. + session_start_timestamp_ms: Optional[int] = None + #: The cloud provider found in the cluster.yaml file (e.g., aws). + cloud_provider: Optional[str] = None + #: The min_workers found in the cluster.yaml file. + min_workers: Optional[int] = None + #: The max_workers found in the cluster.yaml file. + max_workers: Optional[int] = None + #: The head node instance type found in the cluster.yaml file (e.g., i3.8xlarge). + head_node_instance_type: Optional[str] = None + #: The worker node instance types found in the cluster.yaml file (e.g., i3.8xlarge). + worker_node_instance_types: Optional[List[str]] = None + #: The total num of cpus in the cluster. + total_num_cpus: Optional[int] = None + #: The total num of gpus in the cluster. + total_num_gpus: Optional[int] = None + #: The total size of memory in the cluster. + total_memory_gb: Optional[float] = None + #: The total size of object store memory in the cluster. + total_object_store_memory_gb: Optional[float] = None + #: The Ray libraries that are used (e.g., rllib). + library_usages: Optional[List[str]] = None + #: The extra tags to report when specified by an + # environment variable RAY_USAGE_STATS_EXTRA_TAGS + extra_usage_tags: Optional[Dict[str, str]] = None + #: The number of alive nodes when the report is generated. + total_num_nodes: Optional[int] = None + #: The total number of running jobs excluding internal ones + # when the report is generated. + total_num_running_jobs: Optional[int] = None + #: The libc version in the OS. + libc_version: Optional[str] = None + #: The hardwares that are used (e.g. Intel Xeon). 
+ hardware_usages: Optional[List[str]] = None + + +@dataclass(init=True) +class UsageStatsToWrite: + """Usage stats to write to `USAGE_STATS_FILE` + + We are writing extra metadata such as the status of report + to this file. + """ + + usage_stats: UsageStatsToReport + # Whether or not the last report succeeded. + success: bool + # The error message of the last report if it happens. + error: str + + +class UsageStatsEnabledness(Enum): + ENABLED_EXPLICITLY = auto() + DISABLED_EXPLICITLY = auto() + ENABLED_BY_DEFAULT = auto() + + +_recorded_library_usages = set() +_recorded_library_usages_lock = threading.Lock() +_recorded_extra_usage_tags = dict() +_recorded_extra_usage_tags_lock = threading.Lock() + + +def _add_to_usage_set(set_name: str, value: str): + assert _internal_kv_initialized() + try: + _internal_kv_put( + f"{set_name}{value}".encode(), + b"", + namespace=usage_constant.USAGE_STATS_NAMESPACE.encode(), + ) + except Exception as e: + logger.debug(f"Failed to add {value} to usage set {set_name}, {e}") + + +def _get_usage_set(gcs_client, set_name: str) -> Set[str]: + try: + result = set() + usages = gcs_client.internal_kv_keys( + set_name.encode(), + namespace=usage_constant.USAGE_STATS_NAMESPACE.encode(), + ) + for usage in usages: + usage = usage.decode("utf-8") + result.add(usage[len(set_name) :]) + + return result + except Exception as e: + logger.debug(f"Failed to get usage set {set_name}, {e}") + return set() + + +def _put_library_usage(library_usage: str): + _add_to_usage_set(usage_constant.LIBRARY_USAGE_SET_NAME, library_usage) + + +def _put_hardware_usage(hardware_usage: str): + _add_to_usage_set(usage_constant.HARDWARE_USAGE_SET_NAME, hardware_usage) + + +def record_extra_usage_tag( + key: TagKey, value: str, gcs_client: Optional[GcsClient] = None +): + """Record extra kv usage tag. + + If the key already exists, the value will be overwritten. + + To record an extra tag, first add the key to the TagKey enum and + then call this function. 
+ It will make a synchronous call to the internal kv store if the tag is updated. + + Args: + key: The key of the tag. + value: The value of the tag. + gcs_client: The GCS client to perform KV operation PUT. Defaults to None. + When None, it will try to get the global client from the internal_kv. + """ + key = TagKey.Name(key).lower() + with _recorded_extra_usage_tags_lock: + if _recorded_extra_usage_tags.get(key) == value: + return + _recorded_extra_usage_tags[key] = value + + if not _internal_kv_initialized() and gcs_client is None: + # This happens if the record is before ray.init and + # no GCS client is used for recording explicitly. + return + + _put_extra_usage_tag(key, value, gcs_client) + + +def _put_extra_usage_tag(key: str, value: str, gcs_client: Optional[GcsClient] = None): + try: + key = f"{usage_constant.EXTRA_USAGE_TAG_PREFIX}{key}".encode() + val = value.encode() + namespace = usage_constant.USAGE_STATS_NAMESPACE.encode() + if gcs_client is not None: + # Use the GCS client. + gcs_client.internal_kv_put(key, val, namespace=namespace) + else: + # Use internal kv. + assert _internal_kv_initialized() + _internal_kv_put(key, val, namespace=namespace) + except Exception as e: + logger.debug(f"Failed to put extra usage tag, {e}") + + +def record_hardware_usage(hardware_usage: str): + """Record hardware usage (e.g. which CPU model is used)""" + assert _internal_kv_initialized() + _put_hardware_usage(hardware_usage) + + +def record_library_usage(library_usage: str): + """Record library usage (e.g. which library is used)""" + with _recorded_library_usages_lock: + if library_usage in _recorded_library_usages: + return + _recorded_library_usages.add(library_usage) + + if not _internal_kv_initialized(): + # This happens if the library is imported before ray.init + return + + # Only report lib usage for driver / ray client / workers. Otherwise, + # it can be reported if the library is imported from + # e.g., API server. 
+ if ( + ray._private.worker.global_worker.mode == ray.SCRIPT_MODE + or ray._private.worker.global_worker.mode == ray.WORKER_MODE + or ray.util.client.ray.is_connected() + ): + _put_library_usage(library_usage) + + +def _put_pre_init_library_usages(): + assert _internal_kv_initialized() + # NOTE: When the lib is imported from a worker, ray should + # always be initialized, so there's no need to register the + # pre init hook. + if not ( + ray._private.worker.global_worker.mode == ray.SCRIPT_MODE + or ray.util.client.ray.is_connected() + ): + return + + for library_usage in _recorded_library_usages: + _put_library_usage(library_usage) + + +def _put_pre_init_extra_usage_tags(): + assert _internal_kv_initialized() + for k, v in _recorded_extra_usage_tags.items(): + _put_extra_usage_tag(k, v) + + +def put_pre_init_usage_stats(): + _put_pre_init_library_usages() + _put_pre_init_extra_usage_tags() + + +def reset_global_state(): + global _recorded_library_usages, _recorded_extra_usage_tags + + with _recorded_library_usages_lock: + _recorded_library_usages = set() + with _recorded_extra_usage_tags_lock: + _recorded_extra_usage_tags = dict() + + +ray._private.worker._post_init_hooks.append(put_pre_init_usage_stats) + + +def _usage_stats_report_url(): + # The usage collection server URL. + # The environment variable is testing-purpose only. + return os.getenv("RAY_USAGE_STATS_REPORT_URL", "https://usage-stats.ray.io/") + + +def _usage_stats_report_interval_s(): + return int(os.getenv("RAY_USAGE_STATS_REPORT_INTERVAL_S", 3600)) + + +def _usage_stats_config_path(): + return os.getenv( + "RAY_USAGE_STATS_CONFIG_PATH", os.path.expanduser("~/.ray/config.json") + ) + + +def _usage_stats_enabledness() -> UsageStatsEnabledness: + # Env var has higher priority than config file. 
+ usage_stats_enabled_env_var = os.getenv(usage_constant.USAGE_STATS_ENABLED_ENV_VAR) + if usage_stats_enabled_env_var == "0": + return UsageStatsEnabledness.DISABLED_EXPLICITLY + elif usage_stats_enabled_env_var == "1": + return UsageStatsEnabledness.ENABLED_EXPLICITLY + elif usage_stats_enabled_env_var is not None: + raise ValueError( + f"Valid value for {usage_constant.USAGE_STATS_ENABLED_ENV_VAR} " + f"env var is 0 or 1, but got {usage_stats_enabled_env_var}" + ) + + usage_stats_enabled_config_var = None + try: + with open(_usage_stats_config_path()) as f: + config = json.load(f) + usage_stats_enabled_config_var = config.get("usage_stats") + except FileNotFoundError: + pass + except Exception as e: + logger.debug(f"Failed to load usage stats config {e}") + + if usage_stats_enabled_config_var is False: + return UsageStatsEnabledness.DISABLED_EXPLICITLY + elif usage_stats_enabled_config_var is True: + return UsageStatsEnabledness.ENABLED_EXPLICITLY + elif usage_stats_enabled_config_var is not None: + raise ValueError( + f"Valid value for 'usage_stats' in {_usage_stats_config_path()}" + f" is true or false, but got {usage_stats_enabled_config_var}" + ) + + # Usage stats is enabled by default. + return UsageStatsEnabledness.ENABLED_BY_DEFAULT + + +def is_nightly_wheel() -> bool: + return ray.__commit__ != "{{RAY_COMMIT_SHA}}" and "dev" in ray.__version__ + + +def usage_stats_enabled() -> bool: + return _usage_stats_enabledness() is not UsageStatsEnabledness.DISABLED_EXPLICITLY + + +def usage_stats_prompt_enabled(): + return int(os.getenv("RAY_USAGE_STATS_PROMPT_ENABLED", "1")) == 1 + + +def _generate_cluster_metadata(*, ray_init_cluster: bool): + """Return a dictionary of cluster metadata. + + Params: + ray_init_cluster: Whether the cluster is started by ray.init() + """ + ray_version, python_version = ray._private.utils.compute_version_info() + # These two metadata is necessary although usage report is not enabled + # to check version compatibility. 
+ metadata = { + "ray_version": ray_version, + "python_version": python_version, + "ray_init_cluster": ray_init_cluster, + } + # Additional metadata is recorded only when usage stats are enabled. + if usage_stats_enabled(): + metadata.update( + { + "git_commit": ray.__commit__, + "os": sys.platform, + "session_start_timestamp_ms": int(time.time() * 1000), + } + ) + if sys.platform == "linux": + # Record llibc version + (lib, ver) = platform.libc_ver() + if not lib: + metadata.update({"libc_version": "NA"}) + else: + metadata.update({"libc_version": f"{lib}:{ver}"}) + return metadata + + +def show_usage_stats_prompt(cli: bool) -> None: + if not usage_stats_prompt_enabled(): + return + + from ray.autoscaler._private.cli_logger import cli_logger + + prompt_print = cli_logger.print if cli else print + + usage_stats_enabledness = _usage_stats_enabledness() + if usage_stats_enabledness is UsageStatsEnabledness.DISABLED_EXPLICITLY: + prompt_print(usage_constant.USAGE_STATS_DISABLED_MESSAGE) + elif usage_stats_enabledness is UsageStatsEnabledness.ENABLED_BY_DEFAULT: + if not cli: + prompt_print( + usage_constant.USAGE_STATS_ENABLED_BY_DEFAULT_FOR_RAY_INIT_MESSAGE + ) + elif cli_logger.interactive: + enabled = cli_logger.confirm( + False, + usage_constant.USAGE_STATS_CONFIRMATION_MESSAGE, + _default=True, + _timeout_s=10, + ) + set_usage_stats_enabled_via_env_var(enabled) + # Remember user's choice. 
+ try: + set_usage_stats_enabled_via_config(enabled) + except Exception as e: + logger.debug( + f"Failed to persist usage stats choice for future clusters: {e}" + ) + if enabled: + prompt_print(usage_constant.USAGE_STATS_ENABLED_FOR_CLI_MESSAGE) + else: + prompt_print(usage_constant.USAGE_STATS_DISABLED_MESSAGE) + else: + prompt_print( + usage_constant.USAGE_STATS_ENABLED_BY_DEFAULT_FOR_CLI_MESSAGE, + ) + else: + assert usage_stats_enabledness is UsageStatsEnabledness.ENABLED_EXPLICITLY + prompt_print( + usage_constant.USAGE_STATS_ENABLED_FOR_CLI_MESSAGE + if cli + else usage_constant.USAGE_STATS_ENABLED_FOR_RAY_INIT_MESSAGE + ) + + +def set_usage_stats_enabled_via_config(enabled) -> None: + config = {} + try: + with open(_usage_stats_config_path()) as f: + config = json.load(f) + if not isinstance(config, dict): + logger.debug( + f"Invalid ray config file, should be a json dict but got {type(config)}" + ) + config = {} + except FileNotFoundError: + pass + except Exception as e: + logger.debug(f"Failed to load ray config file {e}") + + config["usage_stats"] = enabled + + try: + os.makedirs(os.path.dirname(_usage_stats_config_path()), exist_ok=True) + with open(_usage_stats_config_path(), "w") as f: + json.dump(config, f) + except Exception as e: + raise Exception( + "Failed to " + f'{"enable" if enabled else "disable"}' + ' usage stats by writing {"usage_stats": ' + f'{"true" if enabled else "false"}' + "} to " + f"{_usage_stats_config_path()}" + ) from e + + +def set_usage_stats_enabled_via_env_var(enabled) -> None: + os.environ[usage_constant.USAGE_STATS_ENABLED_ENV_VAR] = "1" if enabled else "0" + + +def put_cluster_metadata(gcs_client, *, ray_init_cluster) -> None: + """Generate the cluster metadata and store it to GCS. + + It is a blocking API. + + Params: + gcs_client: The GCS client to perform KV operation PUT. + ray_init_cluster: Whether the cluster is started by ray.init() + + Raises: + gRPC exceptions if PUT fails. 
+ """ + metadata = _generate_cluster_metadata(ray_init_cluster=ray_init_cluster) + gcs_client.internal_kv_put( + usage_constant.CLUSTER_METADATA_KEY, + json.dumps(metadata).encode(), + overwrite=True, + namespace=ray_constants.KV_NAMESPACE_CLUSTER, + ) + return metadata + + +def get_total_num_running_jobs_to_report(gcs_client) -> Optional[int]: + """Return the total number of running jobs in the cluster excluding internal ones""" + try: + result = gcs_client.get_all_job_info( + skip_submission_job_info_field=True, skip_is_running_tasks_field=True + ) + total_num_running_jobs = 0 + for job_info in result.values(): + if not job_info.is_dead and not job_info.config.ray_namespace.startswith( + "_ray_internal" + ): + total_num_running_jobs += 1 + return total_num_running_jobs + except Exception as e: + logger.info(f"Faile to query number of running jobs in the cluster: {e}") + return None + + +def get_total_num_nodes_to_report(gcs_client, timeout=None) -> Optional[int]: + """Return the total number of alive nodes in the cluster""" + try: + result = gcs_client.get_all_node_info(timeout=timeout) + total_num_nodes = 0 + for node_id, node_info in result.items(): + if node_info.state == gcs_pb2.GcsNodeInfo.GcsNodeState.ALIVE: + total_num_nodes += 1 + return total_num_nodes + except Exception as e: + logger.info(f"Faile to query number of nodes in the cluster: {e}") + return None + + +def get_library_usages_to_report(gcs_client) -> List[str]: + return list(_get_usage_set(gcs_client, usage_constant.LIBRARY_USAGE_SET_NAME)) + + +def get_hardware_usages_to_report(gcs_client) -> List[str]: + return list(_get_usage_set(gcs_client, usage_constant.HARDWARE_USAGE_SET_NAME)) + + +def get_extra_usage_tags_to_report(gcs_client) -> Dict[str, str]: + """Get the extra usage tags from env var and gcs kv store. + + The env var should be given this way; key=value;key=value. + If parsing is failed, it will return the empty data. + + Returns: + Extra usage tags as kv pairs. 
+ """ + extra_usage_tags = dict() + + extra_usage_tags_env_var = os.getenv("RAY_USAGE_STATS_EXTRA_TAGS", None) + if extra_usage_tags_env_var: + try: + kvs = extra_usage_tags_env_var.strip(";").split(";") + for kv in kvs: + k, v = kv.split("=") + extra_usage_tags[k] = v + except Exception as e: + logger.info(f"Failed to parse extra usage tags env var. Error: {e}") + + valid_tag_keys = [tag_key.lower() for tag_key in TagKey.keys()] + try: + keys = gcs_client.internal_kv_keys( + usage_constant.EXTRA_USAGE_TAG_PREFIX.encode(), + namespace=usage_constant.USAGE_STATS_NAMESPACE.encode(), + ) + for key in keys: + value = gcs_client.internal_kv_get( + key, namespace=usage_constant.USAGE_STATS_NAMESPACE.encode() + ) + key = key.decode("utf-8") + key = key[len(usage_constant.EXTRA_USAGE_TAG_PREFIX) :] + assert key in valid_tag_keys + extra_usage_tags[key] = value.decode("utf-8") + except Exception as e: + logger.info(f"Failed to get extra usage tags from kv store {e}") + return extra_usage_tags + + +def _get_cluster_status_to_report_v2(gcs_client) -> ClusterStatusToReport: + """ + Get the current status of this cluster. A temporary proxy for the + autoscaler v2 API. + + It is a blocking API. + + Params: + gcs_client: The GCS client. + + Returns: + The current cluster status or empty ClusterStatusToReport + if it fails to get that information. 
+ """ + from ray.autoscaler.v2.sdk import get_cluster_status + + result = ClusterStatusToReport() + try: + cluster_status = get_cluster_status(gcs_client.address) + total_resources = cluster_status.total_resources() + result.total_num_cpus = int(total_resources.get("CPU", 0)) + result.total_num_gpus = int(total_resources.get("GPU", 0)) + + to_GiB = 1 / 2**30 + result.total_memory_gb = total_resources.get("memory", 0) * to_GiB + result.total_object_store_memory_gb = ( + total_resources.get("object_store_memory", 0) * to_GiB + ) + except Exception as e: + logger.info(f"Failed to get cluster status to report {e}") + finally: + return result + + +def get_cluster_status_to_report(gcs_client) -> ClusterStatusToReport: + """Get the current status of this cluster. + + It is a blocking API. + + Params: + gcs_client: The GCS client to perform KV operation GET. + + Returns: + The current cluster status or empty if it fails to get that information. + """ + try: + + from ray.autoscaler.v2.utils import is_autoscaler_v2 + + if is_autoscaler_v2(): + return _get_cluster_status_to_report_v2(gcs_client) + + cluster_status = gcs_client.internal_kv_get( + ray._private.ray_constants.DEBUG_AUTOSCALING_STATUS.encode(), + namespace=None, + ) + if not cluster_status: + return ClusterStatusToReport() + + result = ClusterStatusToReport() + to_GiB = 1 / 2**30 + cluster_status = json.loads(cluster_status.decode("utf-8")) + if ( + "load_metrics_report" not in cluster_status + or "usage" not in cluster_status["load_metrics_report"] + ): + return ClusterStatusToReport() + + usage = cluster_status["load_metrics_report"]["usage"] + # usage is a map from resource to (used, total) pair + if "CPU" in usage: + result.total_num_cpus = int(usage["CPU"][1]) + if "GPU" in usage: + result.total_num_gpus = int(usage["GPU"][1]) + if "memory" in usage: + result.total_memory_gb = usage["memory"][1] * to_GiB + if "object_store_memory" in usage: + result.total_object_store_memory_gb = ( + 
usage["object_store_memory"][1] * to_GiB + ) + return result + except Exception as e: + logger.info(f"Failed to get cluster status to report {e}") + return ClusterStatusToReport() + + +def get_cluster_config_to_report( + cluster_config_file_path: str, +) -> ClusterConfigToReport: + """Get the static cluster (autoscaler) config used to launch this cluster. + + Params: + cluster_config_file_path: The file path to the cluster config file. + + Returns: + The cluster (autoscaler) config or empty if it fails to get that information. + """ + + def get_instance_type(node_config): + if not node_config: + return None + if "InstanceType" in node_config: + # aws + return node_config["InstanceType"] + if "machineType" in node_config: + # gcp + return node_config["machineType"] + if ( + "azure_arm_parameters" in node_config + and "vmSize" in node_config["azure_arm_parameters"] + ): + return node_config["azure_arm_parameters"]["vmSize"] + return None + + try: + with open(cluster_config_file_path) as f: + config = yaml.safe_load(f) + result = ClusterConfigToReport() + if "min_workers" in config: + result.min_workers = config["min_workers"] + if "max_workers" in config: + result.max_workers = config["max_workers"] + + if "provider" in config and "type" in config["provider"]: + result.cloud_provider = config["provider"]["type"] + + if "head_node_type" not in config: + return result + if "available_node_types" not in config: + return result + head_node_type = config["head_node_type"] + available_node_types = config["available_node_types"] + for available_node_type in available_node_types: + if available_node_type == head_node_type: + head_node_instance_type = get_instance_type( + available_node_types[available_node_type].get("node_config") + ) + if head_node_instance_type: + result.head_node_instance_type = head_node_instance_type + else: + worker_node_instance_type = get_instance_type( + available_node_types[available_node_type].get("node_config") + ) + if worker_node_instance_type: 
+ result.worker_node_instance_types = ( + result.worker_node_instance_types or set() + ) + result.worker_node_instance_types.add(worker_node_instance_type) + if result.worker_node_instance_types: + result.worker_node_instance_types = list( + result.worker_node_instance_types + ) + return result + except FileNotFoundError: + # It's a manually started cluster or k8s cluster + result = ClusterConfigToReport() + # Check if we're on Kubernetes + if usage_constant.KUBERNETES_SERVICE_HOST_ENV in os.environ: + # Check if we're using KubeRay >= 0.4.0. + if usage_constant.KUBERAY_ENV in os.environ: + result.cloud_provider = usage_constant.PROVIDER_KUBERAY + # Else, we're on Kubernetes but not in either of the above categories. + else: + result.cloud_provider = usage_constant.PROVIDER_KUBERNETES_GENERIC + return result + except Exception as e: + logger.info(f"Failed to get cluster config to report {e}") + return ClusterConfigToReport() + + +def get_cluster_metadata(gcs_client) -> dict: + """Get the cluster metadata from GCS. + + It is a blocking API. + + This will return None if `put_cluster_metadata` was never called. + + Params: + gcs_client: The GCS client to perform KV operation GET. + + Returns: + The cluster metadata in a dictinoary. + + Raises: + RuntimeError if it fails to obtain cluster metadata from GCS. 
+ """ + return json.loads( + gcs_client.internal_kv_get( + usage_constant.CLUSTER_METADATA_KEY, + namespace=ray_constants.KV_NAMESPACE_CLUSTER, + ).decode("utf-8") + ) + + +def is_ray_init_cluster(gcs_client: ray._raylet.GcsClient) -> bool: + """Return whether the cluster is started by ray.init()""" + cluster_metadata = get_cluster_metadata(gcs_client) + return cluster_metadata["ray_init_cluster"] + + +def generate_disabled_report_data() -> UsageStatsToReport: + """Generate the report data indicating usage stats is disabled""" + data = UsageStatsToReport( + schema_version=usage_constant.SCHEMA_VERSION, + source=os.getenv( + usage_constant.USAGE_STATS_SOURCE_ENV_VAR, + usage_constant.USAGE_STATS_SOURCE_OSS, + ), + collect_timestamp_ms=int(time.time() * 1000), + ) + return data + + +def generate_report_data( + cluster_config_to_report: ClusterConfigToReport, + total_success: int, + total_failed: int, + seq_number: int, + gcs_address: str, + cluster_id: str, +) -> UsageStatsToReport: + """Generate the report data. + + Params: + cluster_config_to_report: The cluster (autoscaler) + config generated by `get_cluster_config_to_report`. + total_success: The total number of successful report + for the lifetime of the cluster. + total_failed: The total number of failed report + for the lifetime of the cluster. + seq_number: The sequence number that's incremented whenever + a new report is sent. + gcs_address: the address of gcs to get data to report. + cluster_id: hex id of the cluster. 
+ + Returns: + UsageStats + """ + assert cluster_id + + gcs_client = ray._raylet.GcsClient( + address=gcs_address, nums_reconnect_retry=20, cluster_id=cluster_id + ) + + cluster_metadata = get_cluster_metadata(gcs_client) + cluster_status_to_report = get_cluster_status_to_report(gcs_client) + + data = UsageStatsToReport( + schema_version=usage_constant.SCHEMA_VERSION, + source=os.getenv( + usage_constant.USAGE_STATS_SOURCE_ENV_VAR, + usage_constant.USAGE_STATS_SOURCE_OSS, + ), + collect_timestamp_ms=int(time.time() * 1000), + total_success=total_success, + total_failed=total_failed, + seq_number=seq_number, + ray_version=cluster_metadata["ray_version"], + python_version=cluster_metadata["python_version"], + session_id=cluster_id, + git_commit=cluster_metadata["git_commit"], + os=cluster_metadata["os"], + session_start_timestamp_ms=cluster_metadata["session_start_timestamp_ms"], + cloud_provider=cluster_config_to_report.cloud_provider, + min_workers=cluster_config_to_report.min_workers, + max_workers=cluster_config_to_report.max_workers, + head_node_instance_type=cluster_config_to_report.head_node_instance_type, + worker_node_instance_types=cluster_config_to_report.worker_node_instance_types, + total_num_cpus=cluster_status_to_report.total_num_cpus, + total_num_gpus=cluster_status_to_report.total_num_gpus, + total_memory_gb=cluster_status_to_report.total_memory_gb, + total_object_store_memory_gb=cluster_status_to_report.total_object_store_memory_gb, # noqa: E501 + library_usages=get_library_usages_to_report(gcs_client), + extra_usage_tags=get_extra_usage_tags_to_report(gcs_client), + total_num_nodes=get_total_num_nodes_to_report(gcs_client), + total_num_running_jobs=get_total_num_running_jobs_to_report(gcs_client), + libc_version=cluster_metadata.get("libc_version"), + hardware_usages=get_hardware_usages_to_report(gcs_client), + ) + return data + + +def generate_write_data( + usage_stats: UsageStatsToReport, + error: str, +) -> UsageStatsToWrite: + """Generate the 
report data. + + Params: + usage_stats: The usage stats that were reported. + error: The error message of failed reports. + + Returns: + UsageStatsToWrite + """ + data = UsageStatsToWrite( + usage_stats=usage_stats, + success=error is None, + error=error, + ) + return data + + +class UsageReportClient: + """The client implementation for usage report. + + It is in charge of writing usage stats to the directory + and report usage stats. + """ + + def write_usage_data(self, data: UsageStatsToWrite, dir_path: str) -> None: + """Write the usage data to the directory. + + Params: + data: Data to report + dir_path: The path to the directory to write usage data. + """ + # Atomically update the file. + dir_path = Path(dir_path) + destination = dir_path / usage_constant.USAGE_STATS_FILE + temp = dir_path / f"{usage_constant.USAGE_STATS_FILE}.tmp" + with temp.open(mode="w") as json_file: + json_file.write(json.dumps(asdict(data))) + if sys.platform == "win32": + # Windows 32 doesn't support atomic renaming, so we should delete + # the file first. + destination.unlink(missing_ok=True) + temp.rename(destination) + + def report_usage_data(self, url: str, data: UsageStatsToReport) -> None: + """Report the usage data to the usage server. + + Params: + url: The URL to update resource usage. + data: Data to report. + + Raises: + requests.HTTPError if requests fails. 
+ """ + r = requests.request( + "POST", + url, + headers={"Content-Type": "application/json"}, + json=asdict(data), + timeout=10, + ) + r.raise_for_status() + return r diff --git a/.venv/lib/python3.11/site-packages/ray/_private/workers/__init__.py b/.venv/lib/python3.11/site-packages/ray/_private/workers/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/.venv/lib/python3.11/site-packages/ray/_private/workers/__pycache__/__init__.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/_private/workers/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2c8bb2fdec1eea3ab8e03a2b416d3eb512a41c63 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/_private/workers/__pycache__/__init__.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/_private/workers/__pycache__/default_worker.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/_private/workers/__pycache__/default_worker.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..bba94e86d3ddba7ed786bca987d407ecd3205b68 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/_private/workers/__pycache__/default_worker.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/_private/workers/__pycache__/setup_worker.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/_private/workers/__pycache__/setup_worker.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6ac7a0da7e2d0f6b0347325c02436d6ae114e6f1 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/_private/workers/__pycache__/setup_worker.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/_private/workers/default_worker.py b/.venv/lib/python3.11/site-packages/ray/_private/workers/default_worker.py new file mode 100644 index 
0000000000000000000000000000000000000000..2a6ffe2bcce9e18ff01c77eb4a6dc9978016a09e --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/_private/workers/default_worker.py @@ -0,0 +1,304 @@ +import os +import argparse +import base64 +import json +import time + +import ray +import ray._private.node +import ray._private.ray_constants as ray_constants +import ray._private.utils +import ray.actor +from ray._private.async_compat import try_install_uvloop +from ray._private.parameter import RayParams +from ray._private.ray_logging import configure_log_file, get_worker_log_file_name +from ray._private.runtime_env.setup_hook import load_and_execute_setup_hook + +parser = argparse.ArgumentParser( + description=("Parse addresses for the worker to connect to.") +) +parser.add_argument( + "--cluster-id", + required=True, + type=str, + help="the auto-generated ID of the cluster", +) +parser.add_argument( + "--node-id", + required=True, + type=str, + help="the auto-generated ID of the node", +) +parser.add_argument( + "--node-ip-address", + required=True, + type=str, + help="the ip address of the worker's node", +) +parser.add_argument( + "--node-manager-port", required=True, type=int, help="the port of the worker's node" +) +parser.add_argument( + "--raylet-ip-address", + required=False, + type=str, + default=None, + help="the ip address of the worker's raylet", +) +parser.add_argument( + "--redis-address", required=True, type=str, help="the address to use for Redis" +) +parser.add_argument( + "--gcs-address", required=True, type=str, help="the address to use for GCS" +) +parser.add_argument( + "--redis-username", + required=False, + type=str, + default=None, + help="the username to use for Redis", +) +parser.add_argument( + "--redis-password", + required=False, + type=str, + default=None, + help="the password to use for Redis", +) +parser.add_argument( + "--object-store-name", required=True, type=str, help="the object store's name" +) +parser.add_argument("--raylet-name", 
required=False, type=str, help="the raylet's name") +parser.add_argument( + "--logging-level", + required=False, + type=str, + default=ray_constants.LOGGER_LEVEL, + choices=ray_constants.LOGGER_LEVEL_CHOICES, + help=ray_constants.LOGGER_LEVEL_HELP, +) +parser.add_argument( + "--logging-format", + required=False, + type=str, + default=ray_constants.LOGGER_FORMAT, + help=ray_constants.LOGGER_FORMAT_HELP, +) +parser.add_argument( + "--temp-dir", + required=False, + type=str, + default=None, + help="Specify the path of the temporary directory use by Ray process.", +) +parser.add_argument( + "--storage", + required=False, + type=str, + default=None, + help="Specify the persistent storage path.", +) +parser.add_argument( + "--load-code-from-local", + default=False, + action="store_true", + help="True if code is loaded from local files, as opposed to the GCS.", +) +parser.add_argument( + "--worker-type", + required=False, + type=str, + default="WORKER", + help="Specify the type of the worker process", +) +parser.add_argument( + "--metrics-agent-port", + required=True, + type=int, + help="the port of the node's metric agent.", +) +parser.add_argument( + "--runtime-env-agent-port", + required=True, + type=int, + default=None, + help="The port on which the runtime env agent receives HTTP requests.", +) +parser.add_argument( + "--object-spilling-config", + required=False, + type=str, + default="", + help="The configuration of object spilling. 
Only used by I/O workers.", +) +parser.add_argument( + "--logging-rotate-bytes", + required=False, + type=int, + default=ray_constants.LOGGING_ROTATE_BYTES, + help="Specify the max bytes for rotating " + "log file, default is " + f"{ray_constants.LOGGING_ROTATE_BYTES} bytes.", +) +parser.add_argument( + "--logging-rotate-backup-count", + required=False, + type=int, + default=ray_constants.LOGGING_ROTATE_BACKUP_COUNT, + help="Specify the backup count of rotated log file, default is " + f"{ray_constants.LOGGING_ROTATE_BACKUP_COUNT}.", +) +parser.add_argument( + "--runtime-env-hash", + required=False, + type=int, + default=0, + help="The computed hash of the runtime env for this worker.", +) +parser.add_argument( + "--startup-token", + required=True, + type=int, + help="The startup token assigned to this worker process by the raylet.", +) +parser.add_argument( + "--ray-debugger-external", + default=False, + action="store_true", + help="True if Ray debugger is made available externally.", +) +parser.add_argument("--session-name", required=False, help="The current session name") +parser.add_argument( + "--webui", + required=False, + help="The address of web ui", +) +parser.add_argument( + "--worker-launch-time-ms", + required=True, + type=int, + help="The time when raylet starts to launch the worker process.", +) + +parser.add_argument( + "--worker-preload-modules", + type=str, + required=False, + help=( + "A comma-separated list of Python module names " + "to import before accepting work." + ), +) + +if __name__ == "__main__": + # NOTE(sang): For some reason, if we move the code below + # to a separate function, tensorflow will capture that method + # as a step function. For more details, check out + # https://github.com/ray-project/ray/pull/12225#issue-525059663. 
+ args = parser.parse_args() + ray._private.ray_logging.setup_logger(args.logging_level, args.logging_format) + worker_launched_time_ms = time.time_ns() // 1e6 + if args.worker_type == "WORKER": + mode = ray.WORKER_MODE + elif args.worker_type == "SPILL_WORKER": + mode = ray.SPILL_WORKER_MODE + elif args.worker_type == "RESTORE_WORKER": + mode = ray.RESTORE_WORKER_MODE + else: + raise ValueError("Unknown worker type: " + args.worker_type) + + # Try installing uvloop as default event-loop implementation + # for asyncio + try_install_uvloop() + + raylet_ip_address = args.raylet_ip_address + if raylet_ip_address is None: + raylet_ip_address = args.node_ip_address + ray_params = RayParams( + node_ip_address=args.node_ip_address, + raylet_ip_address=raylet_ip_address, + node_manager_port=args.node_manager_port, + redis_address=args.redis_address, + redis_username=args.redis_username, + redis_password=args.redis_password, + plasma_store_socket_name=args.object_store_name, + raylet_socket_name=args.raylet_name, + temp_dir=args.temp_dir, + storage=args.storage, + metrics_agent_port=args.metrics_agent_port, + runtime_env_agent_port=args.runtime_env_agent_port, + gcs_address=args.gcs_address, + session_name=args.session_name, + webui=args.webui, + cluster_id=args.cluster_id, + node_id=args.node_id, + ) + node = ray._private.node.Node( + ray_params, + head=False, + shutdown_at_exit=False, + spawn_reaper=False, + connect_only=True, + default_worker=True, + ) + + # NOTE(suquark): We must initialize the external storage before we + # connect to raylet. Otherwise we may receive requests before the + # external storage is intialized. 
+ if mode == ray.RESTORE_WORKER_MODE or mode == ray.SPILL_WORKER_MODE: + from ray._private import external_storage, storage + + storage._init_storage(args.storage, is_head=False) + if args.object_spilling_config: + object_spilling_config = base64.b64decode(args.object_spilling_config) + object_spilling_config = json.loads(object_spilling_config) + else: + object_spilling_config = {} + external_storage.setup_external_storage( + object_spilling_config, node.node_id, node.session_name + ) + + ray._private.worker._global_node = node + ray._private.worker.connect( + node, + node.session_name, + mode=mode, + runtime_env_hash=args.runtime_env_hash, + startup_token=args.startup_token, + ray_debugger_external=args.ray_debugger_external, + worker_launch_time_ms=args.worker_launch_time_ms, + worker_launched_time_ms=worker_launched_time_ms, + ) + + worker = ray._private.worker.global_worker + + # Setup log file. + out_file, err_file = node.get_log_file_handles( + get_worker_log_file_name(args.worker_type) + ) + configure_log_file(out_file, err_file) + worker.set_out_file(out_file) + worker.set_err_file(err_file) + + if mode == ray.WORKER_MODE and args.worker_preload_modules: + module_names_to_import = args.worker_preload_modules.split(",") + ray._private.utils.try_import_each_module(module_names_to_import) + + # If the worker setup function is configured, run it. + worker_process_setup_hook_key = os.getenv( + ray_constants.WORKER_PROCESS_SETUP_HOOK_ENV_VAR + ) + if worker_process_setup_hook_key: + error = load_and_execute_setup_hook(worker_process_setup_hook_key) + if error is not None: + worker.core_worker.drain_and_exit_worker("system", error) + + if mode == ray.WORKER_MODE: + worker.main_loop() + elif mode in [ray.RESTORE_WORKER_MODE, ray.SPILL_WORKER_MODE]: + # It is handled by another thread in the C++ core worker. + # We just need to keep the worker alive. 
+ while True: + time.sleep(100000) + else: + raise ValueError(f"Unexcepted worker mode: {mode}") diff --git a/.venv/lib/python3.11/site-packages/ray/_private/workers/setup_worker.py b/.venv/lib/python3.11/site-packages/ray/_private/workers/setup_worker.py new file mode 100644 index 0000000000000000000000000000000000000000..23ba980a5bb22b2ce648c6df45f8ba0b7e73f3dd --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/_private/workers/setup_worker.py @@ -0,0 +1,33 @@ +import argparse +import logging + +from ray._private.ray_constants import LOGGER_FORMAT, LOGGER_LEVEL +from ray._private.ray_logging import setup_logger +from ray._private.runtime_env.context import RuntimeEnvContext +from ray.core.generated.common_pb2 import Language + +logger = logging.getLogger(__name__) + +parser = argparse.ArgumentParser( + description=("Set up the environment for a Ray worker and launch the worker.") +) + +parser.add_argument( + "--serialized-runtime-env-context", + type=str, + help="the serialized runtime env context", +) + +parser.add_argument("--language", type=str, help="the language type of the worker") + + +if __name__ == "__main__": + setup_logger(LOGGER_LEVEL, LOGGER_FORMAT) + args, remaining_args = parser.parse_known_args() + # NOTE(edoakes): args.serialized_runtime_env_context is only None when + # we're starting the main Ray client proxy server. That case should + # probably not even go through this codepath. 
+ runtime_env_context = RuntimeEnvContext.deserialize( + args.serialized_runtime_env_context or "{}" + ) + runtime_env_context.exec_worker(remaining_args, Language.Value(args.language)) diff --git a/.venv/lib/python3.11/site-packages/ray/jars/ray_dist.jar b/.venv/lib/python3.11/site-packages/ray/jars/ray_dist.jar new file mode 100644 index 0000000000000000000000000000000000000000..564e8b27ef6ad6f49d2f032060c59b34b44837e5 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/jars/ray_dist.jar @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f3835fe29f363a67c05160a5c60634942abbd46720e587faad488cadebd2e8a +size 32364530 diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/__init__.py b/.venv/lib/python3.11/site-packages/ray/rllib/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..f63b8173d43382c546dd1aaa1d09c316bd3ba846 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/rllib/__init__.py @@ -0,0 +1,55 @@ +import logging + +from ray._private.usage import usage_lib + +# Note: do not introduce unnecessary library dependencies here, e.g. gym. +# This file is imported from the tune module in order to register RLlib agents. 
from ray.rllib.env.base_env import BaseEnv
from ray.rllib.env.external_env import ExternalEnv
from ray.rllib.env.multi_agent_env import MultiAgentEnv
from ray.rllib.env.vector_env import VectorEnv
from ray.rllib.evaluation.rollout_worker import RolloutWorker
from ray.rllib.policy.policy import Policy
from ray.rllib.policy.sample_batch import SampleBatch
from ray.rllib.policy.tf_policy import TFPolicy
from ray.rllib.policy.torch_policy import TorchPolicy
from ray.tune.registry import register_trainable


def _setup_logger():
    """Attach a timestamped stream handler to the "ray.rllib" logger.

    Propagation is disabled so RLlib records are not duplicated by
    handlers on ancestor loggers.
    """
    logger = logging.getLogger("ray.rllib")
    handler = logging.StreamHandler()
    handler.setFormatter(
        logging.Formatter(
            "%(asctime)s\t%(levelname)s %(filename)s:%(lineno)s -- %(message)s"
        )
    )
    logger.addHandler(handler)
    logger.propagate = False


def _register_all():
    """Register all built-in RLlib algorithms (and test stubs) with Tune."""
    from ray.rllib.algorithms.registry import ALGORITHMS, _get_algorithm_class

    for key, get_trainable_class_and_config in ALGORITHMS.items():
        register_trainable(key, get_trainable_class_and_config()[0])

    # Internal, testing-only trainables.
    for key in ["__fake", "__sigmoid_fake_data", "__parameter_tuning"]:
        register_trainable(key, _get_algorithm_class(key))


_setup_logger()

# Record that RLlib was imported, for Ray usage-stats reporting.
usage_lib.record_library_usage("rllib")

__all__ = [
    "Policy",
    "TFPolicy",
    "TorchPolicy",
    "RolloutWorker",
    "SampleBatch",
    "BaseEnv",
    "MultiAgentEnv",
    "VectorEnv",
    "ExternalEnv",
]
diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/execution/__init__.py b/.venv/lib/python3.11/site-packages/ray/rllib/execution/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..d6a2b3345f868a12fb219485ce11e62a511abecc --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/rllib/execution/__init__.py @@ -0,0 +1,23 @@
from ray.rllib.execution.learner_thread import LearnerThread
from ray.rllib.execution.multi_gpu_learner_thread import MultiGPULearnerThread
from ray.rllib.execution.minibatch_buffer import MinibatchBuffer
from ray.rllib.execution.replay_ops import
SimpleReplayBuffer +from ray.rllib.execution.rollout_ops import ( + standardize_fields, + synchronous_parallel_sample, +) +from ray.rllib.execution.train_ops import ( + train_one_step, + multi_gpu_train_one_step, +) + +__all__ = [ + "multi_gpu_train_one_step", + "standardize_fields", + "synchronous_parallel_sample", + "train_one_step", + "LearnerThread", + "MultiGPULearnerThread", + "SimpleReplayBuffer", + "MinibatchBuffer", +] diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/execution/__pycache__/__init__.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/rllib/execution/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..da6f73fc34cae24dae18449a56bced27d29d38be Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/rllib/execution/__pycache__/__init__.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/execution/__pycache__/learner_thread.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/rllib/execution/__pycache__/learner_thread.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6a376377f844c1fc961a994ad71eb3c5b8231f79 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/rllib/execution/__pycache__/learner_thread.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/execution/__pycache__/minibatch_buffer.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/rllib/execution/__pycache__/minibatch_buffer.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..259582cdc5722e0f243499e9c3b047f5c44f8246 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/rllib/execution/__pycache__/minibatch_buffer.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/execution/__pycache__/multi_gpu_learner_thread.cpython-311.pyc 
b/.venv/lib/python3.11/site-packages/ray/rllib/execution/__pycache__/multi_gpu_learner_thread.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7245376c1b1faca7fdda60ccfee10f1e8b06096c Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/rllib/execution/__pycache__/multi_gpu_learner_thread.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/execution/__pycache__/replay_ops.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/rllib/execution/__pycache__/replay_ops.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4210303bd111b8ccd0426703479e14c6c7e11b6c Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/rllib/execution/__pycache__/replay_ops.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/execution/__pycache__/rollout_ops.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/rllib/execution/__pycache__/rollout_ops.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d2ccd2ffec7d2740546dc2259cc900de99e4ceb2 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/rllib/execution/__pycache__/rollout_ops.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/execution/__pycache__/segment_tree.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/rllib/execution/__pycache__/segment_tree.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..34924828807bacdcaf3c13612b81f6950894b791 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/rllib/execution/__pycache__/segment_tree.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/execution/__pycache__/train_ops.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/rllib/execution/__pycache__/train_ops.cpython-311.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..55169ab6428e0b0259ca12d38ff2c02fa0c95ec3 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/rllib/execution/__pycache__/train_ops.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/execution/buffers/__init__.py b/.venv/lib/python3.11/site-packages/ray/rllib/execution/buffers/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/execution/buffers/__pycache__/mixin_replay_buffer.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/rllib/execution/buffers/__pycache__/mixin_replay_buffer.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..23ee3f4322025372d8f2f53238d54379d513fa99 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/rllib/execution/buffers/__pycache__/mixin_replay_buffer.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/execution/learner_thread.py b/.venv/lib/python3.11/site-packages/ray/rllib/execution/learner_thread.py new file mode 100644 index 0000000000000000000000000000000000000000..49340a972c350acc6584d8274c87aab937198886 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/rllib/execution/learner_thread.py @@ -0,0 +1,137 @@ +import copy +import queue +import threading +from typing import Dict, Optional + +from ray.util.timer import _Timer +from ray.rllib.evaluation.rollout_worker import RolloutWorker +from ray.rllib.execution.minibatch_buffer import MinibatchBuffer +from ray.rllib.utils.annotations import OldAPIStack +from ray.rllib.utils.framework import try_import_tf +from ray.rllib.utils.metrics.learner_info import LearnerInfoBuilder, LEARNER_INFO +from ray.rllib.utils.metrics.window_stat import WindowStat +from ray.util.iter import _NextValueNotReady + +tf1, tf, tfv = try_import_tf() + + +@OldAPIStack +class LearnerThread(threading.Thread): + 
"""Background thread that updates the local model from sample trajectories. + + The learner thread communicates with the main thread through Queues. This + is needed since Ray operations can only be run on the main thread. In + addition, moving heavyweight gradient ops session runs off the main thread + improves overall throughput. + """ + + def __init__( + self, + local_worker: RolloutWorker, + minibatch_buffer_size: int, + num_sgd_iter: int, + learner_queue_size: int, + learner_queue_timeout: int, + ): + """Initialize the learner thread. + + Args: + local_worker: process local rollout worker holding + policies this thread will call learn_on_batch() on + minibatch_buffer_size: max number of train batches to store + in the minibatching buffer + num_sgd_iter: number of passes to learn on per train batch + learner_queue_size: max size of queue of inbound + train batches to this thread + learner_queue_timeout: raise an exception if the queue has + been empty for this long in seconds + """ + threading.Thread.__init__(self) + self.learner_queue_size = WindowStat("size", 50) + self.local_worker = local_worker + self.inqueue = queue.Queue(maxsize=learner_queue_size) + self.outqueue = queue.Queue() + self.minibatch_buffer = MinibatchBuffer( + inqueue=self.inqueue, + size=minibatch_buffer_size, + timeout=learner_queue_timeout, + num_passes=num_sgd_iter, + init_num_passes=num_sgd_iter, + ) + self.queue_timer = _Timer() + self.grad_timer = _Timer() + self.load_timer = _Timer() + self.load_wait_timer = _Timer() + self.daemon = True + self.policy_ids_updated = [] + self.learner_info = {} + self.stopped = False + self.num_steps = 0 + + def run(self) -> None: + # Switch on eager mode if configured. 
+ if self.local_worker.config.framework_str == "tf2": + tf1.enable_eager_execution() + while not self.stopped: + self.step() + + def step(self) -> Optional[_NextValueNotReady]: + with self.queue_timer: + try: + batch, _ = self.minibatch_buffer.get() + except queue.Empty: + return _NextValueNotReady() + with self.grad_timer: + # Use LearnerInfoBuilder as a unified way to build the final + # results dict from `learn_on_loaded_batch` call(s). + # This makes sure results dicts always have the same structure + # no matter the setup (multi-GPU, multi-agent, minibatch SGD, + # tf vs torch). + learner_info_builder = LearnerInfoBuilder(num_devices=1) + if self.local_worker.config.policy_states_are_swappable: + self.local_worker.lock() + multi_agent_results = self.local_worker.learn_on_batch(batch) + if self.local_worker.config.policy_states_are_swappable: + self.local_worker.unlock() + self.policy_ids_updated.extend(list(multi_agent_results.keys())) + for pid, results in multi_agent_results.items(): + learner_info_builder.add_learn_on_batch_results(results, pid) + self.learner_info = learner_info_builder.finalize() + + self.num_steps += 1 + # Put tuple: env-steps, agent-steps, and learner info into the queue. 
+ self.outqueue.put((batch.count, batch.agent_steps(), self.learner_info)) + self.learner_queue_size.push(self.inqueue.qsize()) + + def add_learner_metrics(self, result: Dict, overwrite_learner_info=True) -> Dict: + """Add internal metrics to a result dict.""" + + def timer_to_ms(timer): + return round(1000 * timer.mean, 3) + + if overwrite_learner_info: + result["info"].update( + { + "learner_queue": self.learner_queue_size.stats(), + LEARNER_INFO: copy.deepcopy(self.learner_info), + "timing_breakdown": { + "learner_grad_time_ms": timer_to_ms(self.grad_timer), + "learner_load_time_ms": timer_to_ms(self.load_timer), + "learner_load_wait_time_ms": timer_to_ms(self.load_wait_timer), + "learner_dequeue_time_ms": timer_to_ms(self.queue_timer), + }, + } + ) + else: + result["info"].update( + { + "learner_queue": self.learner_queue_size.stats(), + "timing_breakdown": { + "learner_grad_time_ms": timer_to_ms(self.grad_timer), + "learner_load_time_ms": timer_to_ms(self.load_timer), + "learner_load_wait_time_ms": timer_to_ms(self.load_wait_timer), + "learner_dequeue_time_ms": timer_to_ms(self.queue_timer), + }, + } + ) + return result diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/execution/minibatch_buffer.py b/.venv/lib/python3.11/site-packages/ray/rllib/execution/minibatch_buffer.py new file mode 100644 index 0000000000000000000000000000000000000000..29b224ce2e30f81c2b825d63d3056d0f8bc4c595 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/rllib/execution/minibatch_buffer.py @@ -0,0 +1,61 @@ +from typing import Any, Tuple +import queue + +from ray.rllib.utils.annotations import OldAPIStack + + +@OldAPIStack +class MinibatchBuffer: + """Ring buffer of recent data batches for minibatch SGD. + + This is for use with AsyncSamplesOptimizer. + """ + + def __init__( + self, + inqueue: queue.Queue, + size: int, + timeout: float, + num_passes: int, + init_num_passes: int = 1, + ): + """Initialize a minibatch buffer. 
+ + Args: + inqueue (queue.Queue): Queue to populate the internal ring buffer + from. + size: Max number of data items to buffer. + timeout: Queue timeout + num_passes: Max num times each data item should be emitted. + init_num_passes: Initial passes for each data item. + Maxiumum number of passes per item are increased to num_passes over + time. + """ + self.inqueue = inqueue + self.size = size + self.timeout = timeout + self.max_initial_ttl = num_passes + self.cur_initial_ttl = init_num_passes + self.buffers = [None] * size + self.ttl = [0] * size + self.idx = 0 + + def get(self) -> Tuple[Any, bool]: + """Get a new batch from the internal ring buffer. + + Returns: + buf: Data item saved from inqueue. + released: True if the item is now removed from the ring buffer. + """ + if self.ttl[self.idx] <= 0: + self.buffers[self.idx] = self.inqueue.get(timeout=self.timeout) + self.ttl[self.idx] = self.cur_initial_ttl + if self.cur_initial_ttl < self.max_initial_ttl: + self.cur_initial_ttl += 1 + buf = self.buffers[self.idx] + self.ttl[self.idx] -= 1 + released = self.ttl[self.idx] <= 0 + if released: + self.buffers[self.idx] = None + self.idx = (self.idx + 1) % len(self.buffers) + return buf, released diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/execution/multi_gpu_learner_thread.py b/.venv/lib/python3.11/site-packages/ray/rllib/execution/multi_gpu_learner_thread.py new file mode 100644 index 0000000000000000000000000000000000000000..aacf797b32b8b65da6e49f74492b5684e56bb1b7 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/rllib/execution/multi_gpu_learner_thread.py @@ -0,0 +1,245 @@ +import logging +import queue +import threading + +from ray.util.timer import _Timer +from ray.rllib.execution.learner_thread import LearnerThread +from ray.rllib.execution.minibatch_buffer import MinibatchBuffer +from ray.rllib.policy.sample_batch import SampleBatch +from ray.rllib.utils.annotations import OldAPIStack, override +from ray.rllib.utils.deprecation import 
deprecation_warning +from ray.rllib.utils.framework import try_import_tf +from ray.rllib.utils.metrics.learner_info import LearnerInfoBuilder +from ray.rllib.evaluation.rollout_worker import RolloutWorker + +tf1, tf, tfv = try_import_tf() + +logger = logging.getLogger(__name__) + + +@OldAPIStack +class MultiGPULearnerThread(LearnerThread): + """Learner that can use multiple GPUs and parallel loading. + + This class is used for async sampling algorithms. + + Example workflow: 2 GPUs and 3 multi-GPU tower stacks. + -> On each GPU, there are 3 slots for batches, indexed 0, 1, and 2. + + Workers collect data from env and push it into inqueue: + Workers -> (data) -> self.inqueue + + We also have two queues, indicating, which stacks are loaded and which + are not. + - idle_tower_stacks = [0, 1, 2] <- all 3 stacks are free at first. + - ready_tower_stacks = [] <- None of the 3 stacks is loaded with data. + + `ready_tower_stacks` is managed by `ready_tower_stacks_buffer` for + possible minibatch-SGD iterations per loaded batch (this avoids a reload + from CPU to GPU for each SGD iter). + + n _MultiGPULoaderThreads: self.inqueue -get()-> + policy.load_batch_into_buffer() -> ready_stacks = [0 ...] + + This thread: self.ready_tower_stacks_buffer -get()-> + policy.learn_on_loaded_batch() -> if SGD-iters done, + put stack index back in idle_tower_stacks queue. + """ + + def __init__( + self, + local_worker: RolloutWorker, + num_gpus: int = 1, + lr=None, # deprecated. + train_batch_size: int = 500, + num_multi_gpu_tower_stacks: int = 1, + num_sgd_iter: int = 1, + learner_queue_size: int = 16, + learner_queue_timeout: int = 300, + num_data_load_threads: int = 16, + _fake_gpus: bool = False, + # Deprecated arg, use + minibatch_buffer_size=None, + ): + """Initializes a MultiGPULearnerThread instance. + + Args: + local_worker: Local RolloutWorker holding + policies this thread will call `load_batch_into_buffer` and + `learn_on_loaded_batch` on. 
+ num_gpus: Number of GPUs to use for data-parallel SGD. + train_batch_size: Size of batches (minibatches if + `num_sgd_iter` > 1) to learn on. + num_multi_gpu_tower_stacks: Number of buffers to parallelly + load data into on one device. Each buffer is of size of + `train_batch_size` and hence increases GPU memory usage + accordingly. + num_sgd_iter: Number of passes to learn on per train batch + (minibatch if `num_sgd_iter` > 1). + learner_queue_size: Max size of queue of inbound + train batches to this thread. + num_data_load_threads: Number of threads to use to load + data into GPU memory in parallel. + """ + # Deprecated: No need to specify as we don't need the actual + # minibatch-buffer anyways. + if minibatch_buffer_size: + deprecation_warning( + old="MultiGPULearnerThread.minibatch_buffer_size", + error=True, + ) + super().__init__( + local_worker=local_worker, + minibatch_buffer_size=0, + num_sgd_iter=num_sgd_iter, + learner_queue_size=learner_queue_size, + learner_queue_timeout=learner_queue_timeout, + ) + # Delete reference to parent's minibatch_buffer, which is not needed. + # Instead, in multi-GPU mode, we pull tower stack indices from the + # `self.ready_tower_stacks_buffer` buffer, whose size is exactly + # `num_multi_gpu_tower_stacks`. + self.minibatch_buffer = None + + self.train_batch_size = train_batch_size + + self.policy_map = self.local_worker.policy_map + self.devices = next(iter(self.policy_map.values())).devices + + logger.info("MultiGPULearnerThread devices {}".format(self.devices)) + assert self.train_batch_size % len(self.devices) == 0 + assert self.train_batch_size >= len(self.devices), "batch too small" + + self.tower_stack_indices = list(range(num_multi_gpu_tower_stacks)) + + # Two queues for tower stacks: + # a) Those that are loaded with data ("ready") + # b) Those that are ready to be loaded with new data ("idle"). 
+ self.idle_tower_stacks = queue.Queue() + self.ready_tower_stacks = queue.Queue() + # In the beginning, all stacks are idle (no loading has taken place + # yet). + for idx in self.tower_stack_indices: + self.idle_tower_stacks.put(idx) + # Start n threads that are responsible for loading data into the + # different (idle) stacks. + for i in range(num_data_load_threads): + self.loader_thread = _MultiGPULoaderThread(self, share_stats=(i == 0)) + self.loader_thread.start() + + # Create a buffer that holds stack indices that are "ready" + # (loaded with data). Those are stacks that we can call + # "learn_on_loaded_batch" on. + self.ready_tower_stacks_buffer = MinibatchBuffer( + self.ready_tower_stacks, + num_multi_gpu_tower_stacks, + learner_queue_timeout, + num_sgd_iter, + ) + + @override(LearnerThread) + def step(self) -> None: + if not self.loader_thread.is_alive(): + raise RuntimeError( + "The `_MultiGPULoaderThread` has died! Will therefore also terminate " + "the `MultiGPULearnerThread`." + ) + + with self.load_wait_timer: + buffer_idx, released = self.ready_tower_stacks_buffer.get() + + get_num_samples_loaded_into_buffer = 0 + with self.grad_timer: + # Use LearnerInfoBuilder as a unified way to build the final + # results dict from `learn_on_loaded_batch` call(s). + # This makes sure results dicts always have the same structure + # no matter the setup (multi-GPU, multi-agent, minibatch SGD, + # tf vs torch). + learner_info_builder = LearnerInfoBuilder(num_devices=len(self.devices)) + + for pid in self.policy_map.keys(): + # Not a policy-to-train. 
+ if ( + self.local_worker.is_policy_to_train is not None + and not self.local_worker.is_policy_to_train(pid) + ): + continue + policy = self.policy_map[pid] + default_policy_results = policy.learn_on_loaded_batch( + offset=0, buffer_index=buffer_idx + ) + learner_info_builder.add_learn_on_batch_results( + default_policy_results, policy_id=pid + ) + self.policy_ids_updated.append(pid) + get_num_samples_loaded_into_buffer += ( + policy.get_num_samples_loaded_into_buffer(buffer_idx) + ) + + self.learner_info = learner_info_builder.finalize() + + if released: + self.idle_tower_stacks.put(buffer_idx) + + # Put tuple: env-steps, agent-steps, and learner info into the queue. + self.outqueue.put( + ( + get_num_samples_loaded_into_buffer, + get_num_samples_loaded_into_buffer, + self.learner_info, + ) + ) + self.learner_queue_size.push(self.inqueue.qsize()) + + +class _MultiGPULoaderThread(threading.Thread): + def __init__( + self, multi_gpu_learner_thread: MultiGPULearnerThread, share_stats: bool + ): + threading.Thread.__init__(self) + self.multi_gpu_learner_thread = multi_gpu_learner_thread + self.daemon = True + if share_stats: + self.queue_timer = multi_gpu_learner_thread.queue_timer + self.load_timer = multi_gpu_learner_thread.load_timer + else: + self.queue_timer = _Timer() + self.load_timer = _Timer() + + def run(self) -> None: + while True: + self._step() + + def _step(self) -> None: + s = self.multi_gpu_learner_thread + policy_map = s.policy_map + + # Get a new batch from the data (inqueue). + with self.queue_timer: + batch = s.inqueue.get() + + # Get next idle stack for loading. + buffer_idx = s.idle_tower_stacks.get() + + # Load the batch into the idle stack. 
+ with self.load_timer: + for pid in policy_map.keys(): + if ( + s.local_worker.is_policy_to_train is not None + and not s.local_worker.is_policy_to_train(pid, batch) + ): + continue + policy = policy_map[pid] + if isinstance(batch, SampleBatch): + policy.load_batch_into_buffer( + batch=batch, + buffer_index=buffer_idx, + ) + elif pid in batch.policy_batches: + policy.load_batch_into_buffer( + batch=batch.policy_batches[pid], + buffer_index=buffer_idx, + ) + + # Tag just-loaded stack as "ready". + s.ready_tower_stacks.put(buffer_idx) diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/execution/replay_ops.py b/.venv/lib/python3.11/site-packages/ray/rllib/execution/replay_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..bcd1f026cf1e1b3696229f7c27b7f0d504142a64 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/rllib/execution/replay_ops.py @@ -0,0 +1,37 @@ +from typing import Optional +import random + +from ray.rllib.utils.annotations import OldAPIStack +from ray.rllib.utils.replay_buffers.replay_buffer import warn_replay_capacity +from ray.rllib.utils.typing import SampleBatchType + + +@OldAPIStack +class SimpleReplayBuffer: + """Simple replay buffer that operates over batches.""" + + def __init__(self, num_slots: int, replay_proportion: Optional[float] = None): + """Initialize SimpleReplayBuffer. + + Args: + num_slots: Number of batches to store in total. 
+ """ + self.num_slots = num_slots + self.replay_batches = [] + self.replay_index = 0 + + def add_batch(self, sample_batch: SampleBatchType) -> None: + warn_replay_capacity(item=sample_batch, num_items=self.num_slots) + if self.num_slots > 0: + if len(self.replay_batches) < self.num_slots: + self.replay_batches.append(sample_batch) + else: + self.replay_batches[self.replay_index] = sample_batch + self.replay_index += 1 + self.replay_index %= self.num_slots + + def replay(self) -> SampleBatchType: + return random.choice(self.replay_batches) + + def __len__(self): + return len(self.replay_batches) diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/execution/rollout_ops.py b/.venv/lib/python3.11/site-packages/ray/rllib/execution/rollout_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..d9a683fa7dbff4a5640f5b60f4845149ed9aed58 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/rllib/execution/rollout_ops.py @@ -0,0 +1,207 @@ +import logging +from typing import List, Optional, Union +import tree + +from ray.rllib.env.env_runner_group import EnvRunnerGroup +from ray.rllib.policy.sample_batch import ( + SampleBatch, + DEFAULT_POLICY_ID, + concat_samples, +) +from ray.rllib.utils.annotations import ExperimentalAPI, OldAPIStack +from ray.rllib.utils.metrics import NUM_AGENT_STEPS_SAMPLED, NUM_ENV_STEPS_SAMPLED +from ray.rllib.utils.sgd import standardized +from ray.rllib.utils.typing import EpisodeType, SampleBatchType + +logger = logging.getLogger(__name__) + + +@ExperimentalAPI +def synchronous_parallel_sample( + *, + worker_set: EnvRunnerGroup, + max_agent_steps: Optional[int] = None, + max_env_steps: Optional[int] = None, + concat: bool = True, + sample_timeout_s: Optional[float] = None, + random_actions: bool = False, + _uses_new_env_runners: bool = False, + _return_metrics: bool = False, +) -> Union[List[SampleBatchType], SampleBatchType, List[EpisodeType], EpisodeType]: + """Runs parallel and synchronous rollouts on all remote 
workers. + + Waits for all workers to return from the remote calls. + + If no remote workers exist (num_workers == 0), use the local worker + for sampling. + + Alternatively to calling `worker.sample.remote()`, the user can provide a + `remote_fn()`, which will be applied to the worker(s) instead. + + Args: + worker_set: The EnvRunnerGroup to use for sampling. + remote_fn: If provided, use `worker.apply.remote(remote_fn)` instead + of `worker.sample.remote()` to generate the requests. + max_agent_steps: Optional number of agent steps to be included in the + final batch or list of episodes. + max_env_steps: Optional number of environment steps to be included in the + final batch or list of episodes. + concat: Whether to aggregate all resulting batches or episodes. in case of + batches the list of batches is concatinated at the end. in case of + episodes all episode lists from workers are flattened into a single list. + sample_timeout_s: The timeout in sec to use on the `foreach_env_runner` call. + After this time, the call will return with a result (or not if all + EnvRunners are stalling). If None, will block indefinitely and not timeout. + _uses_new_env_runners: Whether the new `EnvRunner API` is used. In this case + episodes instead of `SampleBatch` objects are returned. + + Returns: + The list of collected sample batch types or episode types (one for each parallel + rollout worker in the given `worker_set`). + + .. testcode:: + + # Define an RLlib Algorithm. + from ray.rllib.algorithms.ppo import PPO, PPOConfig + config = ( + PPOConfig() + .environment("CartPole-v1") + ) + algorithm = config.build() + # 2 remote EnvRunners (num_env_runners=2): + episodes = synchronous_parallel_sample( + worker_set=algorithm.env_runner_group, + _uses_new_env_runners=True, + concat=False, + ) + print(len(episodes)) + + .. testoutput:: + + 2 + """ + # Only allow one of `max_agent_steps` or `max_env_steps` to be defined. 
+ assert not (max_agent_steps is not None and max_env_steps is not None) + + agent_or_env_steps = 0 + max_agent_or_env_steps = max_agent_steps or max_env_steps or None + sample_batches_or_episodes = [] + all_stats_dicts = [] + + random_action_kwargs = {} if not random_actions else {"random_actions": True} + + # Stop collecting batches as soon as one criterium is met. + while (max_agent_or_env_steps is None and agent_or_env_steps == 0) or ( + max_agent_or_env_steps is not None + and agent_or_env_steps < max_agent_or_env_steps + ): + # No remote workers in the set -> Use local worker for collecting + # samples. + if worker_set.num_remote_workers() <= 0: + sampled_data = [worker_set.local_env_runner.sample(**random_action_kwargs)] + if _return_metrics: + stats_dicts = [worker_set.local_env_runner.get_metrics()] + # Loop over remote workers' `sample()` method in parallel. + else: + sampled_data = worker_set.foreach_env_runner( + ( + (lambda w: w.sample(**random_action_kwargs)) + if not _return_metrics + else (lambda w: (w.sample(**random_action_kwargs), w.get_metrics())) + ), + local_env_runner=False, + timeout_seconds=sample_timeout_s, + ) + # Nothing was returned (maybe all workers are stalling) or no healthy + # remote workers left: Break. + # There is no point staying in this loop, since we will not be able to + # get any new samples if we don't have any healthy remote workers left. + if not sampled_data or worker_set.num_healthy_remote_workers() <= 0: + if not sampled_data: + logger.warning( + "No samples returned from remote workers. If you have a " + "slow environment or model, consider increasing the " + "`sample_timeout_s` or decreasing the " + "`rollout_fragment_length` in `AlgorithmConfig.env_runners()." + ) + elif worker_set.num_healthy_remote_workers() <= 0: + logger.warning( + "No healthy remote workers left. Trying to restore workers ..." 
+ ) + break + + if _return_metrics: + stats_dicts = [s[1] for s in sampled_data] + sampled_data = [s[0] for s in sampled_data] + + # Update our counters for the stopping criterion of the while loop. + if _return_metrics: + if max_agent_steps: + agent_or_env_steps += sum( + int(agent_stat) + for stat_dict in stats_dicts + for agent_stat in stat_dict[NUM_AGENT_STEPS_SAMPLED].values() + ) + else: + agent_or_env_steps += sum( + int(stat_dict[NUM_ENV_STEPS_SAMPLED]) for stat_dict in stats_dicts + ) + sample_batches_or_episodes.extend(sampled_data) + all_stats_dicts.extend(stats_dicts) + else: + for batch_or_episode in sampled_data: + if max_agent_steps: + agent_or_env_steps += ( + sum(e.agent_steps() for e in batch_or_episode) + if _uses_new_env_runners + else batch_or_episode.agent_steps() + ) + else: + agent_or_env_steps += ( + sum(e.env_steps() for e in batch_or_episode) + if _uses_new_env_runners + else batch_or_episode.env_steps() + ) + sample_batches_or_episodes.append(batch_or_episode) + # Break out (and ignore the remaining samples) if max timesteps (batch + # size) reached. We want to avoid collecting batches that are too large + # only because of a failed/restarted worker causing a second iteration + # of the main loop. + if ( + max_agent_or_env_steps is not None + and agent_or_env_steps >= max_agent_or_env_steps + ): + break + + if concat is True: + # If we have episodes flatten the episode list. 
+ if _uses_new_env_runners: + sample_batches_or_episodes = tree.flatten(sample_batches_or_episodes) + # Otherwise we concatenate the `SampleBatch` objects + else: + sample_batches_or_episodes = concat_samples(sample_batches_or_episodes) + + if _return_metrics: + return sample_batches_or_episodes, all_stats_dicts + return sample_batches_or_episodes + + +@OldAPIStack +def standardize_fields(samples: SampleBatchType, fields: List[str]) -> SampleBatchType: + """Standardize fields of the given SampleBatch""" + wrapped = False + + if isinstance(samples, SampleBatch): + samples = samples.as_multi_agent() + wrapped = True + + for policy_id in samples.policy_batches: + batch = samples.policy_batches[policy_id] + for field in fields: + if field in batch: + batch[field] = standardized(batch[field]) + + if wrapped: + samples = samples.policy_batches[DEFAULT_POLICY_ID] + + return samples diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/execution/segment_tree.py b/.venv/lib/python3.11/site-packages/ray/rllib/execution/segment_tree.py new file mode 100644 index 0000000000000000000000000000000000000000..5e7a5fd102f60ec53ca2a1a39f1ea68205f10ed2 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/rllib/execution/segment_tree.py @@ -0,0 +1,212 @@ +import operator +from typing import Any, Optional + + +class SegmentTree: + """A Segment Tree data structure. + + https://en.wikipedia.org/wiki/Segment_tree + + Can be used as regular array, but with two important differences: + + a) Setting an item's value is slightly slower. It is O(lg capacity), + instead of O(1). + b) Offers efficient `reduce` operation which reduces the tree's values + over some specified contiguous subsequence of items in the array. + Operation could be e.g. min/max/sum. + + The data is stored in a list, where the length is 2 * capacity. + The second half of the list stores the actual values for each index, so if + capacity=8, values are stored at indices 8 to 15. 
The first half of the + array contains the reduced-values of the different (binary divided) + segments, e.g. (capacity=4): + 0=not used + 1=reduced-value over all elements (array indices 4 to 7). + 2=reduced-value over array indices (4 and 5). + 3=reduced-value over array indices (6 and 7). + 4-7: values of the tree. + NOTE that the values of the tree are accessed by indices starting at 0, so + `tree[0]` accesses `internal_array[4]` in the above example. + """ + + def __init__( + self, capacity: int, operation: Any, neutral_element: Optional[Any] = None + ): + """Initializes a Segment Tree object. + + Args: + capacity: Total size of the array - must be a power of two. + operation: Lambda obj, obj -> obj + The operation for combining elements (eg. sum, max). + Must be a mathematical group together with the set of + possible values for array elements. + neutral_element (Optional[obj]): The neutral element for + `operation`. Use None for automatically finding a value: + max: float("-inf"), min: float("inf"), sum: 0.0. + """ + + assert ( + capacity > 0 and capacity & (capacity - 1) == 0 + ), "Capacity must be positive and a power of 2!" + self.capacity = capacity + if neutral_element is None: + neutral_element = ( + 0.0 + if operation is operator.add + else float("-inf") + if operation is max + else float("inf") + ) + self.neutral_element = neutral_element + self.value = [self.neutral_element for _ in range(2 * capacity)] + self.operation = operation + + def reduce(self, start: int = 0, end: Optional[int] = None) -> Any: + """Applies `self.operation` to subsequence of our values. + + Subsequence is contiguous, includes `start` and excludes `end`. + + self.operation( + arr[start], operation(arr[start+1], operation(... arr[end]))) + + Args: + start: Start index to apply reduction to. + end (Optional[int]): End index to apply reduction to (excluded). + + Returns: + any: The result of reducing self.operation over the specified + range of `self._value` elements. 
+ """ + if end is None: + end = self.capacity + elif end < 0: + end += self.capacity + + # Init result with neutral element. + result = self.neutral_element + # Map start/end to our actual index space (second half of array). + start += self.capacity + end += self.capacity + + # Example: + # internal-array (first half=sums, second half=actual values): + # 0 1 2 3 | 4 5 6 7 + # - 6 1 5 | 1 0 2 3 + + # tree.sum(0, 3) = 3 + # internally: start=4, end=7 -> sum values 1 0 2 = 3. + + # Iterate over tree starting in the actual-values (second half) + # section. + # 1) start=4 is even -> do nothing. + # 2) end=7 is odd -> end-- -> end=6 -> add value to result: result=2 + # 3) int-divide start and end by 2: start=2, end=3 + # 4) start still smaller end -> iterate once more. + # 5) start=2 is even -> do nothing. + # 6) end=3 is odd -> end-- -> end=2 -> add value to result: result=1 + # NOTE: This adds the sum of indices 4 and 5 to the result. + + # Iterate as long as start != end. + while start < end: + + # If start is odd: Add its value to result and move start to + # next even value. + if start & 1: + result = self.operation(result, self.value[start]) + start += 1 + + # If end is odd: Move end to previous even value, then add its + # value to result. NOTE: This takes care of excluding `end` in any + # situation. + if end & 1: + end -= 1 + result = self.operation(result, self.value[end]) + + # Divide both start and end by 2 to make them "jump" into the + # next upper level reduce-index space. + start //= 2 + end //= 2 + + # Then repeat till start == end. + + return result + + def __setitem__(self, idx: int, val: float) -> None: + """ + Inserts/overwrites a value in/into the tree. + + Args: + idx: The index to insert to. Must be in [0, `self.capacity`[ + val: The value to insert. 
+ """ + assert 0 <= idx < self.capacity, f"idx={idx} capacity={self.capacity}" + + # Index of the leaf to insert into (always insert in "second half" + # of the tree, the first half is reserved for already calculated + # reduction-values). + idx += self.capacity + self.value[idx] = val + + # Recalculate all affected reduction values (in "first half" of tree). + idx = idx >> 1 # Divide by 2 (faster than division). + while idx >= 1: + update_idx = 2 * idx # calculate only once + # Update the reduction value at the correct "first half" idx. + self.value[idx] = self.operation( + self.value[update_idx], self.value[update_idx + 1] + ) + idx = idx >> 1 # Divide by 2 (faster than division). + + def __getitem__(self, idx: int) -> Any: + assert 0 <= idx < self.capacity + return self.value[idx + self.capacity] + + def get_state(self): + return self.value + + def set_state(self, state): + assert len(state) == self.capacity * 2 + self.value = state + + +class SumSegmentTree(SegmentTree): + """A SegmentTree with the reduction `operation`=operator.add.""" + + def __init__(self, capacity: int): + super(SumSegmentTree, self).__init__(capacity=capacity, operation=operator.add) + + def sum(self, start: int = 0, end: Optional[Any] = None) -> Any: + """Returns the sum over a sub-segment of the tree.""" + return self.reduce(start, end) + + def find_prefixsum_idx(self, prefixsum: float) -> int: + """Finds highest i, for which: sum(arr[0]+..+arr[i - i]) <= prefixsum. + + Args: + prefixsum: `prefixsum` upper bound in above constraint. + + Returns: + int: Largest possible index (i) satisfying above constraint. + """ + assert 0 <= prefixsum <= self.sum() + 1e-5 + # Global sum node. + idx = 1 + + # While non-leaf (first half of tree). 
+ while idx < self.capacity: + update_idx = 2 * idx + if self.value[update_idx] > prefixsum: + idx = update_idx + else: + prefixsum -= self.value[update_idx] + idx = update_idx + 1 + return idx - self.capacity + + +class MinSegmentTree(SegmentTree): + def __init__(self, capacity: int): + super(MinSegmentTree, self).__init__(capacity=capacity, operation=min) + + def min(self, start: int = 0, end: Optional[Any] = None) -> Any: + """Returns min(arr[start], ..., arr[end])""" + return self.reduce(start, end) diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/execution/train_ops.py b/.venv/lib/python3.11/site-packages/ray/rllib/execution/train_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..2b2b76bc671e7dab8db39fe1da38d1e577563afe --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/rllib/execution/train_ops.py @@ -0,0 +1,204 @@ +import logging +import numpy as np +import math +from typing import Dict + +from ray.rllib.policy.sample_batch import DEFAULT_POLICY_ID +from ray.rllib.utils.annotations import OldAPIStack +from ray.rllib.utils.framework import try_import_tf +from ray.rllib.utils.deprecation import deprecation_warning +from ray.rllib.utils.metrics import ( + NUM_ENV_STEPS_TRAINED, + NUM_AGENT_STEPS_TRAINED, + LEARN_ON_BATCH_TIMER, + LOAD_BATCH_TIMER, +) +from ray.rllib.utils.metrics.learner_info import LearnerInfoBuilder +from ray.rllib.utils.sgd import do_minibatch_sgd +from ray.util import log_once + +tf1, tf, tfv = try_import_tf() + +logger = logging.getLogger(__name__) + + +@OldAPIStack +def train_one_step(algorithm, train_batch, policies_to_train=None) -> Dict: + """Function that improves the all policies in `train_batch` on the local worker. + + .. testcode:: + :skipif: True + + from ray.rllib.execution.rollout_ops import synchronous_parallel_sample + algo = [...] + train_batch = synchronous_parallel_sample(algo.env_runner_group) + # This trains the policy on one batch. 
@OldAPIStack
def train_one_step(algorithm, train_batch, policies_to_train=None) -> Dict:
    """Function that improves the all policies in `train_batch` on the local worker.

    .. testcode::
        :skipif: True

        from ray.rllib.execution.rollout_ops import synchronous_parallel_sample
        algo = [...]
        train_batch = synchronous_parallel_sample(algo.env_runner_group)
        # This trains the policy on one batch.
        print(train_one_step(algo, train_batch))

    .. testoutput::

        {"default_policy": ...}

    Args:
        algorithm: The Algorithm whose local worker's policies to update.
        train_batch: The batch to learn on.
        policies_to_train: Optional collection of policy IDs to restrict the
            update to. If None, falls back to the local worker's
            `get_policies_to_train(train_batch)`.

    Returns:
        The learner-info dict (per policy ID) produced by the update.

    Updates the NUM_ENV_STEPS_TRAINED and NUM_AGENT_STEPS_TRAINED counters as well as
    the LEARN_ON_BATCH_TIMER timer of the `algorithm` object.
    """
    config = algorithm.config
    workers = algorithm.env_runner_group
    local_worker = workers.local_env_runner
    # Prefer the newer `num_epochs` key; fall back to legacy `num_sgd_iter`.
    num_sgd_iter = config.get("num_epochs", config.get("num_sgd_iter", 1))
    minibatch_size = config.get("minibatch_size")
    if minibatch_size is None:
        # Legacy key; a value of 0 means "no minibatching" below.
        minibatch_size = config.get("sgd_minibatch_size", 0)

    learn_timer = algorithm._timers[LEARN_ON_BATCH_TIMER]
    with learn_timer:
        # Subsample minibatches (size=`minibatch_size`) from the
        # train batch and loop through train batch `num_sgd_iter` times.
        if num_sgd_iter > 1 or minibatch_size > 0:
            info = do_minibatch_sgd(
                train_batch,
                {
                    pid: local_worker.get_policy(pid)
                    for pid in policies_to_train
                    or local_worker.get_policies_to_train(train_batch)
                },
                local_worker,
                num_sgd_iter,
                minibatch_size,
                [],
            )
        # Single update step using train batch.
        else:
            info = local_worker.learn_on_batch(train_batch)

    learn_timer.push_units_processed(train_batch.count)
    algorithm._counters[NUM_ENV_STEPS_TRAINED] += train_batch.count
    algorithm._counters[NUM_AGENT_STEPS_TRAINED] += train_batch.agent_steps()

    # Optionally run all configured off-policy estimators on the same batch
    # and attach their results under the default policy's info.
    if algorithm.reward_estimators:
        info[DEFAULT_POLICY_ID]["off_policy_estimation"] = {}
        for name, estimator in algorithm.reward_estimators.items():
            info[DEFAULT_POLICY_ID]["off_policy_estimation"][name] = estimator.train(
                train_batch
            )
    return info
@OldAPIStack
def multi_gpu_train_one_step(algorithm, train_batch) -> Dict:
    """Multi-GPU version of train_one_step.

    Uses the policies' `load_batch_into_buffer` and `learn_on_loaded_batch` methods
    to be more efficient wrt CPU/GPU data transfers. For example, when doing multiple
    passes through a train batch (e.g. for PPO) using `config.num_sgd_iter`, the
    actual train batch is only split once and loaded once into the GPU(s).

    .. testcode::
        :skipif: True

        from ray.rllib.execution.rollout_ops import synchronous_parallel_sample
        algo = [...]
        train_batch = synchronous_parallel_sample(algo.env_runner_group)
        # This trains the policy on one batch.
        print(multi_gpu_train_one_step(algo, train_batch))

    .. testoutput::

        {"default_policy": ...}

    Args:
        algorithm: The Algorithm whose local worker's policies to update.
        train_batch: The batch to learn on (treated as multi-agent).

    Returns:
        The finalized learner-info dict (per policy ID).

    Updates the NUM_ENV_STEPS_TRAINED and NUM_AGENT_STEPS_TRAINED counters as well as
    the LOAD_BATCH_TIMER and LEARN_ON_BATCH_TIMER timers of the Algorithm instance.
    """
    # NOTE(review): the "mulit_gpu" typo in this log_once key is kept as-is;
    # changing the key would reset the warning's dedup state.
    if log_once("mulit_gpu_train_one_step_deprecation_warning"):
        deprecation_warning(
            old=("ray.rllib.execution.train_ops." "multi_gpu_train_one_step")
        )
    config = algorithm.config
    workers = algorithm.env_runner_group
    local_worker = workers.local_env_runner
    # Prefer the newer `num_epochs` key; fall back to legacy `num_sgd_iter`.
    num_sgd_iter = config.get("num_epochs", config.get("num_sgd_iter", 1))
    minibatch_size = config.get("minibatch_size")
    if minibatch_size is None:
        # No minibatch size configured -> use the full train batch size.
        minibatch_size = config["train_batch_size"]

    # Determine the number of devices (GPUs or 1 CPU) we use.
    num_devices = int(math.ceil(config["num_gpus"] or 1))

    # Make sure total batch size is dividable by the number of devices.
    # Batch size per tower.
    per_device_batch_size = minibatch_size // num_devices
    # Total batch size (floor of minibatch_size to a multiple of num_devices).
    batch_size = per_device_batch_size * num_devices
    assert batch_size % num_devices == 0
    assert batch_size >= num_devices, "Batch size too small!"

    # Handle everything as if multi-agent.
    train_batch = train_batch.as_multi_agent()

    # Load data into GPUs.
    load_timer = algorithm._timers[LOAD_BATCH_TIMER]
    with load_timer:
        num_loaded_samples = {}
        for policy_id, batch in train_batch.policy_batches.items():
            # Not a policy-to-train.
            if (
                local_worker.is_policy_to_train is not None
                and not local_worker.is_policy_to_train(policy_id, train_batch)
            ):
                continue

            # Decompress SampleBatch, in case some columns are compressed.
            batch.decompress_if_needed()

            # Load the entire train batch into the Policy's only buffer
            # (idx=0). Policies only have >1 buffers, if we are training
            # asynchronously.
            num_loaded_samples[policy_id] = local_worker.policy_map[
                policy_id
            ].load_batch_into_buffer(batch, buffer_index=0)

    # Execute minibatch SGD on loaded data.
    learn_timer = algorithm._timers[LEARN_ON_BATCH_TIMER]
    with learn_timer:
        # Use LearnerInfoBuilder as a unified way to build the final
        # results dict from `learn_on_loaded_batch` call(s).
        # This makes sure results dicts always have the same structure
        # no matter the setup (multi-GPU, multi-agent, minibatch SGD,
        # tf vs torch).
        learner_info_builder = LearnerInfoBuilder(num_devices=num_devices)

        for policy_id, samples_per_device in num_loaded_samples.items():
            policy = local_worker.policy_map[policy_id]
            num_batches = max(1, int(samples_per_device) // int(per_device_batch_size))
            logger.debug("== sgd epochs for {} ==".format(policy_id))
            for _ in range(num_sgd_iter):
                # Visit the pre-loaded minibatches in a fresh random order
                # each epoch.
                permutation = np.random.permutation(num_batches)
                for batch_index in range(num_batches):
                    # Learn on the pre-loaded data in the buffer.
                    # Note: For minibatch SGD, the data is an offset into
                    # the pre-loaded entire train batch.
                    results = policy.learn_on_loaded_batch(
                        permutation[batch_index] * per_device_batch_size, buffer_index=0
                    )

                    learner_info_builder.add_learn_on_batch_results(results, policy_id)

        # Tower reduce and finalize results.
        learner_info = learner_info_builder.finalize()

    load_timer.push_units_processed(train_batch.count)
    learn_timer.push_units_processed(train_batch.count)

    # TODO: Move this into Algorithm's `training_step` method for
    #  better transparency.
    algorithm._counters[NUM_ENV_STEPS_TRAINED] += train_batch.count
    algorithm._counters[NUM_AGENT_STEPS_TRAINED] += train_batch.agent_steps()

    # Optionally run all configured off-policy estimators on the same batch
    # and attach their results under the default policy's info.
    if algorithm.reward_estimators:
        learner_info[DEFAULT_POLICY_ID]["off_policy_estimation"] = {}
        for name, estimator in algorithm.reward_estimators.items():
            learner_info[DEFAULT_POLICY_ID]["off_policy_estimation"][
                name
            ] = estimator.train(train_batch)

    return learner_info
"""
This script automates cleaning up a benchmark/experiment run of some algo
against some config (with possibly more than one tune trial,
e.g. torch=grid_search([True, False])).

Run `python cleanup_experiment.py --help` for more information.

Use on an input directory with trial contents e.g.:
..
IMPALA_BreakoutNoFrameskip-v4_0_use_pytorch=False_2020-05-11_10-17-54topr3h9k
IMPALA_BreakoutNoFrameskip-v4_0_use_pytorch=False_2020-05-11_13-59-35dqaetxnf
IMPALA_BreakoutNoFrameskip-v4_0_use_pytorch=False_2020-05-11_17-21-28tbhedw72
IMPALA_BreakoutNoFrameskip-v4_2_use_pytorch=True_2020-05-11_10-17-54lv20cgn_
IMPALA_BreakoutNoFrameskip-v4_2_use_pytorch=True_2020-05-11_13-59-35kwzhax_y
IMPALA_BreakoutNoFrameskip-v4_2_use_pytorch=True_2020-05-11_17-21-28a5j0s7za

Then run:
>> python cleanup_experiment.py --experiment-dir [parent dir w/ trial sub-dirs]
>> --output-dir [your out dir] --results-filter dumb_col_2,superfluous_col3
>> --results-max-size [max results file size in kb before(!) zipping]

The script will create one output sub-dir for each trial and only copy
the configuration and the csv results (filtered and every nth row removed
based on the given args).
"""

import argparse
import json
import os
import re
import shutil
import yaml

parser = argparse.ArgumentParser()
parser.add_argument(
    "--experiment-dir",
    type=str,
    help="Experiment dir in which all sub-runs (seeds) are "
    "located (as sub-dirs). Each sub-run dir must contain the files: "
    "params.json and progress.csv.",
)
parser.add_argument(
    "--output-dir",
    type=str,
    help="The output dir, in which the cleaned up output will be placed.",
)
parser.add_argument(
    "--results-filter",
    type=str,
    help="comma-separated list of csv fields to exclude.",
    default="experiment_id,pid,hostname,node_ip,trial_id,hist_stats/episode_"
    "reward,hist_stats/episode_lengths,experiment_tag",
)
parser.add_argument(
    "--results-max-size",
    type=int,
    help="the max. size of the final results.csv file (in kb). Will erase "
    "every nth line in the original input to reach that goal. "
    "Use 0 for no limit (default=100).",
    default=100,
)


def process_single_run(in_dir, out_dir):
    """Cleans up a single trial (seed) run directory.

    Converts `params.json` to `config.yaml`, writes a column-filtered and
    size-capped `progress.csv` (then zips it into `results.zip`), and copies
    TBX event/pkl files as-is from `in_dir` into `out_dir`.

    Args:
        in_dir: The trial's input directory (must contain params.json and
            progress.csv).
        out_dir: The directory to write the cleaned-up output to (created
            if it does not exist).
    """
    exp_dir = os.listdir(in_dir)

    # Make sure trials dir is ok.
    assert (
        "params.json" in exp_dir and "progress.csv" in exp_dir
    ), "params.json or progress.csv not found in {}!".format(in_dir)

    os.makedirs(out_dir, exist_ok=True)

    for file in exp_dir:
        absfile = os.path.join(in_dir, file)
        # Config file -> Convert to yaml and move to output dir.
        if file == "params.json":
            assert os.path.isfile(absfile), "{} not a file!".format(file)
            with open(absfile) as fp:
                contents = json.load(fp)
            with open(os.path.join(out_dir, "config.yaml"), "w") as fp:
                yaml.dump(contents, fp)
        # Progress csv file -> Filter out some columns, cut, and write to
        # output_dir.
        elif file == "progress.csv":
            assert os.path.isfile(absfile), "{} not a file!".format(file)
            col_idx_to_filter = []
            with open(absfile) as fp:
                # Get column names.
                col_names_orig = fp.readline().strip().split(",")
                # Split by comma (abiding to quotes), filter out
                # unwanted columns, then write to disk.
                cols_to_filter = args.results_filter.split(",")
                # Collect indices in reverse order so popping below does not
                # shift the remaining indices.
                for i, c in enumerate(col_names_orig):
                    if c in cols_to_filter:
                        col_idx_to_filter.insert(0, i)
                col_names = col_names_orig.copy()
                for idx in col_idx_to_filter:
                    col_names.pop(idx)
                absfile_out = os.path.join(out_dir, "progress.csv")
                with open(absfile_out, "w") as out_fp:
                    print(",".join(col_names), file=out_fp)
                    while True:
                        line = fp.readline().strip()
                        if not line:
                            break
                        # Fill runs of empty csv fields with "None".
                        line = re.sub(
                            "(,{2,})",
                            lambda m: ",None" * (len(m.group()) - 1) + ",",
                            line,
                        )
                        cols = re.findall('".+?"|[^,]+', line)
                        # Skip malformed rows (wrong number of columns).
                        if len(cols) != len(col_names_orig):
                            continue
                        for idx in col_idx_to_filter:
                            cols.pop(idx)
                        print(",".join(cols), file=out_fp)

            # Reduce the size of the output file if necessary.
            out_size = os.path.getsize(absfile_out)
            max_size = args.results_max_size * 1024
            if 0 < max_size < out_size:
                # Figure out roughly every which line we have to drop.
                ratio = out_size / max_size
                # NOTE(review): paths are passed unquoted to the shell below;
                # assumes dir names contain no spaces/shell metacharacters.
                # If ratio > 2.0, we'll have to keep only every nth line.
                if ratio > 2.0:
                    nth = out_size // max_size
                    os.system(
                        "awk 'NR==1||NR%{}==0' {} > {}.new".format(
                            nth, absfile_out, absfile_out
                        )
                    )
                # If ratio < 2.0 (>1.0), we'll have to drop every nth line.
                else:
                    nth = out_size // (out_size - max_size)
                    os.system(
                        "awk 'NR==1||NR%{}!=0' {} > {}.new".format(
                            nth, absfile_out, absfile_out
                        )
                    )
                os.remove(absfile_out)
                os.rename(absfile_out + ".new", absfile_out)

            # Zip progress.csv into results.zip.
            zip_file = os.path.join(out_dir, "results.zip")
            try:
                os.remove(zip_file)
            except FileNotFoundError:
                pass
            os.system(
                "zip -j {} {}".format(zip_file, os.path.join(out_dir, "progress.csv"))
            )
            os.remove(os.path.join(out_dir, "progress.csv"))

        # TBX events file -> Move as is.
        elif re.search("^(events\\.out\\.|params\\.pkl)", file):
            assert os.path.isfile(absfile), "{} not a file!".format(file)
            shutil.copyfile(absfile, os.path.join(out_dir, file))


if __name__ == "__main__":
    args = parser.parse_args()
    exp_dir = os.listdir(args.experiment_dir)
    # Loop through all sub-directories.
    for i, sub_run in enumerate(sorted(exp_dir)):
        abspath = os.path.join(args.experiment_dir, sub_run)
        # This is a seed run.
        if os.path.isdir(abspath) and re.search(
            "^(\\w+?)_(\\w+?-v\\d+)(_\\d+)", sub_run
        ):
            # Create meaningful output dir name:
            # [algo]_[env]_[trial #]_[trial-config]_[date YYYY-MM-DD].
            cleaned_up_out = re.sub(
                "^(\\w+?)_(\\w+?-v\\d+)(_\\d+)(_.+)?(_\\d{4}-\\d{2}-\\d{2})"
                "_\\d{2}-\\d{2}-\\w+",
                "{:02}_\\1_\\2\\4\\5".format(i),
                sub_run,
            )
            # Remove superfluous `env=` specifier (env always included in name).
            cleaned_up_out = re.sub(
                "^(.+)env=\\w+?-v\\d+,?(.+)", "\\1\\2", cleaned_up_out
            )
            out_path = os.path.join(args.output_dir, cleaned_up_out)
            process_single_run(abspath, out_path)
    # Done.
    print("done")
file mode 100644 index 0000000000000000000000000000000000000000..0961815d0a6fecb7f3925020cc4a6c9ac77aee31 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/rllib/tuned_examples/dreamerv3/__pycache__/cartpole.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/tuned_examples/dreamerv3/__pycache__/dm_control_suite_vision.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/rllib/tuned_examples/dreamerv3/__pycache__/dm_control_suite_vision.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..56f8d94e76494eb0e31efc2b6e785eb59b5cf80f Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/rllib/tuned_examples/dreamerv3/__pycache__/dm_control_suite_vision.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/tuned_examples/dreamerv3/__pycache__/flappy_bird.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/rllib/tuned_examples/dreamerv3/__pycache__/flappy_bird.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e546863d2105de3236277ab84eacb03b13250902 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/rllib/tuned_examples/dreamerv3/__pycache__/flappy_bird.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/tuned_examples/dreamerv3/__pycache__/frozenlake_2x2.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/rllib/tuned_examples/dreamerv3/__pycache__/frozenlake_2x2.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2ba94abb53c3e1fd83c720bb653e07952c3afe50 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/rllib/tuned_examples/dreamerv3/__pycache__/frozenlake_2x2.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/tuned_examples/dreamerv3/__pycache__/frozenlake_4x4_deterministic.cpython-311.pyc 
b/.venv/lib/python3.11/site-packages/ray/rllib/tuned_examples/dreamerv3/__pycache__/frozenlake_4x4_deterministic.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b60a4276140c61d76c9b2aa63269c28f25ea6142 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/rllib/tuned_examples/dreamerv3/__pycache__/frozenlake_4x4_deterministic.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/tuned_examples/dreamerv3/__pycache__/gymnasium_robotics.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/rllib/tuned_examples/dreamerv3/__pycache__/gymnasium_robotics.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e58d3aa7368334d140fd6623afd4d581cf68edde Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/rllib/tuned_examples/dreamerv3/__pycache__/gymnasium_robotics.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/tuned_examples/dreamerv3/__pycache__/highway_env.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/rllib/tuned_examples/dreamerv3/__pycache__/highway_env.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..030643596cdd92b59241af47e0c2720f312168d0 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/rllib/tuned_examples/dreamerv3/__pycache__/highway_env.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/tuned_examples/dreamerv3/__pycache__/pendulum.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/rllib/tuned_examples/dreamerv3/__pycache__/pendulum.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6207c652dd2a92da9383ea2a150dda2ac0f22b7d Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/rllib/tuned_examples/dreamerv3/__pycache__/pendulum.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/tuned_examples/dreamerv3/atari_100k.py 
"""
[1] Mastering Diverse Domains through World Models - 2023
D. Hafner, J. Pasukonis, J. Ba, T. Lillicrap
https://arxiv.org/pdf/2301.04104v1.pdf

[2] Mastering Atari with Discrete World Models - 2021
D. Hafner, T. Lillicrap, M. Norouzi, J. Ba
https://arxiv.org/pdf/2010.02193.pdf
"""

# Run with:
# python [this script name].py --env ale_py:ALE/[gym ID e.g. Pong-v5]

# To see all available options:
# python [this script name].py --help

from ray.rllib.algorithms.dreamerv3.dreamerv3 import DreamerV3Config
from ray.rllib.utils.test_utils import add_rllib_example_script_args

parser = add_rllib_example_script_args(
    default_iters=1000000,
    default_reward=20.0,
    default_timesteps=100000,
)
# Use `parser` to add your own custom command line options to this script
# and (if needed) use their values to set up `config` below.
args = parser.parse_args()

config = (
    DreamerV3Config()
    .environment(
        env=args.env,
        # [2]: "We follow the evaluation protocol of Machado et al. (2018) with 200M
        # environment steps, action repeat of 4, a time limit of 108,000 steps per
        # episode that correspond to 30 minutes of game play, no access to life
        # information, full action space, and sticky actions. Because the world model
        # integrates information over time, DreamerV2 does not use frame stacking.
        # The experiments use a single-task setup where a separate agent is trained
        # for each game. Moreover, each agent uses only a single environment instance.
        env_config={
            # "sticky actions" but not according to Danijar's 100k configs.
            "repeat_action_probability": 0.0,
            # "full action space" but not according to Danijar's 100k configs.
            "full_action_space": False,
            # Already done by MaxAndSkip wrapper: "action repeat" == 4.
            "frameskip": 1,
        },
    )
    .env_runners(
        num_env_runners=(args.num_env_runners or 0),
        # If we use >1 GPU and increase the batch size accordingly, we should also
        # increase the number of envs per worker.
        num_envs_per_env_runner=(args.num_learners or 1),
        # BUGFIX: `args.num_learners` may be None (flag not given), in which case
        # `None > 1` raises a TypeError. Treat a missing value as 0 learners,
        # matching the `(args.num_learners or 1)` pattern used above/below.
        remote_worker_envs=((args.num_learners or 0) > 1),
    )
    .reporting(
        metrics_num_episodes_for_smoothing=(args.num_learners or 1),
        report_images_and_videos=False,
        report_dream_data=False,
        report_individual_batch_item_stats=False,
    )
    # See Appendix A.
    .training(
        model_size="S",
        training_ratio=1024,
        batch_size_B=16 * (args.num_learners or 1),
    )
)


if __name__ == "__main__":
    from ray.rllib.utils.test_utils import run_rllib_example_script_experiment

    run_rllib_example_script_experiment(config, args, keep_config=True)
"""
[1] Mastering Diverse Domains through World Models - 2023
D. Hafner, J. Pasukonis, J. Ba, T. Lillicrap
https://arxiv.org/pdf/2301.04104v1.pdf

[2] Mastering Atari with Discrete World Models - 2021
D. Hafner, T. Lillicrap, M. Norouzi, J. Ba
https://arxiv.org/pdf/2010.02193.pdf
"""

# Run with:
# python [this script name].py --env ale_py:ALE/[gym ID e.g. Pong-v5]

# To see all available options:
# python [this script name].py --help

from ray.rllib.algorithms.dreamerv3.dreamerv3 import DreamerV3Config
from ray.rllib.utils.test_utils import add_rllib_example_script_args

parser = add_rllib_example_script_args(
    default_iters=1000000,
    default_reward=20.0,
    default_timesteps=1000000,
)
# Use `parser` to add your own custom command line options to this script
# and (if needed) use their values to set up `config` below.
args = parser.parse_args()

# Several knobs below scale with the number of learners (GPUs); default to 1.
_num_learners = args.num_learners or 1

config = DreamerV3Config()
# For each (parallelized) env, we should provide a CPU. Lower this number
# if you don't have enough CPUs.
config.resources(num_cpus_for_main_process=8 * _num_learners)
# [2]: "We follow the evaluation protocol of Machado et al. (2018) with 200M
# environment steps, action repeat of 4, a time limit of 108,000 steps per
# episode that correspond to 30 minutes of game play, no access to life
# information, full action space, and sticky actions. Because the world model
# integrates information over time, DreamerV2 does not use frame stacking.
# The experiments use a single-task setup where a separate agent is trained
# for each game. Moreover, each agent uses only a single environment instance.
config.environment(
    env=args.env,
    env_config={
        # "sticky actions" but not according to Danijar's 100k configs.
        "repeat_action_probability": 0.0,
        # "full action space" but not according to Danijar's 100k configs.
        "full_action_space": False,
        # Already done by MaxAndSkip wrapper: "action repeat" == 4.
        "frameskip": 1,
    },
)
config.env_runners(
    num_env_runners=(args.num_env_runners or 0),
    # If we use >1 GPU and increase the batch size accordingly, we should also
    # increase the number of envs per worker.
    num_envs_per_env_runner=8 * _num_learners,
    remote_worker_envs=True,
)
config.reporting(
    metrics_num_episodes_for_smoothing=_num_learners,
    report_images_and_videos=False,
    report_dream_data=False,
    report_individual_batch_item_stats=False,
)
# See Appendix A.
config.training(
    model_size="XL",
    training_ratio=64,
    batch_size_B=16 * _num_learners,
)


if __name__ == "__main__":
    from ray.rllib.utils.test_utils import run_rllib_example_script_experiment

    run_rllib_example_script_experiment(config, args, keep_config=True)
"""
[1] Mastering Diverse Domains through World Models - 2023
D. Hafner, J. Pasukonis, J. Ba, T. Lillicrap
https://arxiv.org/pdf/2301.04104v1.pdf

[2] Mastering Atari with Discrete World Models - 2021
D. Hafner, T. Lillicrap, M. Norouzi, J. Ba
https://arxiv.org/pdf/2010.02193.pdf
"""
from ray.rllib.algorithms.dreamerv3.dreamerv3 import DreamerV3Config

# Run with:
# python run_regression_tests.py --dir [this file]

# CartPole is simple enough for the smallest ("XS") world model.
config = DreamerV3Config().environment("CartPole-v1")
config.training(
    model_size="XS",
    training_ratio=1024,
)
"""
[1] Mastering Diverse Domains through World Models - 2023
D. Hafner, J. Pasukonis, J. Ba, T. Lillicrap
https://arxiv.org/pdf/2301.04104v1.pdf

[2] Mastering Atari with Discrete World Models - 2021
D. Hafner, T. Lillicrap, M. Norouzi, J. Ba
https://arxiv.org/pdf/2010.02193.pdf
"""

# Run with:
# python [this script name].py --env DMC/[task]/[domain] (e.g. DMC/cartpole/swingup)

# To see all available options:
# python [this script name].py --help

from ray.rllib.algorithms.dreamerv3.dreamerv3 import DreamerV3Config
from ray.rllib.utils.test_utils import add_rllib_example_script_args

parser = add_rllib_example_script_args(
    default_iters=1000000,
    default_reward=800.0,
    default_timesteps=1000000,
)
# Use `parser` to add your own custom command line options to this script
# and (if needed) use their values to set up `config` below.
args = parser.parse_args()

# Several knobs below scale with the number of learners (GPUs); default to 1.
_num_learners = args.num_learners or 1

config = DreamerV3Config()
# Use image observations (pixels) rather than proprioceptive state.
config.environment(
    env=args.env,
    env_config={"from_pixels": True},
)
config.env_runners(
    num_env_runners=(args.num_env_runners or 0),
    # If we use >1 GPU and increase the batch size accordingly, we should also
    # increase the number of envs per worker.
    num_envs_per_env_runner=4 * _num_learners,
    remote_worker_envs=True,
)
config.reporting(
    metrics_num_episodes_for_smoothing=_num_learners,
    report_images_and_videos=False,
    report_dream_data=False,
    report_individual_batch_item_stats=False,
)
# See Appendix A.
config.training(
    model_size="S",
    training_ratio=512,
    batch_size_B=16 * _num_learners,
)
+tune.register_env("flappy-bird", _env_creator) + +# Further specify the DreamerV3 config object to use. +( + config.environment("flappy-bird") + .resources( + num_cpus_for_main_process=1, + ) + .learners( + num_learners=0 if num_gpus == 1 else num_gpus, + num_gpus_per_learner=1 if num_gpus else 0, + ) + .env_runners( + # If we use >1 GPU and increase the batch size accordingly, we should also + # increase the number of envs per worker. + num_envs_per_env_runner=8 * (num_gpus or 1), + remote_worker_envs=True, + ) + .reporting( + metrics_num_episodes_for_smoothing=(num_gpus or 1), + report_images_and_videos=False, + report_dream_data=False, + report_individual_batch_item_stats=False, + ) + # See Appendix A. + .training( + model_size="M", + training_ratio=64, + batch_size_B=16 * (num_gpus or 1), + # Use a well established 4-GPU lr scheduling recipe: + # ~ 1000 training updates with 0.4x[default rates], then over a few hundred + # steps, increase to 4x[default rates]. + world_model_lr=[[0, 0.4 * w], [8000, 0.4 * w], [10000, 3 * w]], + critic_lr=[[0, 0.4 * c], [8000, 0.4 * c], [10000, 3 * c]], + actor_lr=[[0, 0.4 * c], [8000, 0.4 * c], [10000, 3 * c]], + ) +) diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/tuned_examples/dreamerv3/frozenlake_2x2.py b/.venv/lib/python3.11/site-packages/ray/rllib/tuned_examples/dreamerv3/frozenlake_2x2.py new file mode 100644 index 0000000000000000000000000000000000000000..03ac201479d3555407742a3c862a3d80a1fc5321 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/rllib/tuned_examples/dreamerv3/frozenlake_2x2.py @@ -0,0 +1,31 @@ +""" +[1] Mastering Diverse Domains through World Models - 2023 +D. Hafner, J. Pasukonis, J. Ba, T. Lillicrap +https://arxiv.org/pdf/2301.04104v1.pdf + +[2] Mastering Atari with Discrete World Models - 2021 +D. Hafner, T. Lillicrap, M. Norouzi, J. 
Ba +https://arxiv.org/pdf/2010.02193.pdf +""" +from ray.rllib.algorithms.dreamerv3.dreamerv3 import DreamerV3Config + +# Run with: +# python run_regression_tests.py --dir [this file] + +config = ( + DreamerV3Config() + .environment( + "FrozenLake-v1", + env_config={ + "desc": [ + "SF", + "HG", + ], + "is_slippery": False, + }, + ) + .training( + model_size="XS", + training_ratio=1024, + ) +) diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/tuned_examples/dreamerv3/frozenlake_4x4_deterministic.py b/.venv/lib/python3.11/site-packages/ray/rllib/tuned_examples/dreamerv3/frozenlake_4x4_deterministic.py new file mode 100644 index 0000000000000000000000000000000000000000..dd6a8047092564d993e67070253b17d08c57f352 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/rllib/tuned_examples/dreamerv3/frozenlake_4x4_deterministic.py @@ -0,0 +1,28 @@ +""" +[1] Mastering Diverse Domains through World Models - 2023 +D. Hafner, J. Pasukonis, J. Ba, T. Lillicrap +https://arxiv.org/pdf/2301.04104v1.pdf + +[2] Mastering Atari with Discrete World Models - 2021 +D. Hafner, T. Lillicrap, M. Norouzi, J. 
Ba +https://arxiv.org/pdf/2010.02193.pdf +""" +from ray.rllib.algorithms.dreamerv3.dreamerv3 import DreamerV3Config + +# Run with: +# python run_regression_tests.py --dir [this file] + +config = ( + DreamerV3Config() + .environment( + "FrozenLake-v1", + env_config={ + "map_name": "4x4", + "is_slippery": False, + }, + ) + .training( + model_size="nano", + training_ratio=1024, + ) +) diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/tuned_examples/dreamerv3/gymnasium_robotics.py b/.venv/lib/python3.11/site-packages/ray/rllib/tuned_examples/dreamerv3/gymnasium_robotics.py new file mode 100644 index 0000000000000000000000000000000000000000..14fd1f93070304bac0d233f09f430d44b8fb25dc --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/rllib/tuned_examples/dreamerv3/gymnasium_robotics.py @@ -0,0 +1,66 @@ +""" +[1] Mastering Diverse Domains through World Models - 2023 +D. Hafner, J. Pasukonis, J. Ba, T. Lillicrap +https://arxiv.org/pdf/2301.04104v1.pdf + +[2] Mastering Atari with Discrete World Models - 2021 +D. Hafner, T. Lillicrap, M. Norouzi, J. Ba +https://arxiv.org/pdf/2010.02193.pdf +""" + +# Run with: +# python run_regression_tests.py --dir [this file] + +try: + import gymnasium_robotics # noqa +except (ImportError, ModuleNotFoundError): + print("You have to `pip install gymnasium_robotics` in order to run this example!") + +import gymnasium as gym + +from ray.rllib.algorithms.dreamerv3.dreamerv3 import DreamerV3Config +from ray import tune + + +# Number of GPUs to run on. +num_gpus = 4 + +# Register the gymnasium robotics env (including necessary wrappers and options) via the +# `tune.register_env()` API. +# Create the specific gymnasium robotics env. +# e.g. AdroitHandHammerSparse-v1 or FrankaKitchen-v1. +# return gym.make("FrankaKitchen-v1", tasks_to_complete=["microwave", "kettle"]) +tune.register_env("flappy-bird", lambda ctx: gym.make("AdroitHandHammer-v1")) + +# Define the DreamerV3 config object to use. 
+config = DreamerV3Config() +w = config.world_model_lr +c = config.critic_lr +# Further specify the details of our config object. +( + config.resources( + num_cpus_for_main_process=8 * (num_gpus or 1), + ) + .learners( + num_learners=0 if num_gpus == 1 else num_gpus, + num_gpus_per_learner=1 if num_gpus else 0, + ) + # If we use >1 GPU and increase the batch size accordingly, we should also + # increase the number of envs per worker. + .env_runners(num_envs_per_env_runner=8 * (num_gpus or 1), remote_worker_envs=True) + .reporting( + metrics_num_episodes_for_smoothing=(num_gpus or 1), + report_images_and_videos=False, + report_dream_data=False, + report_individual_batch_item_stats=False, + ) + # See Appendix A. + .training( + model_size="XL", + training_ratio=64, + batch_size_B=16 * (num_gpus or 1), + world_model_lr=[[0, 0.4 * w], [50000, 0.4 * w], [100000, 3 * w]], + critic_lr=[[0, 0.4 * c], [50000, 0.4 * c], [100000, 3 * c]], + actor_lr=[[0, 0.4 * c], [50000, 0.4 * c], [100000, 3 * c]], + ) +) diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/tuned_examples/dreamerv3/highway_env.py b/.venv/lib/python3.11/site-packages/ray/rllib/tuned_examples/dreamerv3/highway_env.py new file mode 100644 index 0000000000000000000000000000000000000000..c3588f502c1aa2dd1c8affdfa2a947b3234360dc --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/rllib/tuned_examples/dreamerv3/highway_env.py @@ -0,0 +1,71 @@ +""" +[1] Mastering Diverse Domains through World Models - 2023 +D. Hafner, J. Pasukonis, J. Ba, T. Lillicrap +https://arxiv.org/pdf/2301.04104v1.pdf + +[2] Mastering Atari with Discrete World Models - 2021 +D. Hafner, T. Lillicrap, M. Norouzi, J. 
Ba +https://arxiv.org/pdf/2010.02193.pdf +""" + +# Run with: +# python run_regression_tests.py --dir [this file] + +try: + import highway_env # noqa +except (ImportError, ModuleNotFoundError): + print("You have to `pip install highway_env` in order to run this example!") + +import gymnasium as gym + +from ray.rllib.algorithms.dreamerv3.dreamerv3 import DreamerV3Config +from ray import tune + + +# Number of GPUs to run on. +num_gpus = 4 + +# Register the highway env (including necessary wrappers and options) via the +# `tune.register_env()` API. +# Create the specific env. +# e.g. roundabout-v0 or racetrack-v0 +tune.register_env("flappy-bird", lambda ctx: gym.make("intersection-v0", policy_freq=5)) + +# Define the DreamerV3 config object to use. +config = DreamerV3Config() +w = config.world_model_lr +c = config.critic_lr + +( + config.resources( + num_cpus_for_main_process=1, + ) + .learners( + num_learners=0 if num_gpus == 1 else num_gpus, + num_gpus_per_learner=1 if num_gpus else 0, + ) + .env_runners( + # If we use >1 GPU and increase the batch size accordingly, we should also + # increase the number of envs per worker. + num_envs_per_env_runner=8 * (num_gpus or 1), + remote_worker_envs=True, + ) + .reporting( + metrics_num_episodes_for_smoothing=(num_gpus or 1), + report_images_and_videos=False, + report_dream_data=False, + report_individual_batch_item_stats=False, + ) + # See Appendix A. + .training( + model_size="M", + training_ratio=64, + batch_size_B=16 * (num_gpus or 1), + # Use a well established 4-GPU lr scheduling recipe: + # ~ 1000 training updates with 0.4x[default rates], then over a few hundred + # steps, increase to 4x[default rates]. 
+ world_model_lr=[[0, 0.4 * w], [8000, 0.4 * w], [10000, 3 * w]], + critic_lr=[[0, 0.4 * c], [8000, 0.4 * c], [10000, 3 * c]], + actor_lr=[[0, 0.4 * c], [8000, 0.4 * c], [10000, 3 * c]], + ) +) diff --git a/.venv/lib/python3.11/site-packages/ray/rllib/tuned_examples/dreamerv3/pendulum.py b/.venv/lib/python3.11/site-packages/ray/rllib/tuned_examples/dreamerv3/pendulum.py new file mode 100644 index 0000000000000000000000000000000000000000..4acc4b9aa85a9386286e7f1fef1400b5b5fcbf4b --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/rllib/tuned_examples/dreamerv3/pendulum.py @@ -0,0 +1,19 @@ +""" +[1] Mastering Diverse Domains through World Models - 2023 +D. Hafner, J. Pasukonis, J. Ba, T. Lillicrap +https://arxiv.org/pdf/2301.04104v1.pdf + +[2] Mastering Atari with Discrete World Models - 2021 +D. Hafner, T. Lillicrap, M. Norouzi, J. Ba +https://arxiv.org/pdf/2010.02193.pdf +""" +from ray.rllib.algorithms.dreamerv3.dreamerv3 import DreamerV3Config + +# Run with: +# python run_regression_tests.py --dir [this file] + +config = ( + DreamerV3Config() + .environment("Pendulum-v1") + .training(model_size="XS", training_ratio=1024) +)