diff --git a/.gitattributes b/.gitattributes index 403db22d381680a1ef963b6288d40f01393ee1b8..2180227b7a0e6b7d31765bb94941cf79afa1d08b 100644 --- a/.gitattributes +++ b/.gitattributes @@ -156,3 +156,4 @@ tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/_inductor/_ .venv/lib/python3.11/site-packages/ray/data/__pycache__/read_api.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text .venv/lib/python3.11/site-packages/numpy/ma/tests/__pycache__/test_extras.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text .venv/lib/python3.11/site-packages/ray/serve/_private/__pycache__/deployment_state.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text +.venv/lib/python3.11/site-packages/xgrammar/xgrammar_bindings.cpython-311-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text diff --git a/.venv/lib/python3.11/site-packages/ray/air/_internal/__init__.py b/.venv/lib/python3.11/site-packages/ray/air/_internal/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/.venv/lib/python3.11/site-packages/ray/air/_internal/__pycache__/filelock.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/air/_internal/__pycache__/filelock.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ffbb56ba0908e66c0af7b59e842fd61d1de402b7 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/air/_internal/__pycache__/filelock.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/air/_internal/__pycache__/json.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/air/_internal/__pycache__/json.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..dcdcedb59c8f4ae296e96f52ec1d763d20e63cd7 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/air/_internal/__pycache__/json.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/air/_internal/__pycache__/mlflow.cpython-311.pyc 
b/.venv/lib/python3.11/site-packages/ray/air/_internal/__pycache__/mlflow.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1fdd64ba737849c0194a6837a915d44563968cf5 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/air/_internal/__pycache__/mlflow.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/air/_internal/__pycache__/tensorflow_utils.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/air/_internal/__pycache__/tensorflow_utils.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3c5e15f23f73a52241fb44a2457b42894692f857 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/air/_internal/__pycache__/tensorflow_utils.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/air/_internal/__pycache__/torch_utils.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/air/_internal/__pycache__/torch_utils.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8b508ce78ec47c3be0ece3ed40bf5782c5cee52e Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/air/_internal/__pycache__/torch_utils.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/air/_internal/__pycache__/uri_utils.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/air/_internal/__pycache__/uri_utils.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2a93bc5d057266cadbbeea4e45cbd78909ad78e9 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/air/_internal/__pycache__/uri_utils.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/air/_internal/__pycache__/usage.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/air/_internal/__pycache__/usage.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7d13ac60ae989d4bcad03734e87a4296629c9478 Binary files /dev/null and 
b/.venv/lib/python3.11/site-packages/ray/air/_internal/__pycache__/usage.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/air/_internal/__pycache__/util.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/air/_internal/__pycache__/util.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..310014ad6aa8c7a59ea0bdc91dbcb79d06da6b3c Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/air/_internal/__pycache__/util.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/air/_internal/config.py b/.venv/lib/python3.11/site-packages/ray/air/_internal/config.py new file mode 100644 index 0000000000000000000000000000000000000000..ce1df8f77acca6eea43f66ac2bf2e8d2d5b471aa --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/air/_internal/config.py @@ -0,0 +1,47 @@ +import dataclasses +from typing import Iterable + + +def ensure_only_allowed_dataclass_keys_updated( + dataclass: dataclasses.dataclass, + allowed_keys: Iterable[str], +): + """ + Validate dataclass by raising an exception if any key not included in + ``allowed_keys`` differs from the default value. + + A ``ValueError`` will also be raised if any of the ``allowed_keys`` + is not present in ``dataclass.__dict__``. + + Args: + dataclass: Dict or dataclass to check. + allowed_keys: dataclass attribute keys that can have a value different than + the default one. + """ + default_data = dataclass.__class__() + + allowed_keys = set(allowed_keys) + + # TODO: split keys_not_in_dict validation to a separate function. + keys_not_in_dict = [key for key in allowed_keys if key not in default_data.__dict__] + if keys_not_in_dict: + raise ValueError( + f"Key(s) {keys_not_in_dict} are not present in " + f"{dataclass.__class__.__name__}. " + "Remove them from `allowed_keys`. 
" + f"Valid keys: {list(default_data.__dict__.keys())}" + ) + + # These keys should not have been updated in the `dataclass` object + prohibited_keys = set(default_data.__dict__) - allowed_keys + + bad_keys = [ + key + for key in prohibited_keys + if dataclass.__dict__[key] != default_data.__dict__[key] + ] + if bad_keys: + raise ValueError( + f"Key(s) {bad_keys} are not allowed to be updated in the current context. " + "Remove them from the dataclass." + ) diff --git a/.venv/lib/python3.11/site-packages/ray/air/_internal/device_manager/__init__.py b/.venv/lib/python3.11/site-packages/ray/air/_internal/device_manager/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..824833df03ffe9e03f583856b9336d7610386d91 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/air/_internal/device_manager/__init__.py @@ -0,0 +1,92 @@ +import logging +import threading +from typing import Optional + +import ray +import ray._private.ray_constants as ray_constants +from ray.air._internal.device_manager.cpu import CPUTorchDeviceManager +from ray.air._internal.device_manager.hpu import HPUTorchDeviceManager +from ray.air._internal.device_manager.npu import NPUTorchDeviceManager +from ray.air._internal.device_manager.nvidia_gpu import CUDATorchDeviceManager +from ray.air._internal.device_manager.torch_device_manager import TorchDeviceManager + +logger = logging.getLogger(__name__) + + +DEFAULT_TORCH_DEVICE_MANAGER_CLS = CPUTorchDeviceManager + + +SUPPORTED_ACCELERATOR_TORCH_DEVICE_MANAGER = { + ray_constants.GPU: CUDATorchDeviceManager, + ray_constants.HPU: HPUTorchDeviceManager, + ray_constants.NPU: NPUTorchDeviceManager, +} + + +def register_custom_torch_dist_backend(backend: Optional[str] = None) -> None: + if backend == "hccl": + # The name for the communication backend of Habana and torch-npu is the same. 
+ HPUTorchDeviceManager.register_custom_torch_dist_backend() + + NPUTorchDeviceManager.register_custom_torch_dist_backend() + + +_torch_device_manager = None +_torch_device_manager_lock = threading.Lock() + + +def get_torch_device_manager_by_context() -> TorchDeviceManager: + global _torch_device_manager + + with _torch_device_manager_lock: + if not _torch_device_manager: + existing_device_manager_cls = None + resources = ray.get_runtime_context().get_accelerator_ids() + + # select correct accelerator type from resources + for resource_type, resource_value in resources.items(): + device_manager_cls = SUPPORTED_ACCELERATOR_TORCH_DEVICE_MANAGER.get( + resource_type, None + ) + if resource_value and device_manager_cls: + # An error will raise when multiple accelerators are specified. + if existing_device_manager_cls: + raise RuntimeError( + "Unable to determine the appropriate DeviceManager " + f"for the specified resources {resources}." + ) + else: + existing_device_manager_cls = device_manager_cls + + device_manager_cls = ( + existing_device_manager_cls or DEFAULT_TORCH_DEVICE_MANAGER_CLS + ) + + _torch_device_manager = device_manager_cls() + + return _torch_device_manager + + +def get_torch_device_manager_by_device_type(device_type: str): + if device_type.lower() == ray_constants.GPU.lower() or device_type == "cuda": + return CUDATorchDeviceManager() + elif device_type.lower() == ray_constants.NPU.lower(): + return NPUTorchDeviceManager() + elif device_type.lower() == ray_constants.HPU.lower(): + return HPUTorchDeviceManager() + elif device_type.lower() == "cpu": + return CPUTorchDeviceManager() + + raise RuntimeError(f"Device type {device_type} cannot be recognized.") + + +__all__ = [ + TorchDeviceManager, + CPUTorchDeviceManager, + CUDATorchDeviceManager, + HPUTorchDeviceManager, + NPUTorchDeviceManager, + register_custom_torch_dist_backend, + get_torch_device_manager_by_context, + get_torch_device_manager_by_device_type, +] diff --git 
a/.venv/lib/python3.11/site-packages/ray/air/_internal/device_manager/__pycache__/__init__.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/air/_internal/device_manager/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d283eda018bca41c23fa83b9062465ef1720807d Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/air/_internal/device_manager/__pycache__/__init__.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/air/_internal/device_manager/__pycache__/cpu.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/air/_internal/device_manager/__pycache__/cpu.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b7634ac0efbc54cb216413e355403a7140ea1305 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/air/_internal/device_manager/__pycache__/cpu.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/air/_internal/device_manager/__pycache__/hpu.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/air/_internal/device_manager/__pycache__/hpu.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..05f53c5322cd93b66d2fd3b27324413e21342102 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/air/_internal/device_manager/__pycache__/hpu.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/air/_internal/device_manager/__pycache__/npu.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/air/_internal/device_manager/__pycache__/npu.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d3ec345653d67bb22bab6cea81a29defb1d0d4a2 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/air/_internal/device_manager/__pycache__/npu.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/air/_internal/device_manager/__pycache__/nvidia_gpu.cpython-311.pyc 
b/.venv/lib/python3.11/site-packages/ray/air/_internal/device_manager/__pycache__/nvidia_gpu.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f48476036b521689254980ed98cd3700c0892406 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/air/_internal/device_manager/__pycache__/nvidia_gpu.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/air/_internal/device_manager/__pycache__/torch_device_manager.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/air/_internal/device_manager/__pycache__/torch_device_manager.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d8fb155f0c5d446eb4def07e82cb9676a32289ec Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/air/_internal/device_manager/__pycache__/torch_device_manager.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/air/_internal/device_manager/cpu.py b/.venv/lib/python3.11/site-packages/ray/air/_internal/device_manager/cpu.py new file mode 100644 index 0000000000000000000000000000000000000000..76fa73765287c6bf68a0b2f6e7d4130297b799df --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/air/_internal/device_manager/cpu.py @@ -0,0 +1,30 @@ +from contextlib import contextmanager +from typing import List + +import torch + +from ray.air._internal.device_manager.torch_device_manager import TorchDeviceManager + + +class CPUTorchDeviceManager(TorchDeviceManager): + """CPU device manager""" + + def is_available(self) -> bool(): + return True + + def get_devices(self) -> List[torch.device]: + """Gets the correct torch device list configured for this process.""" + return [torch.device("cpu")] + + def supports_stream(self) -> bool: + """Validate if the device type support create a stream""" + return False + + def get_stream_context(self, stream): + """Return empty context mananger for CPU.""" + + @contextmanager + def default_context_manager(): + yield + + return 
default_context_manager() diff --git a/.venv/lib/python3.11/site-packages/ray/air/_internal/device_manager/hpu.py b/.venv/lib/python3.11/site-packages/ray/air/_internal/device_manager/hpu.py new file mode 100644 index 0000000000000000000000000000000000000000..bb402ea65b0d9378cb10d70b50bab3588be79102 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/air/_internal/device_manager/hpu.py @@ -0,0 +1,50 @@ +from contextlib import contextmanager +from typing import List, Union + +import torch + +from ray._private.accelerators.hpu import HPU_PACKAGE_AVAILABLE +from ray.air._internal.device_manager.torch_device_manager import TorchDeviceManager + +if HPU_PACKAGE_AVAILABLE: + import habana_frameworks.torch.hpu as torch_hpu + + +class HPUTorchDeviceManager(TorchDeviceManager): + """HPU device manager""" + + @staticmethod + def register_custom_torch_dist_backend(): + if HPU_PACKAGE_AVAILABLE: + import habana_frameworks.torch.core # noqa: F401 + import habana_frameworks.torch.distributed.hccl # noqa: F401 + + def is_available(self) -> bool(): + if not HPU_PACKAGE_AVAILABLE: + return False + + return torch_hpu.is_available() + + def get_devices(self) -> List[torch.device]: + if not self.is_available(): + raise RuntimeError( + "Using HPUTorchDeviceManager but torch hpu is not available." 
+ ) + + return [torch.device("hpu")] + + def set_device(self, device: Union[torch.device, int, str, None]): + torch_hpu.set_device(device) + + def supports_stream(self) -> bool: + """Validate if the device type support create a stream""" + return False + + def get_stream_context(self, stream): + """Get HPU stream context manager, empty so far.""" + + @contextmanager + def default_context_manager(): + yield + + return default_context_manager() diff --git a/.venv/lib/python3.11/site-packages/ray/air/_internal/device_manager/npu.py b/.venv/lib/python3.11/site-packages/ray/air/_internal/device_manager/npu.py new file mode 100644 index 0000000000000000000000000000000000000000..aa6d7bad24081917020b88e4ad90e4335cc631ca --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/air/_internal/device_manager/npu.py @@ -0,0 +1,105 @@ +import os +from importlib.util import find_spec +from typing import List, Union + +import torch + +import ray +import ray._private.ray_constants as ray_constants +from ray.air._internal.device_manager.torch_device_manager import TorchDeviceManager + + +def is_package_present(package_name: str) -> bool: + try: + return find_spec(package_name) is not None + except ModuleNotFoundError: + return False + + +NPU_TORCH_PACKAGE_AVAILABLE = is_package_present("torch_npu") + + +if NPU_TORCH_PACKAGE_AVAILABLE: + import torch_npu # noqa: F401 + + +class NPUTorchDeviceManager(TorchDeviceManager): + """Ascend NPU device manager""" + + @staticmethod + def register_custom_torch_dist_backend(): + if NPU_TORCH_PACKAGE_AVAILABLE: + import torch_npu # noqa: F401, F811 + + def is_available(self) -> bool: + if not NPU_TORCH_PACKAGE_AVAILABLE: + return False + + return torch.npu.is_available() + + def get_devices(self) -> List[torch.device]: + """Gets the correct torch device list configured for this process. + + Returns a list of torch NPU devices allocated for the current worker. + If no NPUs are assigned, then it returns a list with a single CPU device. 
+ """ + if NPU_TORCH_PACKAGE_AVAILABLE and torch.npu.is_available(): + npu_ids = [ + str(id) + for id in ray.get_runtime_context().get_accelerator_ids()[ + ray_constants.NPU + ] + ] + + device_ids = [] + + if len(npu_ids) > 0: + npu_visible_str = os.environ.get( + ray_constants.NPU_RT_VISIBLE_DEVICES_ENV_VAR, "" + ) + if npu_visible_str and npu_visible_str != "NoDevFiles": + npu_visible_list = npu_visible_str.split(",") + else: + npu_visible_list = [] + + for npu_id in npu_ids: + try: + device_ids.append(npu_visible_list.index(npu_id)) + except IndexError: + raise RuntimeError( + "ASCEND_RT_VISIBLE_DEVICES set incorrectly. " + f"Got {npu_visible_str}, expected to include {npu_id}. " + "Did you override the `ASCEND_RT_VISIBLE_DEVICES` " + "environment variable?" + ) + else: + # If called on the driver or outside of Ray Train, return the + # 0th device. + device_ids.append(0) + + devices = [torch.device(f"npu:{device_id}") for device_id in device_ids] + else: + raise RuntimeError( + "Using NPUTorchDeviceManager but torch npu is not available." 
+ ) + + return devices + + def set_device(self, device: Union[torch.device, int]): + torch.npu.set_device(device) + + def supports_stream(self) -> bool: + """Validate if the device type support to create a stream""" + return True + + def create_stream(self, device): + """Create a stream on NPU device""" + return torch.npu.Stream(device) + + def get_stream_context(self, stream): + """Get a torch.stream context on NPU device""" + return torch.npu.stream(stream) + + def get_current_stream(self): + """Get current stream for NPU device""" + return torch.npu.current_stream() diff --git a/.venv/lib/python3.11/site-packages/ray/air/_internal/device_manager/nvidia_gpu.py b/.venv/lib/python3.11/site-packages/ray/air/_internal/device_manager/nvidia_gpu.py new file mode 100644 index 0000000000000000000000000000000000000000..f4bb1b54097e153cc7784c849c46331478d87988 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/air/_internal/device_manager/nvidia_gpu.py @@ -0,0 +1,79 @@ +import os +from typing import List, Union + +import torch + +import ray +from ray.air._internal.device_manager.torch_device_manager import TorchDeviceManager + + +class CUDATorchDeviceManager(TorchDeviceManager): + """CUDA device manager""" + + def is_available(self) -> bool(): + return torch.cuda.is_available() + + def get_devices(self) -> List[torch.device]: + """Gets the correct torch device list configured for this process. + + Returns a list of torch CUDA devices allocated for the current worker. + If no GPUs are assigned, then it returns a list with a single CPU device. + + Assumes that `CUDA_VISIBLE_DEVICES` is set and is a + superset of the `ray.get_gpu_ids()`. + """ + + # GPU IDs are assigned by Ray after you specify "use_gpu" + # GPU `ray.get_gpu_ids()` may return ints or may return strings. + # We should always convert to strings. 
+ gpu_ids = [str(id) for id in ray.get_gpu_ids()] + + device_ids = [] + + if len(gpu_ids) > 0: + cuda_visible_str = os.environ.get("CUDA_VISIBLE_DEVICES", "") + if cuda_visible_str and cuda_visible_str != "NoDevFiles": + cuda_visible_list = cuda_visible_str.split(",") + else: + cuda_visible_list = [] + + # By default, there should only be one GPU ID if `use_gpu=True`. + # If there are multiple GPUs, return a list of devices. + # If using fractional GPUs, these IDs are not guaranteed + # to be unique across different processes. + for gpu_id in gpu_ids: + try: + device_ids.append(cuda_visible_list.index(gpu_id)) + except IndexError: + raise RuntimeError( + "CUDA_VISIBLE_DEVICES set incorrectly. " + f"Got {cuda_visible_str}, expected to include {gpu_id}. " + "Did you override the `CUDA_VISIBLE_DEVICES` environment" + " variable? If not, please help file an issue on Github." + ) + + else: + # If called on the driver or outside of Ray Train, return the + # 0th device. + device_ids.append(0) + + return [torch.device(f"cuda:{device_id}") for device_id in device_ids] + + def set_device(self, device: Union[torch.device, int, str, None]): + torch.cuda.set_device(device) + + def supports_stream(self) -> bool: + """Validate if the device type support create a stream""" + return True + + def create_stream(self, device: torch.device) -> torch.cuda.Stream: + """Create a stream on cuda device""" + return torch.cuda.Stream(device) + + def get_stream_context(self, stream): + """Get a stream context for cuda device""" + return torch.cuda.stream(stream) + + def get_current_stream(self) -> torch.cuda.Stream: + """Get current stream for cuda device""" + return torch.cuda.current_stream() diff --git a/.venv/lib/python3.11/site-packages/ray/air/_internal/device_manager/torch_device_manager.py b/.venv/lib/python3.11/site-packages/ray/air/_internal/device_manager/torch_device_manager.py new file mode 100644 index 
0000000000000000000000000000000000000000..d522a477ef58a5021300eab3415abe99d3079213 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/air/_internal/device_manager/torch_device_manager.py @@ -0,0 +1,40 @@ +from abc import ABC +from typing import List, Union + +import torch + + +class TorchDeviceManager(ABC): + """This class contains the function needed for supporting + an acclerator family in Ray AI Library. + """ + + def is_available(self) -> bool: + """Validate if device is available.""" + ... + + def get_devices(self) -> List[torch.device]: + """Gets the correct torch device configured for this process""" + ... + + def set_device(self, device: Union[torch.device, int, str, None]): + """Set the correct device for this process""" + ... + + def supports_stream(self) -> bool: + """Validate if the device type support create a stream""" + ... + + def create_stream(self, device: torch.device): + """Create a device stream""" + ... + + def get_stream_context(self, stream): + """Get a stream context of device. If device didn't support stream, + this should return a empty context manager instead of None. + """ + ... + + def get_current_stream(self): + """Get current stream on accelerators like torch.cuda.current_stream""" + ... diff --git a/.venv/lib/python3.11/site-packages/ray/air/_internal/filelock.py b/.venv/lib/python3.11/site-packages/ray/air/_internal/filelock.py new file mode 100644 index 0000000000000000000000000000000000000000..9dd86d023e264f7328dad53dc0417657d246872b --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/air/_internal/filelock.py @@ -0,0 +1,46 @@ +import hashlib +import os +from pathlib import Path + +from filelock import FileLock + +import ray + +RAY_LOCKFILE_DIR = "_ray_lockfiles" + + +class TempFileLock: + """FileLock wrapper that uses temporary file locks. + + The temporary directory that these locks are saved to can be configured via + the `RAY_TMPDIR` environment variable. 
+ + Args: + path: The file path that this temporary file lock is used for. + This will be used to generate the lockfile filename. + Ex: For concurrent writes to a file, this is the common filepath + that multiple processes are writing to. + **kwargs: Additional keyword arguments to pass to the underlying `FileLock`. + """ + + def __init__(self, path: str, **kwargs): + self.path = path + temp_dir = Path(ray._private.utils.get_user_temp_dir()).resolve() + self._lock_dir = temp_dir / RAY_LOCKFILE_DIR + self._path_hash = hashlib.sha1( + str(Path(self.path).resolve()).encode("utf-8") + ).hexdigest() + self._lock_path = self._lock_dir / f"{self._path_hash}.lock" + + os.makedirs(str(self._lock_dir), exist_ok=True) + self._lock = FileLock(self._lock_path, **kwargs) + + def __enter__(self): + self._lock.acquire() + return self + + def __exit__(self, type, value, traceback): + self._lock.release() + + def __getattr__(self, name): + return getattr(self._lock, name) diff --git a/.venv/lib/python3.11/site-packages/ray/air/_internal/json.py b/.venv/lib/python3.11/site-packages/ray/air/_internal/json.py new file mode 100644 index 0000000000000000000000000000000000000000..2e88824e7109106a54020e3d5eec2629043f3ee1 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/air/_internal/json.py @@ -0,0 +1,31 @@ +import json +import numbers + +import numpy as np + + +class SafeFallbackEncoder(json.JSONEncoder): + def __init__(self, nan_str="null", **kwargs): + super(SafeFallbackEncoder, self).__init__(**kwargs) + self.nan_str = nan_str + + def default(self, value): + try: + if type(value).__module__ == np.__name__ and isinstance(value, np.ndarray): + return value.tolist() + + if isinstance(value, np.bool_): + return bool(value) + + if np.isnan(value): + return self.nan_str + + if issubclass(type(value), numbers.Integral): + return int(value) + if issubclass(type(value), numbers.Number): + return float(value) + + return super(SafeFallbackEncoder, self).default(value) + + except 
Exception: + return str(value) # give up, just stringify it (ok for logs) diff --git a/.venv/lib/python3.11/site-packages/ray/air/_internal/mlflow.py b/.venv/lib/python3.11/site-packages/ray/air/_internal/mlflow.py new file mode 100644 index 0000000000000000000000000000000000000000..727318ce839ff0bf0a78bca5346c66e44b245b75 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/air/_internal/mlflow.py @@ -0,0 +1,342 @@ +import logging +import os +from copy import deepcopy +from typing import TYPE_CHECKING, Dict, Optional + +from packaging import version + +from ray._private.dict import flatten_dict + +if TYPE_CHECKING: + from mlflow.entities import Run + from mlflow.tracking import MlflowClient + +logger = logging.getLogger(__name__) + + +class _MLflowLoggerUtil: + """Util class for setting up and logging to MLflow. + + Use this util for any library that needs MLflow logging/tracking logic + such as Ray Tune or Ray Train. + """ + + def __init__(self): + import mlflow + + self._mlflow = mlflow + self.experiment_id = None + + def __deepcopy__(self, memo=None): + # mlflow is a module, and thus cannot be copied + _mlflow = self._mlflow + self.__dict__.pop("_mlflow") + dict_copy = deepcopy(self.__dict__, memo) + copied_object = _MLflowLoggerUtil() + copied_object.__dict__.update(dict_copy) + self._mlflow = _mlflow + copied_object._mlflow = _mlflow + return copied_object + + def setup_mlflow( + self, + tracking_uri: Optional[str] = None, + registry_uri: Optional[str] = None, + experiment_id: Optional[str] = None, + experiment_name: Optional[str] = None, + tracking_token: Optional[str] = None, + artifact_location: Optional[str] = None, + create_experiment_if_not_exists: bool = True, + ): + """ + Sets up MLflow. + + Sets the Mlflow tracking uri & token, and registry URI. Also sets + the MLflow experiment that the logger should use, and possibly + creates new experiment if it does not exist. + + Args: + tracking_uri: The tracking URI for the MLflow tracking + server. 
+ registry_uri: The registry URI for the MLflow model registry. + experiment_id: The id of an already existing MLflow + experiment to use for logging. If None is passed in + here and the MFLOW_EXPERIMENT_ID is not set, or the + experiment with this id does not exist, + ``experiment_name`` will be used instead. This argument takes + precedence over ``experiment_name`` if both are passed in. + experiment_name: The experiment name to use for logging. + If None is passed in here, the MLFLOW_EXPERIMENT_NAME environment + variable is used to determine the experiment name. + If the experiment with the name already exists with MLflow, + it will be reused. If not, a new experiment will be created + with the provided name if + ``create_experiment_if_not_exists`` is set to True. + artifact_location: The location to store run artifacts. + If not provided, MLFlow picks an appropriate default. + Ignored if experiment already exists. + tracking_token: Tracking token used to authenticate with MLflow. + create_experiment_if_not_exists: Whether to create an + experiment with the provided name if it does not already + exist. Defaults to True. + + Returns: + Whether setup is successful. + """ + if tracking_token: + os.environ["MLFLOW_TRACKING_TOKEN"] = tracking_token + + self._mlflow.set_tracking_uri(tracking_uri) + self._mlflow.set_registry_uri(registry_uri) + + # First check experiment_id. + experiment_id = ( + experiment_id + if experiment_id is not None + else os.environ.get("MLFLOW_EXPERIMENT_ID") + ) + if experiment_id is not None: + from mlflow.exceptions import MlflowException + + try: + self._mlflow.get_experiment(experiment_id=experiment_id) + logger.debug( + f"Experiment with provided id {experiment_id} " + "exists. Setting that as the experiment." + ) + self.experiment_id = experiment_id + return + except MlflowException: + pass + + # Then check experiment_name. 
+ experiment_name = ( + experiment_name + if experiment_name is not None + else os.environ.get("MLFLOW_EXPERIMENT_NAME") + ) + if experiment_name is not None and self._mlflow.get_experiment_by_name( + name=experiment_name + ): + logger.debug( + f"Experiment with provided name {experiment_name} " + "exists. Setting that as the experiment." + ) + self.experiment_id = self._mlflow.get_experiment_by_name( + experiment_name + ).experiment_id + return + + # An experiment with the provided id or name does not exist. + # Create a new experiment if applicable. + if experiment_name and create_experiment_if_not_exists: + logger.debug( + "Existing experiment not found. Creating new " + f"experiment with name: {experiment_name}" + ) + self.experiment_id = self._mlflow.create_experiment( + name=experiment_name, artifact_location=artifact_location + ) + return + + if create_experiment_if_not_exists: + raise ValueError( + f"Experiment with the provided experiment_id: " + f"{experiment_id} does not exist and no " + f"experiment_name provided. At least one of " + f"these has to be provided." + ) + else: + raise ValueError( + f"Experiment with the provided experiment_id: " + f"{experiment_id} or experiment_name: " + f"{experiment_name} does not exist. Please " + f"create an MLflow experiment and provide " + f"either its id or name." + ) + + def _parse_dict(self, dict_to_log: Dict) -> Dict: + """Parses provided dict to convert all values to float. + + MLflow can only log metrics that are floats. This does not apply to + logging parameters or artifacts. + + Args: + dict_to_log: The dictionary containing the metrics to log. + + Returns: + A dictionary containing the metrics to log with all values being + converted to floats, or skipped if not able to be converted. 
+ """ + new_dict = {} + for key, value in dict_to_log.items(): + try: + value = float(value) + new_dict[key] = value + except (ValueError, TypeError): + logger.debug( + "Cannot log key {} with value {} since the " + "value cannot be converted to float.".format(key, value) + ) + continue + + return new_dict + + def start_run( + self, + run_name: Optional[str] = None, + tags: Optional[Dict] = None, + set_active: bool = False, + ) -> "Run": + """Starts a new run and possibly sets it as the active run. + + Args: + tags: Tags to set for the new run. + set_active: Whether to set the new run as the active run. + If an active run already exists, then that run is returned. + + Returns: + The newly created MLflow run. + """ + import mlflow + from mlflow.utils.mlflow_tags import MLFLOW_RUN_NAME + + if tags is None: + tags = {} + + if set_active: + return self._start_active_run(run_name=run_name, tags=tags) + + client = self._get_client() + # If `mlflow==1.30.0` and we don't use `run_name`, then MLflow might error. For + # more information, see #29749. + if version.parse(mlflow.__version__) >= version.parse("1.30.0"): + run = client.create_run( + run_name=run_name, experiment_id=self.experiment_id, tags=tags + ) + else: + tags[MLFLOW_RUN_NAME] = run_name + run = client.create_run(experiment_id=self.experiment_id, tags=tags) + + return run + + def _start_active_run( + self, run_name: Optional[str] = None, tags: Optional[Dict] = None + ) -> "Run": + """Starts a run and sets it as the active run if one does not exist. + + If an active run already exists, then returns it. 
+ """ + active_run = self._mlflow.active_run() + if active_run: + return active_run + + return self._mlflow.start_run( + run_name=run_name, experiment_id=self.experiment_id, tags=tags + ) + + def _run_exists(self, run_id: str) -> bool: + """Check if run with the provided id exists.""" + from mlflow.exceptions import MlflowException + + try: + self._mlflow.get_run(run_id=run_id) + return True + except MlflowException: + return False + + def _get_client(self) -> "MlflowClient": + """Returns an ml.tracking.MlflowClient instance to use for logging.""" + tracking_uri = self._mlflow.get_tracking_uri() + registry_uri = self._mlflow.get_registry_uri() + + from mlflow.tracking import MlflowClient + + return MlflowClient(tracking_uri=tracking_uri, registry_uri=registry_uri) + + def log_params(self, params_to_log: Dict, run_id: Optional[str] = None): + """Logs the provided parameters to the run specified by run_id. + + If no ``run_id`` is passed in, then logs to the current active run. + If there is not active run, then creates a new run and sets it as + the active run. + + Args: + params_to_log: Dictionary of parameters to log. + run_id (Optional[str]): The ID of the run to log to. + """ + params_to_log = flatten_dict(params_to_log) + + if run_id and self._run_exists(run_id): + client = self._get_client() + for key, value in params_to_log.items(): + client.log_param(run_id=run_id, key=key, value=value) + + else: + for key, value in params_to_log.items(): + self._mlflow.log_param(key=key, value=value) + + def log_metrics(self, step, metrics_to_log: Dict, run_id: Optional[str] = None): + """Logs the provided metrics to the run specified by run_id. + + + If no ``run_id`` is passed in, then logs to the current active run. + If there is not active run, then creates a new run and sets it as + the active run. + + Args: + metrics_to_log: Dictionary of metrics to log. + run_id (Optional[str]): The ID of the run to log to. 
+ """ + metrics_to_log = flatten_dict(metrics_to_log) + metrics_to_log = self._parse_dict(metrics_to_log) + + if run_id and self._run_exists(run_id): + client = self._get_client() + for key, value in metrics_to_log.items(): + client.log_metric(run_id=run_id, key=key, value=value, step=step) + + else: + for key, value in metrics_to_log.items(): + self._mlflow.log_metric(key=key, value=value, step=step) + + def save_artifacts(self, dir: str, run_id: Optional[str] = None): + """Saves directory as artifact to the run specified by run_id. + + If no ``run_id`` is passed in, then saves to the current active run. + If there is not active run, then creates a new run and sets it as + the active run. + + Args: + dir: Path to directory containing the files to save. + run_id (Optional[str]): The ID of the run to log to. + """ + if run_id and self._run_exists(run_id): + client = self._get_client() + client.log_artifacts(run_id=run_id, local_dir=dir) + else: + self._mlflow.log_artifacts(local_dir=dir) + + def end_run(self, status: Optional[str] = None, run_id=None): + """Terminates the run specified by run_id. + + If no ``run_id`` is passed in, then terminates the + active run if one exists. + + Args: + status (Optional[str]): The status to set when terminating the run. + run_id (Optional[str]): The ID of the run to terminate. 
+ + """ + if ( + run_id + and self._run_exists(run_id) + and not ( + self._mlflow.active_run() + and self._mlflow.active_run().info.run_id == run_id + ) + ): + client = self._get_client() + client.set_terminated(run_id=run_id, status=status) + else: + self._mlflow.end_run(status=status) diff --git a/.venv/lib/python3.11/site-packages/ray/air/_internal/tensorflow_utils.py b/.venv/lib/python3.11/site-packages/ray/air/_internal/tensorflow_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..46b3c2d1d1b7d5a4b86ffdf46fa6cd8598cca63a --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/air/_internal/tensorflow_utils.py @@ -0,0 +1,137 @@ +from typing import TYPE_CHECKING, Dict, List, Optional, Tuple, Union + +import numpy as np +import pyarrow +import tensorflow as tf + +from ray.air.util.data_batch_conversion import _unwrap_ndarray_object_type_if_needed +from ray.air.util.tensor_extensions.arrow import get_arrow_extension_tensor_types + +if TYPE_CHECKING: + from ray.data._internal.pandas_block import PandasBlockSchema + + +def convert_ndarray_to_tf_tensor( + ndarray: np.ndarray, + dtype: Optional[tf.dtypes.DType] = None, + type_spec: Optional[tf.TypeSpec] = None, +) -> tf.Tensor: + """Convert a NumPy ndarray to a TensorFlow Tensor. + + Args: + ndarray: A NumPy ndarray that we wish to convert to a TensorFlow Tensor. + dtype: A TensorFlow dtype for the created tensor; if None, the dtype will be + inferred from the NumPy ndarray data. + type_spec: A type spec that specifies the shape and dtype of the returned + tensor. If you specify ``dtype``, the dtype stored in the type spec is + ignored. + + Returns: A TensorFlow Tensor. 
+ """ + if dtype is None and type_spec is not None: + dtype = type_spec.dtype + + is_ragged = isinstance(type_spec, tf.RaggedTensorSpec) + ndarray = _unwrap_ndarray_object_type_if_needed(ndarray) + if is_ragged: + return tf.ragged.constant(ndarray, dtype=dtype) + else: + return tf.convert_to_tensor(ndarray, dtype=dtype) + + +def convert_ndarray_batch_to_tf_tensor_batch( + ndarrays: Union[np.ndarray, Dict[str, np.ndarray]], + dtypes: Optional[Union[tf.dtypes.DType, Dict[str, tf.dtypes.DType]]] = None, +) -> Union[tf.Tensor, Dict[str, tf.Tensor]]: + """Convert a NumPy ndarray batch to a TensorFlow Tensor batch. + + Args: + ndarray: A (dict of) NumPy ndarray(s) that we wish to convert to a TensorFlow + Tensor. + dtype: A (dict of) TensorFlow dtype(s) for the created tensor; if None, the + dtype will be inferred from the NumPy ndarray data. + + Returns: A (dict of) TensorFlow Tensor(s). + """ + if isinstance(ndarrays, np.ndarray): + # Single-tensor case. + if isinstance(dtypes, dict): + if len(dtypes) != 1: + raise ValueError( + "When constructing a single-tensor batch, only a single dtype " + f"should be given, instead got: {dtypes}" + ) + dtypes = next(iter(dtypes.values())) + batch = convert_ndarray_to_tf_tensor(ndarrays, dtypes) + else: + # Multi-tensor case. 
+ batch = { + col_name: convert_ndarray_to_tf_tensor( + col_ndarray, + dtype=dtypes[col_name] if isinstance(dtypes, dict) else dtypes, + ) + for col_name, col_ndarray in ndarrays.items() + } + + return batch + + +def get_type_spec( + schema: Union["pyarrow.lib.Schema", "PandasBlockSchema"], + columns: Union[str, List[str]], +) -> Union[tf.TypeSpec, Dict[str, tf.TypeSpec]]: + import pyarrow as pa + + from ray.data.extensions import TensorDtype + + tensor_extension_types = get_arrow_extension_tensor_types() + + assert not isinstance(schema, type) + + dtypes: Dict[str, Union[np.dtype, pa.DataType]] = dict( + zip(schema.names, schema.types) + ) + + def get_dtype(dtype: Union[np.dtype, pa.DataType]) -> tf.dtypes.DType: + if isinstance(dtype, pa.ListType): + dtype = dtype.value_type + if isinstance(dtype, pa.DataType): + dtype = dtype.to_pandas_dtype() + if isinstance(dtype, TensorDtype): + dtype = dtype.element_dtype + res = tf.dtypes.as_dtype(dtype) + return res + + def get_shape(dtype: Union[np.dtype, pa.DataType]) -> Tuple[int, ...]: + shape = (None,) + if isinstance(dtype, tensor_extension_types): + dtype = dtype.to_pandas_dtype() + if isinstance(dtype, pa.ListType): + shape += (None,) + elif isinstance(dtype, TensorDtype): + shape += dtype.element_shape + return shape + + def get_tensor_spec( + dtype: Union[np.dtype, pa.DataType], *, name: str + ) -> tf.TypeSpec: + + shape, dtype = get_shape(dtype), get_dtype(dtype) + # Batch dimension is always `None`. So, if there's more than one `None`-valued + # dimension, then the tensor is ragged. 
+ is_ragged = sum(dim is None for dim in shape) > 1 + if is_ragged: + type_spec = tf.RaggedTensorSpec(shape, dtype=dtype) + else: + type_spec = tf.TensorSpec(shape, dtype=dtype, name=name) + return type_spec + + if isinstance(columns, str): + name, dtype = columns, dtypes[columns] + return get_tensor_spec(dtype, name=name) + + return { + name: get_tensor_spec(dtype, name=name) + for name, dtype in dtypes.items() + if name in columns + } diff --git a/.venv/lib/python3.11/site-packages/ray/air/_internal/torch_utils.py b/.venv/lib/python3.11/site-packages/ray/air/_internal/torch_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..caeb27a20a30a4709a270f703fbaff245a040ec6 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/air/_internal/torch_utils.py @@ -0,0 +1,294 @@ +import warnings +from typing import Any, Dict, List, Optional, Union + +import numpy as np +import pandas as pd +import torch + +from ray.air._internal.device_manager import get_torch_device_manager_by_context +from ray.air.util.data_batch_conversion import _unwrap_ndarray_object_type_if_needed + + +def get_devices() -> List[torch.device]: + """Gets the correct torch device list configured for this process. + + Returns a list of torch accelerator (GPU, HPU, NPU...) devices allocated for + the current worker. + If no accelerators are assigned, then it returns a list with a single CPU device. + """ + return get_torch_device_manager_by_context().get_devices() + + +def convert_pandas_to_torch_tensor( + data_batch: pd.DataFrame, + columns: Optional[Union[List[str], List[List[str]]]] = None, + column_dtypes: Optional[Union[torch.dtype, List[torch.dtype]]] = None, + unsqueeze: bool = True, +) -> Union[torch.Tensor, List[torch.Tensor]]: + """Converts a Pandas dataframe to a torch Tensor or list of torch Tensors. + + The format of the return type will match the format of ``columns``. If a + list of columns is provided, the return type will be a single tensor. 
If + ``columns`` is a list of lists, then the return type will be a list of + tensors. + + Args: + data_batch: The pandas dataframe to convert to a + torch tensor. + columns: + The names of the columns in the dataframe to include in the + torch tensor. If this arg is a List[List[str]], then the return + type will be a List of tensors. This is useful for multi-input + models. If None, then use all columns in the ``data_batch``. + column_dtype: The + torch dtype to use for the tensor. If set to None, + then automatically infer the dtype. + unsqueeze: If set to True, the tensors + will be unsqueezed (reshaped to (N, 1)) before being concatenated into + the final tensor. Otherwise, they will be left as is, that is + (N, ). Defaults to True. + + Returns: + Either a torch tensor of size (N, len(columns)) where N is the + number of rows in the ``data_batch`` Dataframe, or a list of + tensors, where the size of item i is (N, len(columns[i])). + + """ + + multi_input = columns and (isinstance(columns[0], (list, tuple))) + + if not multi_input and column_dtypes and not isinstance(column_dtypes, torch.dtype): + raise TypeError( + "If `columns` is a list of strings, " + "`column_dtypes` must be None or a single `torch.dtype`." + f"Got {type(column_dtypes)} instead." + ) + + columns = columns if columns else [] + + def tensorize(vals, dtype): + """This recursive function allows to convert pyarrow List dtypes + to multi-dimensional tensors.""" + if isinstance(vals, pd.api.extensions.ExtensionArray): + # torch.as_tensor() does not yet support the __array__ protocol, so we need + # to convert extension arrays to ndarrays manually before converting to a + # Torch tensor. + # See https://github.com/pytorch/pytorch/issues/51156. + vals = vals.to_numpy() + + if vals.dtype.type is np.object_: + # Column has an object dtype which Torch can't handle, so we try to + # tensorize each column element and then stack the resulting tensors. 
+ tensors = [tensorize(x, dtype) for x in vals] + try: + return torch.stack(tensors) + except RuntimeError: + # NOTE: RuntimeError is raised when trying to stack ragged tensors. + # Try to coerce the tensor to a nested tensor, if possible. + # If this fails, the exception will be propagated up to the caller. + return torch.nested_tensor(tensors) + else: + return torch.as_tensor(vals, dtype=dtype) + + def get_tensor_for_columns(columns, dtype): + feature_tensors = [] + + if columns: + batch = data_batch[columns] + else: + batch = data_batch + + for col in batch.columns: + col_vals = batch[col].values + try: + t = tensorize(col_vals, dtype=dtype) + except Exception as e: + raise ValueError( + f"Failed to convert column {col} to a Torch Tensor of dtype " + f"{dtype}. See above exception chain for the exact failure." + ) from e + if unsqueeze: + t = t.unsqueeze(1) + feature_tensors.append(t) + + if len(feature_tensors) > 1: + feature_tensor = torch.cat(feature_tensors, dim=1) + else: + feature_tensor = feature_tensors[0] + return feature_tensor + + if multi_input: + if type(column_dtypes) not in [list, tuple]: + column_dtypes = [column_dtypes] * len(columns) + return [ + get_tensor_for_columns(columns=subcolumns, dtype=dtype) + for subcolumns, dtype in zip(columns, column_dtypes) + ] + else: + return get_tensor_for_columns(columns=columns, dtype=column_dtypes) + + +def convert_ndarray_to_torch_tensor( + ndarray: np.ndarray, + dtype: Optional[torch.dtype] = None, + device: Optional[str] = None, +) -> torch.Tensor: + """Convert a NumPy ndarray to a Torch Tensor. + + Args: + ndarray: A NumPy ndarray that we wish to convert to a Torch Tensor. + dtype: A Torch dtype for the created tensor; if None, the dtype will be + inferred from the NumPy ndarray data. + device: The device on which the tensor(s) should be placed; if None, the Torch + tensor(s) will be constructed on the CPU. + + Returns: A Torch Tensor. 
+ """ + ndarray = _unwrap_ndarray_object_type_if_needed(ndarray) + + # Object dtype cannot be converted into PyTorch Tensor. + if ndarray.dtype.type is np.object_: + raise RuntimeError( + "Numpy array of object dtype cannot be converted to a Torch Tensor. This " + "may because the numpy array is a ragged tensor--it contains items of " + "different sizes. If using `iter_torch_batches()` API, you can pass in a " + "`collate_fn` argument to specify custom logic to convert the Numpy array " + "batch to a Torch tensor batch." + ) + + # The numpy array is not always writeable as it can come from the Ray object store. + # Numpy will throw a verbose warning here, which we suppress, as we don't write + # to the tensors. We also don't want to copy the array to avoid memory overhead. + # Original warning: https://github.com/pytorch/pytorch/blob/v1.13.0/ + # torch/csrc/utils/tensor_numpy.cpp#L198-L206 + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + return torch.as_tensor(ndarray, dtype=dtype, device=device) + + +def convert_ndarray_batch_to_torch_tensor_batch( + ndarrays: Union[np.ndarray, Dict[str, np.ndarray]], + dtypes: Optional[Union[torch.dtype, Dict[str, torch.dtype]]] = None, + device: Optional[str] = None, +) -> Union[torch.Tensor, Dict[str, torch.Tensor]]: + """Convert a NumPy ndarray batch to a Torch Tensor batch. + + Args: + ndarray: A (dict of) NumPy ndarray(s) that we wish to convert to a Torch Tensor. + dtype: A (dict of) Torch dtype(s) for the created tensor; if None, the dtype + will be inferred from the NumPy ndarray data. + device: The device on which the tensor(s) should be placed; if None, the Torch + tensor(s) will be constructed on the CPU. + + Returns: A (dict of) Torch Tensor(s). + """ + if isinstance(ndarrays, np.ndarray): + # Single-tensor case. 
+ if isinstance(dtypes, dict): + if len(dtypes) != 1: + raise ValueError( + "When constructing a single-tensor batch, only a single dtype " + f"should be given, instead got: {dtypes}" + ) + dtypes = next(iter(dtypes.values())) + batch = convert_ndarray_to_torch_tensor(ndarrays, dtype=dtypes, device=device) + else: + # Multi-tensor case. + batch = { + col_name: convert_ndarray_to_torch_tensor( + col_ndarray, + dtype=dtypes[col_name] if isinstance(dtypes, dict) else dtypes, + device=device, + ) + for col_name, col_ndarray in ndarrays.items() + } + + return batch + + +def load_torch_model( + saved_model: Union[torch.nn.Module, Dict], + model_definition: Optional[torch.nn.Module] = None, +) -> torch.nn.Module: + """Loads a PyTorch model from the provided ``saved_model``. + + ``model_definition`` is only used when ``saved_model`` is + a torch state dict, which will be loaded into ``model_definition``. + Otherwise, ``model_definition`` is discarded. + """ + if isinstance(saved_model, torch.nn.Module): + return saved_model + elif isinstance(saved_model, dict): + if not model_definition: + raise ValueError( + "Attempting to load torch model from a " + "state_dict, but no `model_definition` was " + "provided." + ) + model_definition.load_state_dict(saved_model) + return model_definition + else: + raise ValueError( + f"Saved model is of type {type(saved_model)}. " + f"The model saved in the checkpoint is expected " + f"to be of type `torch.nn.Module`, or a model " + f"state dict of type dict." 
+ ) + + +def contains_tensor(obj): + if isinstance(obj, torch.Tensor): + return True + elif isinstance(obj, dict): + for k, v in obj.items(): + if contains_tensor(k): + return True + if contains_tensor(v): + return True + elif isinstance(obj, (list, tuple)): + for v in obj: + if contains_tensor(v): + return True + return False + + +# Not present in torch<=1.7.0 +# Adapted from https://github.com/pytorch/pytorch/blob/\ +# c18da597e0bb1c1aecc97c77a73fed1849057fa4/torch/nn/modules/utils.py +def consume_prefix_in_state_dict_if_present_not_in_place( + state_dict: Dict[str, Any], prefix: str +) -> Dict[str, Any]: + """Strip the prefix in state_dict, if any and return a new dict. + + Adapted from https://github.com/pytorch/pytorch/blob/\ +c18da597e0bb1c1aecc97c77a73fed1849057fa4/torch/nn/modules/utils.py + The original method modified the dict in-place. + + Args: + state_dict: a state-dict to be loaded to the model. + prefix: prefix. + + """ + copied = False + + for key in state_dict: + if key.startswith(prefix): + newkey = key[len(prefix) :] + if not copied: + # We are doing shallow copies here, so the performance + # impact should be negligible anyway, but this is + # a simple optimization. 
+ state_dict = state_dict.copy() + copied = True + state_dict[newkey] = state_dict.pop(key) + + if "_metadata" in state_dict: + state_dict["_metadata"] = state_dict["_metadata"].copy() + metadata = state_dict["_metadata"] + for key in metadata: + if len(key) == 0: + continue + newkey = key[len(prefix) :] + metadata[newkey] = metadata.pop(key) + + return state_dict diff --git a/.venv/lib/python3.11/site-packages/ray/air/_internal/uri_utils.py b/.venv/lib/python3.11/site-packages/ray/air/_internal/uri_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..33c9ae7c10a29c57bac8f33a2abfd6678eb4a8de --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/air/_internal/uri_utils.py @@ -0,0 +1,101 @@ +import os +import urllib.parse +from pathlib import Path +from typing import Union + + +class URI: + """Represents a URI, supporting path appending and retrieving parent URIs. + + Example Usage: + + >>> s3_uri = URI("s3://bucket/a?scheme=http¶m=1") + >>> s3_uri + URI + >>> str(s3_uri / "b" / "c") + 's3://bucket/a/b/c?scheme=http¶m=1' + >>> str(s3_uri.parent) + 's3://bucket?scheme=http¶m=1' + >>> str(s3_uri) + 's3://bucket/a?scheme=http¶m=1' + >>> s3_uri.parent.name, s3_uri.name + ('bucket', 'a') + >>> local_path = URI("/tmp/local") + >>> str(local_path) + '/tmp/local' + >>> str(local_path.parent) + '/tmp' + >>> str(local_path / "b" / "c") + '/tmp/local/b/c' + + Args: + uri: The URI to represent. + Ex: s3://bucket?scheme=http&endpoint_override=localhost%3A900 + Ex: file:///a/b/c/d + """ + + def __init__(self, uri: str): + self._parsed = urllib.parse.urlparse(uri) + if not self._parsed.scheme: + # Just treat this as a regular path + self._path = Path(uri) + else: + self._path = Path(os.path.normpath(self._parsed.netloc + self._parsed.path)) + + def rstrip_subpath(self, subpath: Path) -> "URI": + """Returns a new URI that strips the given subpath from the end of this URI. 
+ + Example: + >>> uri = URI("s3://bucket/a/b/c/?param=1") + >>> str(uri.rstrip_subpath(Path("b/c"))) + 's3://bucket/a?param=1' + + >>> uri = URI("/tmp/a/b/c/") + >>> str(uri.rstrip_subpath(Path("/b/c/.//"))) + '/tmp/a' + + """ + assert str(self._path).endswith(str(subpath)), (self._path, subpath) + stripped_path = str(self._path).replace(str(subpath), "") + return URI(self._get_str_representation(self._parsed, stripped_path)) + + @property + def name(self) -> str: + return self._path.name + + @property + def parent(self) -> "URI": + assert self._path.parent != ".", f"{str(self)} has no valid parent URI" + return URI(self._get_str_representation(self._parsed, self._path.parent)) + + @property + def scheme(self) -> str: + return self._parsed.scheme + + @property + def path(self) -> str: + return str(self._path) + + def __truediv__(self, path_to_append): + assert isinstance(path_to_append, str) + return URI( + self._get_str_representation(self._parsed, self._path / path_to_append) + ) + + @classmethod + def _get_str_representation( + cls, parsed_uri: urllib.parse.ParseResult, path: Union[str, Path] + ) -> str: + if not parsed_uri.scheme: + return str(path) + return parsed_uri._replace(netloc=str(path), path="").geturl() + + def __repr__(self): + return f"URI<{str(self)}>" + + def __str__(self): + return self._get_str_representation(self._parsed, self._path) + + +def is_uri(path: str) -> bool: + return bool(urllib.parse.urlparse(path).scheme) diff --git a/.venv/lib/python3.11/site-packages/ray/air/_internal/usage.py b/.venv/lib/python3.11/site-packages/ray/air/_internal/usage.py new file mode 100644 index 0000000000000000000000000000000000000000..64e41a83eb7edadd4320c67a147a93c6a5082212 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/air/_internal/usage.py @@ -0,0 +1,257 @@ +import collections +import json +import os +from enum import Enum +from typing import TYPE_CHECKING, Dict, List, Optional, Set, Union + +from ray._private.usage.usage_lib import TagKey, 
record_extra_usage_tag + +if TYPE_CHECKING: + from ray.train._internal.storage import StorageContext + from ray.train.trainer import BaseTrainer + from ray.tune import Callback + from ray.tune.schedulers import TrialScheduler + from ray.tune.search import BasicVariantGenerator, Searcher + + +AIR_TRAINERS = { + "HorovodTrainer", + "LightGBMTrainer", + "TensorflowTrainer", + "TorchTrainer", + "XGBoostTrainer", +} + +# searchers implemented by Ray Tune. +TUNE_SEARCHERS = { + "AxSearch", + "BayesOptSearch", + "TuneBOHB", + "HEBOSearch", + "HyperOptSearch", + "NevergradSearch", + "OptunaSearch", + "ZOOptSearch", +} + +# These are just wrappers around real searchers. +# We don't want to double tag in this case, otherwise, the real tag +# will be overwritten. +TUNE_SEARCHER_WRAPPERS = { + "ConcurrencyLimiter", + "Repeater", +} + +TUNE_SCHEDULERS = { + "FIFOScheduler", + "AsyncHyperBandScheduler", + "MedianStoppingRule", + "HyperBandScheduler", + "HyperBandForBOHB", + "PopulationBasedTraining", + "PopulationBasedTrainingReplay", + "PB2", + "ResourceChangingScheduler", +} + + +class AirEntrypoint(Enum): + TUNER = "Tuner.fit" + TRAINER = "Trainer.fit" + TUNE_RUN = "tune.run" + TUNE_RUN_EXPERIMENTS = "tune.run_experiments" + + +def _find_class_name(obj, allowed_module_path_prefix: str, whitelist: Set[str]): + """Find the class name of the object. If the object is not + under `allowed_module_path_prefix` or if its class is not in the whitelist, + return "Custom". + + Args: + obj: The object under inspection. + allowed_module_path_prefix: If the `obj`'s class is not under + the `allowed_module_path_prefix`, its class name will be anonymized. + whitelist: If the `obj`'s class is not in the `whitelist`, + it will be anonymized. + Returns: + The class name to be tagged with telemetry. 
+ """ + module_path = obj.__module__ + cls_name = obj.__class__.__name__ + if module_path.startswith(allowed_module_path_prefix) and cls_name in whitelist: + return cls_name + else: + return "Custom" + + +def tag_air_trainer(trainer: "BaseTrainer"): + from ray.train.trainer import BaseTrainer + + assert isinstance(trainer, BaseTrainer) + trainer_name = _find_class_name(trainer, "ray.train", AIR_TRAINERS) + record_extra_usage_tag(TagKey.AIR_TRAINER, trainer_name) + + +def tag_searcher(searcher: Union["BasicVariantGenerator", "Searcher"]): + from ray.tune.search import BasicVariantGenerator, Searcher + + if isinstance(searcher, BasicVariantGenerator): + # Note this could be highly inflated as all train flows are treated + # as using BasicVariantGenerator. + record_extra_usage_tag(TagKey.TUNE_SEARCHER, "BasicVariantGenerator") + elif isinstance(searcher, Searcher): + searcher_name = _find_class_name( + searcher, "ray.tune.search", TUNE_SEARCHERS.union(TUNE_SEARCHER_WRAPPERS) + ) + if searcher_name in TUNE_SEARCHER_WRAPPERS: + # ignore to avoid double tagging with wrapper name. + return + record_extra_usage_tag(TagKey.TUNE_SEARCHER, searcher_name) + else: + assert False, ( + "Not expecting a non-BasicVariantGenerator, " + "non-Searcher type passed in for `tag_searcher`." 
+ ) + + +def tag_scheduler(scheduler: "TrialScheduler"): + from ray.tune.schedulers import TrialScheduler + + assert isinstance(scheduler, TrialScheduler) + scheduler_name = _find_class_name(scheduler, "ray.tune.schedulers", TUNE_SCHEDULERS) + record_extra_usage_tag(TagKey.TUNE_SCHEDULER, scheduler_name) + + +def tag_setup_wandb(): + record_extra_usage_tag(TagKey.AIR_SETUP_WANDB_INTEGRATION_USED, "1") + + +def tag_setup_mlflow(): + record_extra_usage_tag(TagKey.AIR_SETUP_MLFLOW_INTEGRATION_USED, "1") + + +def _count_callbacks(callbacks: Optional[List["Callback"]]) -> Dict[str, int]: + """Creates a map of callback class name -> count given a list of callbacks.""" + from ray.air.integrations.comet import CometLoggerCallback + from ray.air.integrations.mlflow import MLflowLoggerCallback + from ray.air.integrations.wandb import WandbLoggerCallback + from ray.tune import Callback + from ray.tune.logger import LoggerCallback + from ray.tune.logger.aim import AimLoggerCallback + from ray.tune.utils.callback import DEFAULT_CALLBACK_CLASSES + + built_in_callbacks = ( + WandbLoggerCallback, + MLflowLoggerCallback, + CometLoggerCallback, + AimLoggerCallback, + ) + DEFAULT_CALLBACK_CLASSES + + callback_names = [callback_cls.__name__ for callback_cls in built_in_callbacks] + callback_counts = collections.defaultdict(int) + + callbacks = callbacks or [] + for callback in callbacks: + if not isinstance(callback, Callback): + # This will error later, but don't include this as custom usage. + continue + + callback_name = callback.__class__.__name__ + + if callback_name in callback_names: + callback_counts[callback_name] += 1 + elif isinstance(callback, LoggerCallback): + callback_counts["CustomLoggerCallback"] += 1 + else: + callback_counts["CustomCallback"] += 1 + + return callback_counts + + +def tag_callbacks(callbacks: Optional[List["Callback"]]) -> bool: + """Records built-in callback usage via a JSON str representing a + dictionary mapping callback class name -> counts. 
+ + User-defined callbacks will increment the count under the `CustomLoggerCallback` + or `CustomCallback` key depending on which of the provided interfaces they subclass. + NOTE: This will NOT track the name of the user-defined callback, + nor its implementation. + + This will NOT report telemetry if no callbacks are provided by the user. + + Returns: + bool: True if usage was recorded, False otherwise. + """ + if not callbacks: + # User didn't pass in any callbacks -> no usage recorded. + return False + + callback_counts = _count_callbacks(callbacks) + + if callback_counts: + callback_counts_str = json.dumps(callback_counts) + record_extra_usage_tag(TagKey.AIR_CALLBACKS, callback_counts_str) + + +def tag_storage_type(storage: "StorageContext"): + """Records the storage configuration of an experiment. + + The storage configuration is set by `RunConfig(storage_path, storage_filesystem)`. + + The possible storage types (defined by `pyarrow.fs.FileSystem.type_name`) are: + - 'local' = pyarrow.fs.LocalFileSystem. This includes NFS usage. + - 'mock' = pyarrow.fs._MockFileSystem. This is used for testing. + - ('s3', 'gcs', 'abfs', 'hdfs'): Various remote storage schemes + with default implementations in pyarrow. + - 'custom' = All other storage schemes, which includes ALL cases where a + custom `storage_filesystem` is provided. + - 'other' = catches any other cases not explicitly handled above. + """ + whitelist = {"local", "mock", "s3", "gcs", "abfs", "hdfs"} + + if storage.custom_fs_provided: + storage_config_tag = "custom" + elif storage.storage_filesystem.type_name in whitelist: + storage_config_tag = storage.storage_filesystem.type_name + else: + storage_config_tag = "other" + + record_extra_usage_tag(TagKey.AIR_STORAGE_CONFIGURATION, storage_config_tag) + + +def tag_ray_air_env_vars() -> bool: + """Records usage of environment variables exposed by the Ray AIR libraries. 
+ + NOTE: This does not track the values of the environment variables, nor + does this track environment variables not explicitly included in the + `all_ray_air_env_vars` allow-list. + + Returns: + bool: True if at least one environment var is supplied by the user. + """ + from ray.air.constants import AIR_ENV_VARS + from ray.train.constants import TRAIN_ENV_VARS + from ray.tune.constants import TUNE_ENV_VARS + + all_ray_air_env_vars = sorted( + set().union(AIR_ENV_VARS, TUNE_ENV_VARS, TRAIN_ENV_VARS) + ) + + user_supplied_env_vars = [] + + for env_var in all_ray_air_env_vars: + if env_var in os.environ: + user_supplied_env_vars.append(env_var) + + if user_supplied_env_vars: + env_vars_str = json.dumps(user_supplied_env_vars) + record_extra_usage_tag(TagKey.AIR_ENV_VARS, env_vars_str) + return True + + return False + + +def tag_air_entrypoint(entrypoint: AirEntrypoint) -> None: + """Records the entrypoint to an AIR training run.""" + assert entrypoint in AirEntrypoint + record_extra_usage_tag(TagKey.AIR_ENTRYPOINT, entrypoint.value) diff --git a/.venv/lib/python3.11/site-packages/ray/air/_internal/util.py b/.venv/lib/python3.11/site-packages/ray/air/_internal/util.py new file mode 100644 index 0000000000000000000000000000000000000000..ddceba726ee46ea1a1e884e3511430af1e282870 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/air/_internal/util.py @@ -0,0 +1,134 @@ +import copy +import logging +import os +import queue +import socket +import threading +from contextlib import closing +from typing import Optional + +import numpy as np + +from ray.air.constants import _ERROR_REPORT_TIMEOUT + +logger = logging.getLogger(__name__) + + +def find_free_port(): + with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as s: + s.bind(("", 0)) + s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + return s.getsockname()[1] + + +def is_nan(value): + return np.isnan(value) + + +def is_nan_or_inf(value): + return is_nan(value) or np.isinf(value) + + +class 
class StartTraceback(Exception):
    """These exceptions (and their tracebacks) can be skipped with `skip_exceptions`"""

    pass


class StartTracebackWithWorkerRank(StartTraceback):
    """A ``StartTraceback`` that additionally records which worker raised it."""

    def __init__(self, worker_rank: int) -> None:
        super().__init__()
        self.worker_rank = worker_rank

    def __reduce__(self):
        # Preserve ``worker_rank`` across pickling (e.g. when the exception
        # crosses a process boundary).
        return (self.__class__, (self.worker_rank,))


def skip_exceptions(exc: Optional[Exception]) -> Exception:
    """Skip all contained `StartTracebacks` to reduce traceback output.

    Returns a shallow copy of the exception with all `StartTracebacks` removed.

    If the RAY_AIR_FULL_TRACEBACKS environment variable is set,
    the original exception (not a copy) is returned.
    """
    keep_full = bool(int(os.environ.get("RAY_AIR_FULL_TRACEBACKS", "0")))
    if keep_full:
        # User explicitly asked for the untouched traceback chain.
        return exc

    if isinstance(exc, StartTraceback):
        # Drop this marker exception and continue with whatever caused it.
        return skip_exceptions(exc.__cause__)

    # Shallow copy prevents recursive __cause__/__context__ references.
    shortened = copy.copy(exc).with_traceback(exc.__traceback__)

    # Recurse so nested causes are shortened as well.
    cause = getattr(exc, "__cause__", None)
    if cause:
        shortened.__cause__ = skip_exceptions(cause)

    return shortened


def exception_cause(exc: Optional[Exception]) -> Optional[Exception]:
    """Return ``exc.__cause__``, or ``None`` when there is no exception/cause."""
    if not exc:
        return None
    return getattr(exc, "__cause__", None)


class RunnerThread(threading.Thread):
    """Supervisor thread that runs your script."""

    def __init__(self, *args, error_queue, **kwargs):
        threading.Thread.__init__(self, *args, **kwargs)
        # Queue used to hand exceptions back to the main thread.
        self._error_queue = error_queue
        # Return value of the target, exposed via ``join()``.
        self._ret = None

    def _propagate_exception(self, e: BaseException):
        try:
            # report the error but avoid indefinite blocking which would
            # prevent the exception from being propagated in the unlikely
            # case that something went terribly wrong
            self._error_queue.put(e, block=True, timeout=_ERROR_REPORT_TIMEOUT)
        except queue.Full:
            logger.critical(
                (
                    "Runner Thread was unable to report error to main "
                    "function runner thread. This means a previous error "
                    "was not processed. This should never happen."
                )
            )

    def run(self):
        # Execute the user target; translate "benign" exits into clean
        # termination and forward real errors to the main thread.
        try:
            self._ret = self._target(*self._args, **self._kwargs)
        except StopIteration:
            logger.debug(
                (
                    "Thread runner raised StopIteration. Interpreting it as a "
                    "signal to terminate the thread without error."
                )
            )
        except SystemExit as exit_exc:
            # Do not propagate up for graceful termination.
            if exit_exc.code == 0:
                logger.debug(
                    (
                        "Thread runner raised SystemExit with error code 0. "
                        "Interpreting it as a signal to terminate the thread "
                        "without error."
                    )
                )
            else:
                # If non-zero exit code, then raise exception to main thread.
                self._propagate_exception(exit_exc)
        except BaseException as err:
            # Propagate all other exceptions to the main thread.
            self._propagate_exception(err)

    def join(self, timeout=None):
        """Wait for the thread to finish and return the target's return value."""
        super().join(timeout)
        return self._ret

# NOTE(review): the remainder of this span is git-diff metadata from the
# original patch chunk, preserved here (commented) so the surrounding diff
# stays coherent:
# diff --git a/.venv/lib/python3.11/site-packages/ray/air/examples/__init__.py b/.venv/lib/python3.11/site-packages/ray/air/examples/__init__.py
# new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
# diff --git a/.venv/lib/python3.11/site-packages/ray/air/examples/__pycache__/__init__.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/air/examples/__pycache__/__init__.cpython-311.pyc
# new file mode 100644 index 0000000000000000000000000000000000000000..2dc7cbe872fb03edb82aff479e3931718562bfc5
# Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/air/examples/__pycache__/__init__.cpython-311.pyc differ
# diff --git a/.venv/lib/python3.11/site-packages/ray/air/examples/__pycache__/custom_trainer.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/air/examples/__pycache__/custom_trainer.cpython-311.pyc
# new file mode 100644 index 0000000000000000000000000000000000000000..44013899d4f5d2865c93e4eae9bbe69ce5145e40
# Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/air/examples/__pycache__/custom_trainer.cpython-311.pyc differ
# diff --git a/.venv/lib/python3.11/site-packages/ray/air/examples/custom_trainer.py b/.venv/lib/python3.11/site-packages/ray/air/examples/custom_trainer.py
# new file mode 100644 index 0000000000000000000000000000000000000000..1ac37ec892300fdf28bd869ce12c2c4a17ccf3b6
# --- /dev/null
# +++ b/.venv/lib/python3.11/site-packages/ray/air/examples/custom_trainer.py
# @@ -0,0 +1,61 @@
# ruff: noqa
# isort: skip_file
# TODO(rliaw): Include this in the docs.
# fmt: off
# __custom_trainer_begin__
import torch

from ray import train
from ray.train.trainer import BaseTrainer


class MyPytorchTrainer(BaseTrainer):
    """Example custom Trainer: fits a one-parameter linear model with SGD."""

    def setup(self):
        # Single linear layer plus its optimizer; created once before training.
        self.model = torch.nn.Linear(1, 1)
        self.optimizer = torch.optim.SGD(self.model.parameters(), lr=0.1)

    def training_loop(self):
        # You can access any Trainer attributes directly in this method.
        # self.datasets["train"] has already been
        # preprocessed by self.preprocessor
        dataset = self.datasets["train"]

        loss_fn = torch.nn.MSELoss()

        for epoch in range(10):
            epoch_loss = 0
            batch_count = 0
            for batch in dataset.iter_torch_batches(dtypes=torch.float):
                # Compute prediction error
                features = torch.unsqueeze(batch["x"], 1)
                targets = batch["y"]
                batch_loss = loss_fn(self.model(features), targets)

                # Backpropagation
                self.optimizer.zero_grad()
                batch_loss.backward()
                self.optimizer.step()

                epoch_loss += batch_loss.item()
                batch_count += 1
            epoch_loss /= batch_count

            # Use Tune functions to report intermediate
            # results.
            train.report({"loss": epoch_loss, "epoch": epoch})


# __custom_trainer_end__
# fmt: on


# fmt: off
# __custom_trainer_usage_begin__
import ray

train_dataset = ray.data.from_items([{"x": i, "y": i} for i in range(3)])
my_trainer = MyPytorchTrainer(datasets={"train": train_dataset})
result = my_trainer.fit()
# __custom_trainer_usage_end__
# fmt: on

# NOTE(review): the remainder of this span is git-diff metadata plus the new
# execution/_internal/__init__.py from the original patch chunk, preserved so
# the surrounding diff stays coherent:
# diff --git a/.venv/lib/python3.11/site-packages/ray/air/execution/_internal/__init__.py b/.venv/lib/python3.11/site-packages/ray/air/execution/_internal/__init__.py
# new file mode 100644 index 0000000000000000000000000000000000000000..db48d7854eec8683013753f9d54a667826ee4071
# --- /dev/null
# +++ b/.venv/lib/python3.11/site-packages/ray/air/execution/_internal/__init__.py
# @@ -0,0 +1,5 @@
from ray.air.execution._internal.actor_manager import RayActorManager
from ray.air.execution._internal.barrier import Barrier
from ray.air.execution._internal.tracked_actor import TrackedActor

__all__ = ["Barrier", "RayActorManager", "TrackedActor"]
# diff --git a/.venv/lib/python3.11/site-packages/ray/air/execution/_internal/__pycache__/__init__.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/air/execution/_internal/__pycache__/__init__.cpython-311.pyc
# new file mode 100644 index 0000000000000000000000000000000000000000..6dd5b621d29a90d409ae78bdae0e9ea3030939ec
# Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/air/execution/_internal/__pycache__/__init__.cpython-311.pyc differ
# diff --git a/.venv/lib/python3.11/site-packages/ray/air/execution/_internal/__pycache__/actor_manager.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/air/execution/_internal/__pycache__/actor_manager.cpython-311.pyc
# new file mode 100644 index 0000000000000000000000000000000000000000..f6bfb54533a59d915130215e572cd1bf334c0cb4
# Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/air/execution/_internal/__pycache__/actor_manager.cpython-311.pyc differ
# diff --git
a/.venv/lib/python3.11/site-packages/ray/air/execution/_internal/__pycache__/barrier.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/air/execution/_internal/__pycache__/barrier.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b3fc47a728e254ca31d807747ab9cc74d591da9a Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/air/execution/_internal/__pycache__/barrier.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/air/execution/_internal/__pycache__/event_manager.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/air/execution/_internal/__pycache__/event_manager.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..59bbb107cc3202badc6b86daff2c88d0cab4d21d Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/air/execution/_internal/__pycache__/event_manager.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/air/execution/_internal/__pycache__/tracked_actor.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/air/execution/_internal/__pycache__/tracked_actor.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..099b3a74d045ec11f7df8fe27a4f246a32a96d77 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/air/execution/_internal/__pycache__/tracked_actor.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/air/execution/_internal/__pycache__/tracked_actor_task.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/air/execution/_internal/__pycache__/tracked_actor_task.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f394a8492a1c6a38d41f3cbb22fc0221c42361ca Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/air/execution/_internal/__pycache__/tracked_actor_task.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/air/execution/_internal/actor_manager.py 
b/.venv/lib/python3.11/site-packages/ray/air/execution/_internal/actor_manager.py new file mode 100644 index 0000000000000000000000000000000000000000..33cf6d9ec9f28ca72197c176c5049c19a7239675 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/air/execution/_internal/actor_manager.py @@ -0,0 +1,894 @@ +import logging +import random +import time +import uuid +from collections import Counter, defaultdict +from typing import Any, Callable, Dict, List, Optional, Set, Tuple, Type, Union + +import ray +from ray.air.execution._internal.event_manager import RayEventManager +from ray.air.execution._internal.tracked_actor import TrackedActor +from ray.air.execution._internal.tracked_actor_task import TrackedActorTask +from ray.air.execution.resources import ( + AcquiredResources, + ResourceManager, + ResourceRequest, +) +from ray.exceptions import RayActorError, RayTaskError + +logger = logging.getLogger(__name__) + + +class RayActorManager: + """Management class for Ray actors and actor tasks. + + This class provides an event-based management interface for actors, and + actor tasks. + + The manager can be used to start actors, stop actors, and schedule and + track task futures on these actors. + The manager will then invoke callbacks related to the tracked entities. + + For instance, when an actor is added with + :meth:`add_actor() `, + a :ref:`TrackedActor `, + an ``on_result`` callback can be specified that is invoked when the task + successfully resolves, and an ``on_error`` callback will resolve when the + task fails. + + The RayActorManager does not implement any true asynchronous processing. Control + has to be explicitly yielded to the event manager via :meth:`RayActorManager.next`. + Callbacks will only be invoked when control is with the RayActorManager, and + callbacks will always be executed sequentially in order of arriving events. + + Args: + resource_manager: Resource manager used to request resources for the actors. + + Example: + + .. 
code-block:: python + + from ray.air.execution import ResourceRequest + from ray.air.execution._internal import RayActorManager + + actor_manager = RayActorManager() + + # Request an actor + tracked_actor = actor_manager.add_actor( + ActorClass, + kwargs={}, + resource_request=ResourceRequest([{"CPU": 1}]), + on_start=actor_start_callback, + on_stop=actor_stop_callback, + on_error=actor_error_callback + ) + + # Yield control to event manager to start actor + actor_manager.next() + + # Start task on the actor (ActorClass.foo.remote()) + tracked_actor_task = actor_manager.schedule_actor_task( + tracked_actor, + method_name="foo", + on_result=task_result_callback, + on_error=task_error_callback + ) + + # Again yield control to event manager to process task futures + actor_manager.wait() + + """ + + def __init__(self, resource_manager: ResourceManager): + self._resource_manager: ResourceManager = resource_manager + + self._actor_state_events = RayEventManager() + self._actor_task_events = RayEventManager() + + # --- + # Tracked actor futures. + + # This maps TrackedActor objects to their futures. We use this to see if an + # actor has any futures scheduled and to remove them when we terminate an actor. + + # Actors to actor task futures + self._tracked_actors_to_task_futures: Dict[ + TrackedActor, Set[ray.ObjectRef] + ] = defaultdict(set) + + # Actors to actor state futures (start/terminate) + self._tracked_actors_to_state_futures: Dict[ + TrackedActor, Set[ray.ObjectRef] + ] = defaultdict(set) + + # --- + # Pending actors. + # We use three dicts for actors that are requested but not yet started. + + # This dict keeps a list of actors associated with each resource request. + # We use this to start actors in the correct order when their resources + # become available. + self._resource_request_to_pending_actors: Dict[ + ResourceRequest, List[TrackedActor] + ] = defaultdict(list) + + # This dict stores the actor class, kwargs, and resource request of + # pending actors. 
Once the resources are available, we start the remote + # actor class with its args. We need the resource request to cancel it + # if needed. + self._pending_actors_to_attrs: Dict[ + TrackedActor, Tuple[Type, Dict[str, Any], ResourceRequest] + ] = {} + + # This dict keeps track of cached actor tasks. We can't schedule actor + # tasks before the actor is actually scheduled/live. So when the caller + # tries to schedule a task, we cache it here, and schedule it once the + # actor is started. + self._pending_actors_to_enqueued_actor_tasks: Dict[ + TrackedActor, List[Tuple[TrackedActorTask, str, Tuple[Any], Dict[str, Any]]] + ] = defaultdict(list) + + # --- + # Live actors. + # We keep one dict for actors that are currently running and a set of + # actors that we should forcefully kill. + + # This dict associates the TrackedActor object with the Ray actor handle + # and the resources associated to the actor. We use it to schedule the + # actual ray tasks, and to return the resources when the actor stopped. + self._live_actors_to_ray_actors_resources: Dict[ + TrackedActor, Tuple[ray.actor.ActorHandle, AcquiredResources] + ] = {} + self._live_resource_cache: Optional[Dict[str, Any]] = None + + # This dict contains all actors that should be killed (after calling + # `remove_actor()`). Kill requests will be handled in wait(). + self._live_actors_to_kill: Set[TrackedActor] = set() + + # Track failed actors + self._failed_actor_ids: Set[int] = set() + + def next(self, timeout: Optional[Union[int, float]] = None) -> bool: + """Yield control to event manager to await the next event and invoke callbacks. + + Calling this method will wait for up to ``timeout`` seconds for the next + event to arrive. + + When events arrive, callbacks relating to the events will be + invoked. A timeout of ``None`` will block until the next event arrives. 
+ + Note: + If an actor task fails with a ``RayActorError``, this is one event, + but it may trigger _two_ `on_error` callbacks: One for the actor, + and one for the task. + + Note: + The ``timeout`` argument is used for pure waiting time for events. It does + not include time spent on processing callbacks. Depending on the processing + time of the callbacks, it can take much longer for this function to + return than the specified timeout. + + Args: + timeout: Timeout in seconds to wait for next event. + + Returns: + True if at least one event was processed. + + """ + # First issue any pending forceful actor kills + actor_killed = self._try_kill_actor() + + # We always try to start actors as this won't trigger an event callback + self._try_start_actors() + + # If an actor was killed, this was our event, and we return. + if actor_killed: + return True + + # Otherwise, collect all futures and await the next. + resource_futures = self._resource_manager.get_resource_futures() + actor_state_futures = self._actor_state_events.get_futures() + actor_task_futures = self._actor_task_events.get_futures() + + # Shuffle state futures + shuffled_state_futures = list(actor_state_futures) + random.shuffle(shuffled_state_futures) + + # Shuffle task futures + shuffled_task_futures = list(actor_task_futures) + random.shuffle(shuffled_task_futures) + + # Prioritize resource futures over actor state over task futures + all_futures = resource_futures + shuffled_state_futures + shuffled_task_futures + + start_wait = time.monotonic() + ready, _ = ray.wait(all_futures, num_returns=1, timeout=timeout) + + if not ready: + return False + + [future] = ready + + if future in actor_state_futures: + self._actor_state_events.resolve_future(future) + elif future in actor_task_futures: + self._actor_task_events.resolve_future(future) + else: + self._handle_ready_resource_future() + # Ready resource futures don't count as one event as they don't trigger + # any callbacks. 
So we repeat until we hit anything that is not a resource + # future. + time_taken = time.monotonic() - start_wait + return self.next( + timeout=max(1e-9, timeout - time_taken) if timeout is not None else None + ) + + self._try_start_actors() + return True + + def _actor_start_resolved(self, tracked_actor: TrackedActor, future: ray.ObjectRef): + """Callback to be invoked when actor started""" + self._tracked_actors_to_state_futures[tracked_actor].remove(future) + + if tracked_actor._on_start: + tracked_actor._on_start(tracked_actor) + + def _actor_stop_resolved(self, tracked_actor: TrackedActor): + """Callback to be invoked when actor stopped""" + self._cleanup_actor(tracked_actor=tracked_actor) + + if tracked_actor._on_stop: + tracked_actor._on_stop(tracked_actor) + + def _actor_start_failed(self, tracked_actor: TrackedActor, exception: Exception): + """Callback to be invoked when actor start/stop failed""" + self._failed_actor_ids.add(tracked_actor.actor_id) + + self._cleanup_actor(tracked_actor=tracked_actor) + + if tracked_actor._on_error: + tracked_actor._on_error(tracked_actor, exception) + + def _actor_task_failed( + self, tracked_actor_task: TrackedActorTask, exception: Exception + ): + """Handle an actor task future that became ready. 
+ + - On actor error, trigger actor error callback AND error task error callback + - On task error, trigger actor task error callback + - On success, trigger actor task result callback + """ + tracked_actor = tracked_actor_task._tracked_actor + + if isinstance(exception, RayActorError): + self._failed_actor_ids.add(tracked_actor.actor_id) + + # Clean up any references to the actor and its futures + self._cleanup_actor(tracked_actor=tracked_actor) + + # Handle actor state callbacks + if tracked_actor._on_error: + tracked_actor._on_error(tracked_actor, exception) + + # Then trigger actor task error callback + if tracked_actor_task._on_error: + tracked_actor_task._on_error(tracked_actor, exception) + + elif isinstance(exception, RayTaskError): + # Otherwise only the task failed. Invoke callback + if tracked_actor_task._on_error: + tracked_actor_task._on_error(tracked_actor, exception) + else: + raise RuntimeError( + f"Caught unexpected exception: {exception}" + ) from exception + + def _actor_task_resolved(self, tracked_actor_task: TrackedActorTask, result: Any): + tracked_actor = tracked_actor_task._tracked_actor + + # Trigger actor task result callback + if tracked_actor_task._on_result: + tracked_actor_task._on_result(tracked_actor, result) + + def _handle_ready_resource_future(self): + """Handle a resource future that became ready. + + - Update state of the resource manager + - Try to start one actor + """ + # Force resource manager to update internal state + self._resource_manager.update_state() + # We handle resource futures one by one, so only try to start 1 actor at a time + self._try_start_actors(max_actors=1) + + def _try_start_actors(self, max_actors: Optional[int] = None) -> int: + """Try to start up to ``max_actors`` actors. + + This function will iterate through all resource requests we collected for + pending actors. As long as a resource request can be fulfilled (resources + are available), we try to start as many actors as possible. 
+ + This will schedule a `Actor.__ray_ready__()` future which, once resolved, + will trigger the `TrackedActor.on_start` callback. + """ + started_actors = 0 + + # Iterate through all resource requests + for resource_request in self._resource_request_to_pending_actors: + if max_actors is not None and started_actors >= max_actors: + break + + # While we have resources ready and there are actors left to schedule + while ( + self._resource_manager.has_resources_ready(resource_request) + and self._resource_request_to_pending_actors[resource_request] + ): + # Acquire resources for actor + acquired_resources = self._resource_manager.acquire_resources( + resource_request + ) + assert acquired_resources + + # Get tracked actor to start + candidate_actors = self._resource_request_to_pending_actors[ + resource_request + ] + assert candidate_actors + + tracked_actor = candidate_actors.pop(0) + + # Get actor class and arguments + actor_cls, kwargs, _ = self._pending_actors_to_attrs.pop(tracked_actor) + + if not isinstance(actor_cls, ray.actor.ActorClass): + actor_cls = ray.remote(actor_cls) + + # Associate to acquired resources + [remote_actor_cls] = acquired_resources.annotate_remote_entities( + [actor_cls] + ) + + # Start Ray actor + actor = remote_actor_cls.remote(**kwargs) + + # Track + self._live_actors_to_ray_actors_resources[tracked_actor] = ( + actor, + acquired_resources, + ) + self._live_resource_cache = None + + # Schedule ready future + future = actor.__ray_ready__.remote() + + self._tracked_actors_to_state_futures[tracked_actor].add(future) + + # We need to create the callbacks in a function so tracked_actors + # are captured correctly. 
+ def create_callbacks( + tracked_actor: TrackedActor, future: ray.ObjectRef + ): + def on_actor_start(result: Any): + self._actor_start_resolved( + tracked_actor=tracked_actor, future=future + ) + + def on_error(exception: Exception): + self._actor_start_failed( + tracked_actor=tracked_actor, exception=exception + ) + + return on_actor_start, on_error + + on_actor_start, on_error = create_callbacks( + tracked_actor=tracked_actor, future=future + ) + + self._actor_state_events.track_future( + future=future, + on_result=on_actor_start, + on_error=on_error, + ) + + self._enqueue_cached_actor_tasks(tracked_actor=tracked_actor) + + started_actors += 1 + + return started_actors + + def _enqueue_cached_actor_tasks(self, tracked_actor: TrackedActor): + assert tracked_actor in self._live_actors_to_ray_actors_resources + + # Enqueue cached futures + cached_tasks = self._pending_actors_to_enqueued_actor_tasks.pop( + tracked_actor, [] + ) + for tracked_actor_task, method_name, args, kwargs in cached_tasks: + self._schedule_tracked_actor_task( + tracked_actor_task=tracked_actor_task, + method_name=method_name, + args=args, + kwargs=kwargs, + ) + + def _try_kill_actor(self) -> bool: + """Try to kill actor scheduled for termination.""" + if not self._live_actors_to_kill: + return False + + tracked_actor = self._live_actors_to_kill.pop() + + # Remove from tracked actors + ( + ray_actor, + acquired_resources, + ) = self._live_actors_to_ray_actors_resources[tracked_actor] + + # Hard kill if requested + ray.kill(ray_actor) + + self._cleanup_actor_futures(tracked_actor) + + self._actor_stop_resolved(tracked_actor) + + return True + + def _cleanup_actor(self, tracked_actor: TrackedActor): + self._cleanup_actor_futures(tracked_actor) + + # Remove from tracked actors + ( + ray_actor, + acquired_resources, + ) = self._live_actors_to_ray_actors_resources.pop(tracked_actor) + self._live_resource_cache = None + + # Return resources + 
self._resource_manager.free_resources(acquired_resource=acquired_resources) + + @property + def all_actors(self) -> List[TrackedActor]: + """Return all ``TrackedActor`` objects managed by this manager instance.""" + return self.live_actors + self.pending_actors + + @property + def live_actors(self) -> List[TrackedActor]: + """Return all ``TrackedActor`` objects that are currently alive.""" + return list(self._live_actors_to_ray_actors_resources) + + @property + def pending_actors(self) -> List[TrackedActor]: + """Return all ``TrackedActor`` objects that are currently pending.""" + return list(self._pending_actors_to_attrs) + + @property + def num_live_actors(self): + """Return number of started actors.""" + return len(self.live_actors) + + @property + def num_pending_actors(self) -> int: + """Return number of pending (not yet started) actors.""" + return len(self.pending_actors) + + @property + def num_total_actors(self): + """Return number of total actors.""" + return len(self.all_actors) + + @property + def num_actor_tasks(self): + """Return number of pending tasks""" + return self._actor_task_events.num_futures + + def get_live_actors_resources(self): + if self._live_resource_cache: + return self._live_resource_cache + + counter = Counter() + for _, acq in self._live_actors_to_ray_actors_resources.values(): + for bdl in acq.resource_request.bundles: + counter.update(bdl) + self._live_resource_cache = dict(counter) + return self._live_resource_cache + + def add_actor( + self, + cls: Union[Type, ray.actor.ActorClass], + kwargs: Dict[str, Any], + resource_request: ResourceRequest, + *, + on_start: Optional[Callable[[TrackedActor], None]] = None, + on_stop: Optional[Callable[[TrackedActor], None]] = None, + on_error: Optional[Callable[[TrackedActor, Exception], None]] = None, + ) -> TrackedActor: + """Add an actor to be tracked. + + This method will request resources to start the actor. 
Once the resources + are available, the actor will be started and the + :meth:`TrackedActor.on_start + ` callback + will be invoked. + + Args: + cls: Actor class to schedule. + kwargs: Keyword arguments to pass to actor class on construction. + resource_request: Resources required to start the actor. + on_start: Callback to invoke when the actor started. + on_stop: Callback to invoke when the actor stopped. + on_error: Callback to invoke when the actor failed. + + Returns: + Tracked actor object to reference actor in subsequent API calls. + + """ + tracked_actor = TrackedActor( + uuid.uuid4().int, on_start=on_start, on_stop=on_stop, on_error=on_error + ) + + self._pending_actors_to_attrs[tracked_actor] = cls, kwargs, resource_request + self._resource_request_to_pending_actors[resource_request].append(tracked_actor) + + self._resource_manager.request_resources(resource_request=resource_request) + + return tracked_actor + + def remove_actor( + self, + tracked_actor: TrackedActor, + kill: bool = False, + stop_future: Optional[ray.ObjectRef] = None, + ) -> bool: + """Remove a tracked actor. + + If the actor has already been started, this will stop the actor. This will + trigger the :meth:`TrackedActor.on_stop + ` + callback once the actor stopped. + + If the actor has only been requested, but not started, yet, this will cancel + the actor request. This will not trigger any callback. + + If ``kill=True``, this will use ``ray.kill()`` to forcefully terminate the + actor. Otherwise, graceful actor deconstruction will be scheduled after + all currently tracked futures are resolved. + + This method returns a boolean, indicating if a stop future is tracked and + the ``on_stop`` callback will be invoked. If the actor has been alive, + this will be ``True``. If the actor hasn't been scheduled, yet, or failed + (and triggered the ``on_error`` callback), this will be ``False``. + + Args: + tracked_actor: Tracked actor to be removed. 
+ kill: If set, will forcefully terminate the actor instead of gracefully + scheduling termination. + stop_future: If set, use this future to track actor termination. + Otherwise, schedule a ``__ray_terminate__`` future. + + Returns: + Boolean indicating if the actor was previously alive, and thus whether + a callback will be invoked once it is terminated. + + """ + if tracked_actor.actor_id in self._failed_actor_ids: + logger.debug( + f"Tracked actor already failed, no need to remove: {tracked_actor}" + ) + return False + elif tracked_actor in self._live_actors_to_ray_actors_resources: + # Ray actor is running. + + if not kill: + # Schedule __ray_terminate__ future + ray_actor, _ = self._live_actors_to_ray_actors_resources[tracked_actor] + + # Clear state futures here to avoid resolving __ray_ready__ futures + for future in list( + self._tracked_actors_to_state_futures[tracked_actor] + ): + self._actor_state_events.discard_future(future) + self._tracked_actors_to_state_futures[tracked_actor].remove(future) + + # If the __ray_ready__ future hasn't resolved yet, but we already + # scheduled the actor via Actor.remote(), we just want to stop + # it but not trigger any callbacks. This is in accordance with + # the contract defined in the docstring. + tracked_actor._on_start = None + tracked_actor._on_stop = None + tracked_actor._on_error = None + + def on_actor_stop(*args, **kwargs): + self._actor_stop_resolved(tracked_actor=tracked_actor) + + if stop_future: + # If the stop future was schedule via the actor manager, + # discard (track it as state future instead). 
+ self._actor_task_events.discard_future(stop_future) + else: + stop_future = ray_actor.__ray_terminate__.remote() + + self._actor_state_events.track_future( + future=stop_future, + on_result=on_actor_stop, + on_error=on_actor_stop, + ) + + self._tracked_actors_to_state_futures[tracked_actor].add(stop_future) + else: + # kill = True + self._live_actors_to_kill.add(tracked_actor) + + return True + + elif tracked_actor in self._pending_actors_to_attrs: + # Actor is pending, stop + _, _, resource_request = self._pending_actors_to_attrs.pop(tracked_actor) + self._resource_request_to_pending_actors[resource_request].remove( + tracked_actor + ) + self._resource_manager.cancel_resource_request( + resource_request=resource_request + ) + return False + else: + raise ValueError(f"Unknown tracked actor: {tracked_actor}") + + def is_actor_started(self, tracked_actor: TrackedActor) -> bool: + """Returns True if the actor has been started. + + Args: + tracked_actor: Tracked actor object. + """ + return ( + tracked_actor in self._live_actors_to_ray_actors_resources + and tracked_actor.actor_id not in self._failed_actor_ids + ) + + def is_actor_failed(self, tracked_actor: TrackedActor) -> bool: + return tracked_actor.actor_id in self._failed_actor_ids + + def get_actor_resources( + self, tracked_actor: TrackedActor + ) -> Optional[AcquiredResources]: + """Returns the acquired resources of an actor that has been started. + + This will return ``None`` if the actor has not been started, yet. + + Args: + tracked_actor: Tracked actor object. 
+ """ + if not self.is_actor_started(tracked_actor): + return None + + return self._live_actors_to_ray_actors_resources[tracked_actor][1] + + def schedule_actor_task( + self, + tracked_actor: TrackedActor, + method_name: str, + args: Optional[Tuple] = None, + kwargs: Optional[Dict] = None, + on_result: Optional[Callable[[TrackedActor, Any], None]] = None, + on_error: Optional[Callable[[TrackedActor, Exception], None]] = None, + _return_future: bool = False, + ) -> Optional[ray.ObjectRef]: + """Schedule and track a task on an actor. + + This method will schedule a remote task ``method_name`` on the + ``tracked_actor``. + + This method accepts two optional callbacks that will be invoked when + their respective events are triggered. + + The ``on_result`` callback is triggered when a task resolves successfully. + It should accept two arguments: The actor for which the + task resolved, and the result received from the remote call. + + The ``on_error`` callback is triggered when a task fails. + It should accept two arguments: The actor for which the + task threw an error, and the exception. + + Args: + tracked_actor: Actor to schedule task on. + method_name: Remote method name to invoke on the actor. If this is + e.g. ``foo``, then ``actor.foo.remote(*args, **kwargs)`` will be + scheduled. + args: Arguments to pass to the task. + kwargs: Keyword arguments to pass to the task. + on_result: Callback to invoke when the task resolves. + on_error: Callback to invoke when the task fails. + + Raises: + ValueError: If the ``tracked_actor`` is not managed by this event manager. 
+ + """ + args = args or tuple() + kwargs = kwargs or {} + + if tracked_actor.actor_id in self._failed_actor_ids: + return + + tracked_actor_task = TrackedActorTask( + tracked_actor=tracked_actor, on_result=on_result, on_error=on_error + ) + + if tracked_actor not in self._live_actors_to_ray_actors_resources: + # Actor is not started, yet + if tracked_actor not in self._pending_actors_to_attrs: + raise ValueError( + f"Tracked actor is not managed by this event manager: " + f"{tracked_actor}" + ) + + # Cache tasks for future execution + self._pending_actors_to_enqueued_actor_tasks[tracked_actor].append( + (tracked_actor_task, method_name, args, kwargs) + ) + else: + res = self._schedule_tracked_actor_task( + tracked_actor_task=tracked_actor_task, + method_name=method_name, + args=args, + kwargs=kwargs, + _return_future=_return_future, + ) + if _return_future: + return res[1] + + def _schedule_tracked_actor_task( + self, + tracked_actor_task: TrackedActorTask, + method_name: str, + *, + args: Optional[Tuple] = None, + kwargs: Optional[Dict] = None, + _return_future: bool = False, + ) -> Union[TrackedActorTask, Tuple[TrackedActorTask, ray.ObjectRef]]: + tracked_actor = tracked_actor_task._tracked_actor + ray_actor, _ = self._live_actors_to_ray_actors_resources[tracked_actor] + + try: + remote_fn = getattr(ray_actor, method_name) + except AttributeError as e: + raise AttributeError( + f"Remote function `{method_name}()` does not exist for this actor." 
+ ) from e + + def on_result(result: Any): + self._actor_task_resolved( + tracked_actor_task=tracked_actor_task, result=result + ) + + def on_error(exception: Exception): + self._actor_task_failed( + tracked_actor_task=tracked_actor_task, exception=exception + ) + + future = remote_fn.remote(*args, **kwargs) + + self._actor_task_events.track_future( + future=future, on_result=on_result, on_error=on_error + ) + + self._tracked_actors_to_task_futures[tracked_actor].add(future) + + if _return_future: + return tracked_actor_task, future + + return tracked_actor_task + + def schedule_actor_tasks( + self, + tracked_actors: List[TrackedActor], + method_name: str, + *, + args: Optional[Union[Tuple, List[Tuple]]] = None, + kwargs: Optional[Union[Dict, List[Dict]]] = None, + on_result: Optional[Callable[[TrackedActor, Any], None]] = None, + on_error: Optional[Callable[[TrackedActor, Exception], None]] = None, + ) -> None: + """Schedule and track tasks on a list of actors. + + This method will schedule a remote task ``method_name`` on all + ``tracked_actors``. + + ``args`` and ``kwargs`` can be a single tuple/dict, in which case the same + (keyword) arguments are passed to all actors. If a list is passed instead, + they are mapped to the respective actors. In that case, the list of + (keyword) arguments must be the same length as the list of actors. + + This method accepts two optional callbacks that will be invoked when + their respective events are triggered. + + The ``on_result`` callback is triggered when a task resolves successfully. + It should accept two arguments: The actor for which the + task resolved, and the result received from the remote call. + + The ``on_error`` callback is triggered when a task fails. + It should accept two arguments: The actor for which the + task threw an error, and the exception. + + Args: + tracked_actors: List of actors to schedule tasks on. + method_name: Remote actor method to invoke on the actors. If this is + e.g. 
``foo``, then ``actor.foo.remote(*args, **kwargs)`` will be + scheduled on all actors. + args: Arguments to pass to the task. + kwargs: Keyword arguments to pass to the task. + on_result: Callback to invoke when the task resolves. + on_error: Callback to invoke when the task fails. + + """ + if not isinstance(args, List): + args_list = [args] * len(tracked_actors) + else: + if len(tracked_actors) != len(args): + raise ValueError( + f"Length of args must be the same as tracked_actors " + f"list. Got `len(kwargs)={len(kwargs)}` and " + f"`len(tracked_actors)={len(tracked_actors)}" + ) + args_list = args + + if not isinstance(kwargs, List): + kwargs_list = [kwargs] * len(tracked_actors) + else: + if len(tracked_actors) != len(kwargs): + raise ValueError( + f"Length of kwargs must be the same as tracked_actors " + f"list. Got `len(args)={len(args)}` and " + f"`len(tracked_actors)={len(tracked_actors)}" + ) + kwargs_list = kwargs + + for tracked_actor, args, kwargs in zip(tracked_actors, args_list, kwargs_list): + self.schedule_actor_task( + tracked_actor=tracked_actor, + method_name=method_name, + args=args, + kwargs=kwargs, + on_result=on_result, + on_error=on_error, + ) + + def clear_actor_task_futures(self, tracked_actor: TrackedActor): + """Discard all actor task futures from a tracked actor.""" + futures = self._tracked_actors_to_task_futures.pop(tracked_actor, []) + for future in futures: + self._actor_task_events.discard_future(future) + + def _cleanup_actor_futures(self, tracked_actor: TrackedActor): + # Remove all actor task futures + self.clear_actor_task_futures(tracked_actor=tracked_actor) + + # Remove all actor state futures + futures = self._tracked_actors_to_state_futures.pop(tracked_actor, []) + for future in futures: + self._actor_state_events.discard_future(future) + + def cleanup(self): + for ( + actor, + acquired_resources, + ) in self._live_actors_to_ray_actors_resources.values(): + ray.kill(actor) + 
self._resource_manager.free_resources(acquired_resources) + + for ( + resource_request, + pending_actors, + ) in self._resource_request_to_pending_actors.items(): + for i in range(len(pending_actors)): + self._resource_manager.cancel_resource_request(resource_request) + + self._resource_manager.clear() + + self.__init__(resource_manager=self._resource_manager) diff --git a/.venv/lib/python3.11/site-packages/ray/air/execution/_internal/barrier.py b/.venv/lib/python3.11/site-packages/ray/air/execution/_internal/barrier.py new file mode 100644 index 0000000000000000000000000000000000000000..8cb0c5a2bf55242281090a1cc6bcac9009face52 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/air/execution/_internal/barrier.py @@ -0,0 +1,93 @@ +from typing import Any, Callable, List, Optional, Tuple + + +class Barrier: + """Barrier to collect results and process them in bulk. + + A barrier can be used to collect multiple results and process them in bulk once + a certain count or a timeout is reached. + + For instance, if ``max_results=N``, the ``on_completion`` callback will be + invoked once :meth:`arrive` has been called ``N`` times. + + The completion callback will only be invoked once, even if more results + arrive after completion. The collected results can be resetted + with :meth:`reset`, after which the callback may be invoked again. + + The completion callback should expect one argument, which is the barrier + object that completed. + + Args: + max_results: Maximum number of results to collect before a call to + :meth:`wait` resolves or the :meth:`on_completion` callback is invoked. + on_completion: Callback to invoke when ``max_results`` results + arrived at the barrier. 
+ + """ + + def __init__( + self, + max_results: int, + *, + on_completion: Optional[Callable[["Barrier"], None]] = None, + ): + self._max_results = max_results + + # on_completion callback + self._completed = False + self._on_completion = on_completion + + # Collect received results + self._results: List[Tuple[Any]] = [] + + def arrive(self, *data): + """Notify barrier that a result successfully arrived. + + This will count against the ``max_results`` limit. The received result + will be included in a call to :meth:`get_results`. + + Args: + *data: Result data to be cached. Can be obtained via :meth:`get_results`. + + """ + if len(data) == 1: + data = data[0] + + self._results.append(data) + self._check_completion() + + def _check_completion(self): + if self._completed: + # Already fired completion callback + return + + if self.num_results >= self._max_results: + # Barrier is complete + self._completed = True + + if self._on_completion: + self._on_completion(self) + + @property + def completed(self) -> bool: + """Returns True if the barrier is completed.""" + return self._completed + + @property + def num_results(self) -> int: + """Number of received (successful) results.""" + return len(self._results) + + def get_results(self) -> List[Tuple[Any]]: + """Return list of received results.""" + return self._results + + def reset(self) -> None: + """Reset barrier, removing all received results. + + Resetting the barrier will reset the completion status. When ``max_results`` + is set and enough new events arrive after resetting, the + :meth:`on_completion` callback will be invoked again. 
+ """ + self._completed = False + self._results = [] diff --git a/.venv/lib/python3.11/site-packages/ray/air/execution/_internal/event_manager.py b/.venv/lib/python3.11/site-packages/ray/air/execution/_internal/event_manager.py new file mode 100644 index 0000000000000000000000000000000000000000..fbea6f72d2925b5435931e58203d206bd0009709 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/air/execution/_internal/event_manager.py @@ -0,0 +1,148 @@ +import random +from typing import Any, Callable, Dict, Iterable, Optional, Set, Tuple, Union + +import ray + +_ResultCallback = Callable[[Any], None] +_ErrorCallback = Callable[[Exception], None] + + +class RayEventManager: + """Event manager for Ray futures. + + The event manager can be used to track futures and invoke callbacks when + they resolve. + + Futures are tracked with :meth:`track_future`. Future can then be awaited with + :meth:`wait`. When futures successfully resolve, they trigger an optional + ``on_result`` callback that can be passed to :meth:`track_future`. If they + fail, they trigger an optional ``on_error`` callback. + + Args: + shuffle_futures: If True, futures will be shuffled before awaited. This + will avoid implicit prioritization of futures within Ray. + """ + + def __init__(self, shuffle_futures: bool = True): + self._shuffle_futures = shuffle_futures + + # Map of futures to callbacks (result, error) + self._tracked_futures: Dict[ + ray.ObjectRef, Tuple[Optional[_ResultCallback], Optional[_ErrorCallback]] + ] = {} + + def track_future( + self, + future: ray.ObjectRef, + on_result: Optional[_ResultCallback] = None, + on_error: Optional[_ErrorCallback] = None, + ): + """Track a single future and invoke callbacks on resolution. + + Control has to be yielded to the event manager for the callbacks to + be invoked, either via :meth:`wait` or via :meth:`resolve_future`. + + Args: + future: Ray future to await. + on_result: Callback to invoke when the future resolves successfully. 
+ on_error: Callback to invoke when the future fails. + + """ + self._tracked_futures[future] = (on_result, on_error) + + def track_futures( + self, + futures: Iterable[ray.ObjectRef], + on_result: Optional[_ResultCallback] = None, + on_error: Optional[_ErrorCallback] = None, + ): + """Track multiple futures and invoke callbacks on resolution. + + Control has to be yielded to the event manager for the callbacks to + be invoked, either via :meth:`wait` or via :meth:`resolve_future`. + + Args: + futures: Ray futures to await. + on_result: Callback to invoke when the future resolves successfully. + on_error: Callback to invoke when the future fails. + + """ + for future in futures: + self.track_future(future, on_result=on_result, on_error=on_error) + + def discard_future(self, future: ray.ObjectRef): + """Remove future from tracking. + + The future will not be awaited anymore, and it will not trigger any callbacks. + + Args: + future: Ray futures to discard. + """ + self._tracked_futures.pop(future, None) + + def get_futures(self) -> Set[ray.ObjectRef]: + """Get futures tracked by the event manager.""" + return set(self._tracked_futures) + + @property + def num_futures(self) -> int: + return len(self._tracked_futures) + + def resolve_future(self, future: ray.ObjectRef): + """Resolve a single future. + + This method will block until the future is available. It will then + trigger the callback associated to the future and the event (success + or error), if specified. + + Args: + future: Ray future to resolve. 
+ + """ + try: + on_result, on_error = self._tracked_futures.pop(future) + except KeyError as e: + raise ValueError( + f"Future {future} is not tracked by this RayEventManager" + ) from e + + try: + result = ray.get(future) + except Exception as e: + if on_error: + on_error(e) + else: + raise e + else: + if on_result: + on_result(result) + + def wait( + self, + timeout: Optional[Union[float, int]] = None, + num_results: Optional[int] = 1, + ): + """Wait up to ``timeout`` seconds for ``num_results`` futures to resolve. + + If ``timeout=None``, this method will block until all `num_results`` futures + resolve. If ``num_results=None``, this method will await all tracked futures. + + For every future that resolves, the respective associated callbacks will be + invoked. + + Args: + timeout: Timeout in second to wait for futures to resolve. + num_results: Number of futures to await. If ``None``, will wait for + all tracked futures to resolve. + + """ + futures = list(self.get_futures()) + + if self._shuffle_futures: + random.shuffle(futures) + + num_results = num_results or len(futures) + + ready, _ = ray.wait(list(futures), timeout=timeout, num_returns=num_results) + for future in ready: + self.resolve_future(future) diff --git a/.venv/lib/python3.11/site-packages/ray/air/execution/_internal/tracked_actor.py b/.venv/lib/python3.11/site-packages/ray/air/execution/_internal/tracked_actor.py new file mode 100644 index 0000000000000000000000000000000000000000..91abd0556e40b197ea82ac8ac55b53613ef635cc --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/air/execution/_internal/tracked_actor.py @@ -0,0 +1,54 @@ +from typing import Callable, Optional + + +class TrackedActor: + """Actor tracked by an actor manager. + + This object is used to reference a Ray actor on an actor manager + + Existence of this object does not mean that the Ray actor has already been started. + Actor state can be inquired from the actor manager tracking the Ray actor. 
+ + Note: + Objects of this class are returned by the :class:`RayActorManager`. + This class should not be instantiated manually. + + Attributes: + actor_id: ID for identification of the actor within the actor manager. This + ID is not related to the Ray actor ID. + + """ + + def __init__( + self, + actor_id: int, + on_start: Optional[Callable[["TrackedActor"], None]] = None, + on_stop: Optional[Callable[["TrackedActor"], None]] = None, + on_error: Optional[Callable[["TrackedActor", Exception], None]] = None, + ): + self.actor_id = actor_id + self._on_start = on_start + self._on_stop = on_stop + self._on_error = on_error + + def set_on_start(self, on_start: Optional[Callable[["TrackedActor"], None]]): + self._on_start = on_start + + def set_on_stop(self, on_stop: Optional[Callable[["TrackedActor"], None]]): + self._on_stop = on_stop + + def set_on_error( + self, on_error: Optional[Callable[["TrackedActor", Exception], None]] + ): + self._on_error = on_error + + def __repr__(self): + return f"" + + def __eq__(self, other): + if not isinstance(other, self.__class__): + return False + return self.actor_id == other.actor_id + + def __hash__(self): + return hash(self.actor_id) diff --git a/.venv/lib/python3.11/site-packages/ray/air/execution/_internal/tracked_actor_task.py b/.venv/lib/python3.11/site-packages/ray/air/execution/_internal/tracked_actor_task.py new file mode 100644 index 0000000000000000000000000000000000000000..2fb21cbfb514924d8d38267c4e63321a27f9cb41 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/air/execution/_internal/tracked_actor_task.py @@ -0,0 +1,42 @@ +from typing import Any, Callable, Optional + +from ray.air.execution._internal.tracked_actor import TrackedActor + + +class TrackedActorTask: + """Actor task tracked by a Ray event manager. + + This container class is used to define callbacks to be invoked when + the task resolves, errors, or times out. + + Note: + Objects of this class are returned by the :class:`RayActorManager`. 
+ This class should not be instantiated manually. + + Args: + tracked_actor: Tracked actor object this task is scheduled on. + on_result: Callback to invoke when the task resolves. + on_error: Callback to invoke when the task fails. + + Example: + + .. code-block:: python + + tracked_futures = actor_manager.schedule_actor_tasks( + actor_manager.live_actors, + "foo", + on_result=lambda actor, result: print(result) + ) + + """ + + def __init__( + self, + tracked_actor: TrackedActor, + on_result: Optional[Callable[[TrackedActor, Any], None]] = None, + on_error: Optional[Callable[[TrackedActor, Exception], None]] = None, + ): + self._tracked_actor = tracked_actor + + self._on_result = on_result + self._on_error = on_error diff --git a/.venv/lib/python3.11/site-packages/ray/air/execution/resources/__pycache__/__init__.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/air/execution/resources/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3c7d19fa7e99717c8266007283cab61e6451247f Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/air/execution/resources/__pycache__/__init__.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/air/integrations/__init__.py b/.venv/lib/python3.11/site-packages/ray/air/integrations/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/.venv/lib/python3.11/site-packages/ray/air/integrations/__pycache__/__init__.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/air/integrations/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..507e8fddcef9ea6d84b614f984dc10e705870ed8 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/air/integrations/__pycache__/__init__.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/air/integrations/__pycache__/comet.cpython-311.pyc 
b/.venv/lib/python3.11/site-packages/ray/air/integrations/__pycache__/comet.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..04097100733b87d5d71e6be7a3352c3d94876c81 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/air/integrations/__pycache__/comet.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/air/integrations/__pycache__/keras.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/air/integrations/__pycache__/keras.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..46e40396d1dbc6b2bea6b807a3cfcf77669ddee3 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/air/integrations/__pycache__/keras.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/air/integrations/__pycache__/mlflow.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/air/integrations/__pycache__/mlflow.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..df86b9b7eba23af99ffefaba6f98c960f4b953e3 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/air/integrations/__pycache__/mlflow.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/air/integrations/__pycache__/wandb.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/air/integrations/__pycache__/wandb.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8582e379cc95b9cbe9bf4af6023bafa4219ed67d Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/air/integrations/__pycache__/wandb.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/air/integrations/keras.py b/.venv/lib/python3.11/site-packages/ray/air/integrations/keras.py new file mode 100644 index 0000000000000000000000000000000000000000..677213e73dedf7fc16a6199c7ac133b35c4b3827 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/air/integrations/keras.py @@ -0,0 +1,185 @@ +import shutil 
+from typing import Dict, List, Optional, Union + +from tensorflow.keras.callbacks import Callback as KerasCallback + +import ray +from ray.train.tensorflow import TensorflowCheckpoint +from ray.util.annotations import PublicAPI + + +class _Callback(KerasCallback): + """Base class for Air's Keras callbacks.""" + + _allowed = [ + "epoch_begin", + "epoch_end", + "train_batch_begin", + "train_batch_end", + "test_batch_begin", + "test_batch_end", + "predict_batch_begin", + "predict_batch_end", + "train_begin", + "train_end", + "test_begin", + "test_end", + "predict_begin", + "predict_end", + ] + + def __init__(self, on: Union[str, List[str]] = "validation_end"): + super(_Callback, self).__init__() + + if not isinstance(on, list): + on = [on] + if any(w not in self._allowed for w in on): + raise ValueError( + "Invalid trigger time selected: {}. Must be one of {}".format( + on, self._allowed + ) + ) + self._on = on + + def _handle(self, logs: Dict, when: str): + raise NotImplementedError + + def on_epoch_begin(self, epoch, logs=None): + if "epoch_begin" in self._on: + self._handle(logs, "epoch_begin") + + def on_epoch_end(self, epoch, logs=None): + if "epoch_end" in self._on: + self._handle(logs, "epoch_end") + + def on_train_batch_begin(self, batch, logs=None): + if "train_batch_begin" in self._on: + self._handle(logs, "train_batch_begin") + + def on_train_batch_end(self, batch, logs=None): + if "train_batch_end" in self._on: + self._handle(logs, "train_batch_end") + + def on_test_batch_begin(self, batch, logs=None): + if "test_batch_begin" in self._on: + self._handle(logs, "test_batch_begin") + + def on_test_batch_end(self, batch, logs=None): + if "test_batch_end" in self._on: + self._handle(logs, "test_batch_end") + + def on_predict_batch_begin(self, batch, logs=None): + if "predict_batch_begin" in self._on: + self._handle(logs, "predict_batch_begin") + + def on_predict_batch_end(self, batch, logs=None): + if "predict_batch_end" in self._on: + self._handle(logs, 
"predict_batch_end") + + def on_train_begin(self, logs=None): + if "train_begin" in self._on: + self._handle(logs, "train_begin") + + def on_train_end(self, logs=None): + if "train_end" in self._on: + self._handle(logs, "train_end") + + def on_test_begin(self, logs=None): + if "test_begin" in self._on: + self._handle(logs, "test_begin") + + def on_test_end(self, logs=None): + if "test_end" in self._on: + self._handle(logs, "test_end") + + def on_predict_begin(self, logs=None): + if "predict_begin" in self._on: + self._handle(logs, "predict_begin") + + def on_predict_end(self, logs=None): + if "predict_end" in self._on: + self._handle(logs, "predict_end") + + +@PublicAPI(stability="alpha") +class ReportCheckpointCallback(_Callback): + """Keras callback for Ray Train reporting and checkpointing. + + .. note:: + Metrics are always reported with checkpoints, even if the event isn't specified + in ``report_metrics_on``. + + Example: + .. code-block:: python + + ############# Using it in TrainSession ############### + from ray.air.integrations.keras import ReportCheckpointCallback + def train_loop_per_worker(): + strategy = tf.distribute.MultiWorkerMirroredStrategy() + with strategy.scope(): + model = build_model() + + model.fit(dataset_shard, callbacks=[ReportCheckpointCallback()]) + + Args: + metrics: Metrics to report. If this is a list, each item describes + the metric key reported to Keras, and it's reported under the + same name. If this is a dict, each key is the name reported + and the respective value is the metric key reported to Keras. + If this is None, all Keras logs are reported. + report_metrics_on: When to report metrics. Must be one of + the Keras event hooks (less the ``on_``), e.g. + "train_start" or "predict_end". Defaults to "epoch_end". + checkpoint_on: When to save checkpoints. Must be one of the Keras event hooks + (less the ``on_``), e.g. "train_start" or "predict_end". Defaults to + "epoch_end". 
+ """ + + def __init__( + self, + checkpoint_on: Union[str, List[str]] = "epoch_end", + report_metrics_on: Union[str, List[str]] = "epoch_end", + metrics: Optional[Union[str, List[str], Dict[str, str]]] = None, + ): + if isinstance(checkpoint_on, str): + checkpoint_on = [checkpoint_on] + if isinstance(report_metrics_on, str): + report_metrics_on = [report_metrics_on] + + on = list(set(checkpoint_on + report_metrics_on)) + super().__init__(on=on) + + self._checkpoint_on: List[str] = checkpoint_on + self._report_metrics_on: List[str] = report_metrics_on + self._metrics = metrics + + def _handle(self, logs: Dict, when: str): + assert when in self._checkpoint_on or when in self._report_metrics_on + + metrics = self._get_reported_metrics(logs) + + should_checkpoint = when in self._checkpoint_on + if should_checkpoint: + checkpoint = TensorflowCheckpoint.from_model(self.model) + ray.train.report(metrics, checkpoint=checkpoint) + # Clean up temporary checkpoint + shutil.rmtree(checkpoint.path, ignore_errors=True) + else: + ray.train.report(metrics, checkpoint=None) + + def _get_reported_metrics(self, logs: Dict) -> Dict: + assert isinstance(self._metrics, (type(None), str, list, dict)) + + if self._metrics is None: + reported_metrics = logs + elif isinstance(self._metrics, str): + reported_metrics = {self._metrics: logs[self._metrics]} + elif isinstance(self._metrics, list): + reported_metrics = {metric: logs[metric] for metric in self._metrics} + elif isinstance(self._metrics, dict): + reported_metrics = { + key: logs[metric] for key, metric in self._metrics.items() + } + + assert isinstance(reported_metrics, dict) + return reported_metrics diff --git a/.venv/lib/python3.11/site-packages/ray/air/integrations/mlflow.py b/.venv/lib/python3.11/site-packages/ray/air/integrations/mlflow.py new file mode 100644 index 0000000000000000000000000000000000000000..21bface0c910ad6ee03f078ec3010c9e0a433726 --- /dev/null +++ 
b/.venv/lib/python3.11/site-packages/ray/air/integrations/mlflow.py @@ -0,0 +1,325 @@ +import logging +from types import ModuleType +from typing import Dict, Optional, Union + +import ray +from ray.air._internal import usage as air_usage +from ray.air._internal.mlflow import _MLflowLoggerUtil +from ray.air.constants import TRAINING_ITERATION +from ray.tune.experiment import Trial +from ray.tune.logger import LoggerCallback +from ray.tune.result import TIMESTEPS_TOTAL +from ray.util.annotations import PublicAPI + +try: + import mlflow +except ImportError: + mlflow = None + + +logger = logging.getLogger(__name__) + + +class _NoopModule: + def __getattr__(self, item): + return _NoopModule() + + def __call__(self, *args, **kwargs): + return None + + +@PublicAPI(stability="alpha") +def setup_mlflow( + config: Optional[Dict] = None, + tracking_uri: Optional[str] = None, + registry_uri: Optional[str] = None, + experiment_id: Optional[str] = None, + experiment_name: Optional[str] = None, + tracking_token: Optional[str] = None, + artifact_location: Optional[str] = None, + run_name: Optional[str] = None, + create_experiment_if_not_exists: bool = False, + tags: Optional[Dict] = None, + rank_zero_only: bool = True, +) -> Union[ModuleType, _NoopModule]: + """Set up a MLflow session. + + This function can be used to initialize an MLflow session in a + (distributed) training or tuning run. The session will be created on the trainable. + + By default, the MLflow experiment ID is the Ray trial ID and the + MLlflow experiment name is the Ray trial name. These settings can be overwritten by + passing the respective keyword arguments. + + The ``config`` dict is automatically logged as the run parameters (excluding the + mlflow settings). + + In distributed training with Ray Train, only the zero-rank worker will initialize + mlflow. All other workers will return a noop client, so that logging is not + duplicated in a distributed run. 
This can be disabled by passing + ``rank_zero_only=False``, which will then initialize mlflow in every training + worker. + + This function will return the ``mlflow`` module or a noop module for + non-rank zero workers ``if rank_zero_only=True``. By using + ``mlflow = setup_mlflow(config)`` you can ensure that only the rank zero worker + calls the mlflow API. + + Args: + config: Configuration dict to be logged to mlflow as parameters. + tracking_uri: The tracking URI for MLflow tracking. If using + Tune in a multi-node setting, make sure to use a remote server for + tracking. + registry_uri: The registry URI for the MLflow model registry. + experiment_id: The id of an already created MLflow experiment. + All logs from all trials in ``tune.Tuner()`` will be reported to this + experiment. If this is not provided or the experiment with this + id does not exist, you must provide an``experiment_name``. This + parameter takes precedence over ``experiment_name``. + experiment_name: The name of an already existing MLflow + experiment. All logs from all trials in ``tune.Tuner()`` will be + reported to this experiment. If this is not provided, you must + provide a valid ``experiment_id``. + tracking_token: A token to use for HTTP authentication when + logging to a remote tracking server. This is useful when you + want to log to a Databricks server, for example. This value will + be used to set the MLFLOW_TRACKING_TOKEN environment variable on + all the remote training processes. + artifact_location: The location to store run artifacts. + If not provided, MLFlow picks an appropriate default. + Ignored if experiment already exists. + run_name: Name of the new MLflow run that will be created. + If not set, will default to the ``experiment_name``. + create_experiment_if_not_exists: Whether to create an + experiment with the provided name if it does not already + exist. Defaults to False. + tags: Tags to set for the new run. 
+ rank_zero_only: If True, will return an initialized session only for the + rank 0 worker in distributed training. If False, will initialize a + session for all workers. Defaults to True. + + Example: + + Per default, you can just call ``setup_mlflow`` and continue to use + MLflow like you would normally do: + + .. code-block:: python + + from ray.air.integrations.mlflow import setup_mlflow + + def training_loop(config): + mlflow = setup_mlflow(config) + # ... + mlflow.log_metric(key="loss", val=0.123, step=0) + + In distributed data parallel training, you can utilize the return value of + ``setup_mlflow``. This will make sure it is only invoked on the first worker + in distributed training runs. + + .. code-block:: python + + from ray.air.integrations.mlflow import setup_mlflow + + def training_loop(config): + mlflow = setup_mlflow(config) + # ... + mlflow.log_metric(key="loss", val=0.123, step=0) + + + You can also use MlFlow's autologging feature if using a training + framework like Pytorch Lightning, XGBoost, etc. More information can be + found here + (https://mlflow.org/docs/latest/tracking.html#automatic-logging). + + .. code-block:: python + + from ray.air.integrations.mlflow import setup_mlflow + + def train_fn(config): + mlflow = setup_mlflow(config) + mlflow.autolog() + xgboost_results = xgb.train(config, ...) 
class MLflowLoggerCallback(LoggerCallback):
    """MLflow Logger to automatically log Tune results and config to MLflow.

    MLflow (https://mlflow.org) Tracking is an open source library for
    recording and querying experiments. This Ray Tune ``LoggerCallback``
    sends information (config parameters, training results & metrics,
    and artifacts) to MLflow for automatic experiment tracking.

    Keep in mind that the callback will open an MLflow session on the driver and
    not on the trainable. Therefore, it is not possible to call MLflow functions
    like ``mlflow.log_figure()`` inside the trainable as there is no MLflow session
    on the trainable. For more fine grained control, use
    :func:`ray.air.integrations.mlflow.setup_mlflow`.

    Args:
        tracking_uri: The tracking URI for where to manage experiments
            and runs. This can either be a local file path or a remote server.
            This arg gets passed directly to mlflow
            initialization. When using Tune in a multi-node setting, make sure
            to set this to a remote server and not a local file path.
        registry_uri: The registry URI that gets passed directly to
            mlflow initialization.
        experiment_name: The experiment name to use for this Tune run.
            If the experiment with the name already exists with MLflow,
            it will be reused. If not, a new experiment will be created with
            that name.
        tags: An optional dictionary of string keys and values to set
            as tags on the run
        tracking_token: Tracking token used to authenticate with MLflow.
        save_artifact: If set to True, automatically save the entire
            contents of the Tune local_dir as an artifact to the
            corresponding run in MLflow.

    Example:

    .. code-block:: python

        from ray.air.integrations.mlflow import MLflowLoggerCallback

        tags = { "user_name" : "John",
                 "git_commit_hash" : "abc123"}

        tune.run(
            train_fn,
            config={
                # define search space here
                "parameter_1": tune.choice([1, 2, 3]),
                "parameter_2": tune.choice([4, 5, 6]),
            },
            callbacks=[MLflowLoggerCallback(
                experiment_name="experiment1",
                tags=tags,
                save_artifact=True)])

    """

    def __init__(
        self,
        tracking_uri: Optional[str] = None,
        *,
        registry_uri: Optional[str] = None,
        experiment_name: Optional[str] = None,
        tags: Optional[Dict] = None,
        tracking_token: Optional[str] = None,
        save_artifact: bool = False,
    ):
        # Connection/identification settings are stored as-is; the actual
        # MLflow client is only configured later, in `setup()`.
        self.tracking_uri = tracking_uri
        self.registry_uri = registry_uri
        self.experiment_name = experiment_name
        self.tags = tags
        self.tracking_token = tracking_token
        self.should_save_artifact = save_artifact

        # Thin wrapper around the mlflow client; shared by all trials.
        self.mlflow_util = _MLflowLoggerUtil()

        # A file-backed tracking store on the client machine would not be
        # visible to a remote Ray cluster, hence the warning.
        if ray.util.client.ray.is_connected():
            logger.warning(
                "When using MLflowLoggerCallback with Ray Client, "
                "it is recommended to use a remote tracking "
                "server. If you are using a MLflow tracking server "
                "backed by the local filesystem, then it must be "
                "setup on the server side and not on the client "
                "side."
            )

    def setup(self, *args, **kwargs):
        # Setup the mlflow logging util.
        self.mlflow_util.setup_mlflow(
            tracking_uri=self.tracking_uri,
            registry_uri=self.registry_uri,
            experiment_name=self.experiment_name,
            tracking_token=self.tracking_token,
        )

        if self.tags is None:
            # Create empty dictionary for tags if not given explicitly
            self.tags = {}

        # Maps each Trial to its MLflow run id (populated in
        # `log_trial_start`).
        self._trial_runs = {}

    def log_trial_start(self, trial: "Trial"):
        # Create run if not already exists. A trial may be restarted, in which
        # case the existing run is reused.
        if trial not in self._trial_runs:

            # Set trial name in tags
            tags = self.tags.copy()
            tags["trial_name"] = str(trial)

            run = self.mlflow_util.start_run(tags=tags, run_name=str(trial))
            self._trial_runs[trial] = run.info.run_id

        run_id = self._trial_runs[trial]

        # Log the config parameters.
        config = trial.config
        self.mlflow_util.log_params(run_id=run_id, params_to_log=config)

    def log_trial_result(self, iteration: int, trial: "Trial", result: Dict):
        # Prefer the timesteps counter as the step axis; fall back to the
        # training iteration when it is absent or falsy.
        step = result.get(TIMESTEPS_TOTAL) or result[TRAINING_ITERATION]
        run_id = self._trial_runs[trial]
        self.mlflow_util.log_metrics(run_id=run_id, metrics_to_log=result, step=step)

    def log_trial_end(self, trial: "Trial", failed: bool = False):
        run_id = self._trial_runs[trial]

        # Log the artifact if set_artifact is set to True.
        if self.should_save_artifact:
            self.mlflow_util.save_artifacts(run_id=run_id, dir=trial.local_path)

        # Stop the run once trial finishes.
        status = "FINISHED" if not failed else "FAILED"
        self.mlflow_util.end_run(run_id=run_id, status=status)
DEFAULT_SYNC_TIMEOUT +from ray.tune.experiment import Trial +from ray.tune.logger import LoggerCallback +from ray.tune.utils import flatten_dict +from ray.util import PublicAPI +from ray.util.queue import Queue + +try: + import wandb + from wandb.sdk.data_types.base_types.wb_value import WBValue + from wandb.sdk.data_types.image import Image + from wandb.sdk.data_types.video import Video + from wandb.sdk.lib.disabled import RunDisabled + from wandb.util import json_dumps_safer + from wandb.wandb_run import Run +except ImportError: + wandb = json_dumps_safer = Run = RunDisabled = WBValue = None + + +WANDB_ENV_VAR = "WANDB_API_KEY" +WANDB_PROJECT_ENV_VAR = "WANDB_PROJECT_NAME" +WANDB_GROUP_ENV_VAR = "WANDB_GROUP_NAME" +WANDB_MODE_ENV_VAR = "WANDB_MODE" +# Hook that is invoked before wandb.init in the setup method of WandbLoggerCallback +# to populate the API key if it isn't already set when initializing the callback. +# It doesn't take in any arguments and returns the W&B API key. +# Example: "your.module.wandb_setup_api_key_hook". +WANDB_SETUP_API_KEY_HOOK = "WANDB_SETUP_API_KEY_HOOK" +# Hook that is invoked before wandb.init in the setup method of WandbLoggerCallback +# to populate environment variables to specify the location +# (project and group) of the W&B run. +# It doesn't take in any arguments and doesn't return anything, but it does populate +# WANDB_PROJECT_NAME and WANDB_GROUP_NAME. +# Example: "your.module.wandb_populate_run_location_hook". +WANDB_POPULATE_RUN_LOCATION_HOOK = "WANDB_POPULATE_RUN_LOCATION_HOOK" +# Hook that is invoked after running wandb.init in WandbLoggerCallback +# to process information about the W&B run. +# It takes in a W&B run object and doesn't return anything. +# Example: "your.module.wandb_process_run_info_hook". 
WANDB_PROCESS_RUN_INFO_HOOK = "WANDB_PROCESS_RUN_INFO_HOOK"


@PublicAPI(stability="alpha")
def setup_wandb(
    config: Optional[Dict] = None,
    api_key: Optional[str] = None,
    api_key_file: Optional[str] = None,
    rank_zero_only: bool = True,
    **kwargs,
) -> Union[Run, RunDisabled]:
    """Set up a Weights & Biases session.

    This function can be used to initialize a Weights & Biases session in a
    (distributed) training or tuning run.

    By default, the run ID is the trial ID, the run name is the trial name, and
    the run group is the experiment name. These settings can be overwritten by
    passing the respective arguments as ``kwargs``, which will be passed to
    ``wandb.init()``.

    In distributed training with Ray Train, only the zero-rank worker will initialize
    wandb. All other workers will return a disabled run object, so that logging is not
    duplicated in a distributed run. This can be disabled by passing
    ``rank_zero_only=False``, which will then initialize wandb in every training
    worker.

    The ``config`` argument will be passed to Weights and Biases and will be logged
    as the run configuration.

    If no API key or key file are passed, wandb will try to authenticate
    using locally stored credentials, created for instance by running ``wandb login``.

    Keyword arguments passed to ``setup_wandb()`` will be passed to
    ``wandb.init()`` and take precedence over any potential default settings.

    Args:
        config: Configuration dict to be logged to Weights and Biases. Can contain
            arguments for ``wandb.init()`` as well as authentication information.
        api_key: API key to use for authentication with Weights and Biases.
        api_key_file: File pointing to API key for with Weights and Biases.
        rank_zero_only: If True, will return an initialized session only for the
            rank 0 worker in distributed training. If False, will initialize a
            session for all workers.
        kwargs: Passed to ``wandb.init()``.

    Example:

    .. code-block:: python

        from ray.air.integrations.wandb import setup_wandb

        def training_loop(config):
            wandb = setup_wandb(config)
            # ...
            wandb.log({"loss": 0.123})

    """
    if not wandb:
        raise RuntimeError(
            "Wandb was not found - please install with `pip install wandb`"
        )

    default_trial_id = None
    default_trial_name = None
    default_experiment_name = None

    # Do a try-catch here if we are not in a train session
    session = get_session()
    # BUGFIX: only *non*-zero ranks get a disabled run. `world_rank is None`
    # means a non-distributed (Tune-only) session, which must also initialize
    # wandb. The previous condition (`in (None, 0)`) was inverted and disabled
    # logging on exactly the worker that was supposed to log.
    if session and rank_zero_only and session.world_rank not in (None, 0):
        return RunDisabled()

    if session:
        default_trial_id = session.trial_id
        default_trial_name = session.trial_name
        default_experiment_name = session.experiment_name

    # Default init kwargs
    wandb_init_kwargs = {
        "trial_id": kwargs.get("trial_id") or default_trial_id,
        "trial_name": kwargs.get("trial_name") or default_trial_name,
        "group": kwargs.get("group") or default_experiment_name,
    }
    # Passed kwargs take precedence over default kwargs
    wandb_init_kwargs.update(kwargs)

    return _setup_wandb(
        config=config, api_key=api_key, api_key_file=api_key_file, **wandb_init_kwargs
    )
set any other parameters in the call to wandb.init) + wandb_init_kwargs.update(**kwargs) + + # On windows, we can't fork + if os.name == "nt": + os.environ["WANDB_START_METHOD"] = "thread" + else: + os.environ["WANDB_START_METHOD"] = "fork" + + _wandb = _wandb or wandb + + run = _wandb.init(**wandb_init_kwargs) + _run_wandb_process_run_info_hook(run) + + # Record `setup_wandb` usage when everything has setup successfully. + air_usage.tag_setup_wandb() + + return run + + +def _is_allowed_type(obj): + """Return True if type is allowed for logging to wandb""" + if isinstance(obj, np.ndarray) and obj.size == 1: + return isinstance(obj.item(), Number) + if isinstance(obj, Sequence) and len(obj) > 0: + return isinstance(obj[0], (Image, Video, WBValue)) + return isinstance(obj, (Number, WBValue)) + + +def _clean_log(obj: Any): + # Fixes https://github.com/ray-project/ray/issues/10631 + if isinstance(obj, dict): + return {k: _clean_log(v) for k, v in obj.items()} + elif isinstance(obj, (list, set)): + return [_clean_log(v) for v in obj] + elif isinstance(obj, tuple): + return tuple(_clean_log(v) for v in obj) + elif isinstance(obj, np.ndarray) and obj.ndim == 3: + # Must be single image (H, W, C). + return Image(obj) + elif isinstance(obj, np.ndarray) and obj.ndim == 4: + # Must be batch of images (N >= 1, H, W, C). + return ( + _clean_log([Image(v) for v in obj]) if obj.shape[0] > 1 else Image(obj[0]) + ) + elif isinstance(obj, np.ndarray) and obj.ndim == 5: + # Must be batch of videos (N >= 1, T, C, W, H). + return ( + _clean_log([Video(v) for v in obj]) if obj.shape[0] > 1 else Video(obj[0]) + ) + elif _is_allowed_type(obj): + return obj + + # Else + + try: + # This is what wandb uses internally. If we cannot dump + # an object using this method, wandb will raise an exception. + json_dumps_safer(obj) + + # This is probably unnecessary, but left here to be extra sure. 
def _get_wandb_project(project: Optional[str] = None) -> Optional[str]:
    """Resolve the W&B project name.

    Resolution order: the explicit ``project`` argument, then the
    ``WANDB_PROJECT_NAME`` environment variable — optionally populated on
    demand by the external ``WANDB_POPULATE_RUN_LOCATION_HOOK`` hook.
    """
    if project:
        return project

    populate_hook = os.environ.get(WANDB_POPULATE_RUN_LOCATION_HOOK)
    if not os.environ.get(WANDB_PROJECT_ENV_VAR) and populate_hook:
        # No project anywhere yet: let the external hook populate
        # WANDB_PROJECT_NAME / WANDB_GROUP_NAME. Hook failures are logged
        # but never fatal.
        try:
            _load_class(populate_hook)()
        except Exception as e:
            logger.exception(
                f"Error executing {WANDB_POPULATE_RUN_LOCATION_HOOK} to "
                f"populate {WANDB_PROJECT_ENV_VAR} and {WANDB_GROUP_ENV_VAR}: {e}",
                exc_info=e,
            )

    # Re-read after the hook had a chance to run; fall back to the original
    # (falsy) argument so e.g. an empty string passes through unchanged.
    env_project = os.environ.get(WANDB_PROJECT_ENV_VAR)
    return env_project if env_project else project
def _set_api_key(api_key_file: Optional[str] = None, api_key: Optional[str] = None):
    """Resolve a W&B API key and export it via the ``WANDB_API_KEY`` env var.

    The order of fetching the API key is:
        1) From `api_key` or `api_key_file` arguments
        2) From WANDB_API_KEY environment variables
        3) User already logged in to W&B (wandb.api.api_key set)
        4) From external hook WANDB_SETUP_API_KEY_HOOK

    Raises:
        ValueError: If both ``api_key_file`` and ``api_key`` are given, or if
            no key could be resolved from any of the sources above.
    """
    # Offline/disabled mode needs no credentials at all.
    if os.environ.get(WANDB_MODE_ENV_VAR) in {"offline", "disabled"}:
        return

    if api_key_file:
        if api_key:
            raise ValueError("Both WandB `api_key_file` and `api_key` set.")
        # Only the first line of the key file is used.
        with open(api_key_file, "rt") as fp:
            api_key = fp.readline().strip()

    if not api_key and not os.environ.get(WANDB_ENV_VAR):
        # Check if user is already logged into wandb.
        try:
            # NOTE(review): `ensure_configured` is a wandb-internal API that
            # newer wandb versions may not expose — the AttributeError
            # fallthrough below covers that case.
            wandb.ensure_configured()
            if wandb.api.api_key:
                logger.info("Already logged into W&B.")
                return
        except AttributeError:
            pass
        # Try to get API key from external hook
        if WANDB_SETUP_API_KEY_HOOK in os.environ:
            try:
                api_key = _load_class(os.environ[WANDB_SETUP_API_KEY_HOOK])()
            except Exception as e:
                logger.exception(
                    f"Error executing {WANDB_SETUP_API_KEY_HOOK} to setup API key: {e}",
                    exc_info=e,
                )
    # Export the resolved key for wandb (and any child processes) to pick up;
    # fail loudly if nothing was found anywhere.
    if api_key:
        os.environ[WANDB_ENV_VAR] = api_key
    elif not os.environ.get(WANDB_ENV_VAR):
        raise ValueError(
            "No WandB API key found. Either set the {} environment "
            "variable, pass `api_key` or `api_key_file` to the"
            "`WandbLoggerCallback` class as arguments, "
            "or run `wandb login` from the command line".format(WANDB_ENV_VAR)
        )
class _WandbLoggingActor:
    """
    Wandb assumes that each trial's information should be logged from a
    separate process. We use Ray actors as forking multiprocessing
    processes is not supported by Ray and spawn processes run into pickling
    problems.

    We use a queue for the driver to communicate with the logging process.
    The queue accepts the following items:

    - If it's a dict, it is assumed to be a result and will be logged using
      ``wandb.log()``
    - If it's a checkpoint object, it will be saved using ``wandb.log_artifact()``.
    """

    def __init__(
        self,
        logdir: str,
        queue: Queue,
        exclude: List[str],
        to_config: List[str],
        *args,
        **kwargs,
    ):
        # Import inside the actor process so the module is loaded where it
        # will actually be used.
        import wandb

        self._wandb = wandb

        # wandb writes its run files relative to the cwd; pin it to the
        # trial's log directory.
        os.chdir(logdir)
        self.queue = queue
        self._exclude = set(exclude)
        self._to_config = set(to_config)
        # args/kwargs are forwarded verbatim to `wandb.init()` in `run()`.
        self.args = args
        self.kwargs = kwargs

        self._trial_name = self.kwargs.get("name", "unknown")
        self._logdir = logdir

    def run(self):
        # Since we're running in a separate process already, use threads.
        os.environ["WANDB_START_METHOD"] = "thread"
        run = self._wandb.init(*self.args, **self.kwargs)
        run.config.trial_log_path = self._logdir

        _run_wandb_process_run_info_hook(run)

        # Drain the queue until the driver signals END; each item is either
        # a result dict or a checkpoint path.
        while True:
            item_type, item_content = self.queue.get()
            if item_type == _QueueItem.END:
                break

            if item_type == _QueueItem.CHECKPOINT:
                self._handle_checkpoint(item_content)
                continue

            assert item_type == _QueueItem.RESULT
            log, config_update = self._handle_result(item_content)
            try:
                self._wandb.config.update(config_update, allow_val_change=True)
                self._wandb.log(log, step=log.get(TRAINING_ITERATION))
            except urllib.error.HTTPError as e:
                # Ignore HTTPError. Missing a few data points is not a
                # big issue, as long as things eventually recover.
                logger.warning("Failed to log result to w&b: {}".format(str(e)))
        # Flush and close the wandb run once the driver signalled END.
        self._wandb.finish()

    def _handle_checkpoint(self, checkpoint_path: str):
        # Upload the whole checkpoint directory as a single model artifact,
        # named after the trial.
        artifact = self._wandb.Artifact(
            name=f"checkpoint_{self._trial_name}", type="model"
        )
        artifact.add_dir(checkpoint_path)
        self._wandb.log_artifact(artifact)

    def _handle_result(self, result: Dict) -> Tuple[Dict, Dict]:
        """Split a flattened result dict into (metrics to log, config updates).

        Keys matching `_exclude` are dropped; keys matching `_to_config` go to
        `wandb.config`; everything else is logged if its type is loggable.
        """
        config_update = result.get("config", {}).copy()
        log = {}
        flat_result = flatten_dict(result, delimiter="/")

        for k, v in flat_result.items():
            # Match either the exact key or any key nested under it ("item/…").
            if any(k.startswith(item + "/") or k == item for item in self._exclude):
                continue
            elif any(k.startswith(item + "/") or k == item for item in self._to_config):
                config_update[k] = v
            elif not _is_allowed_type(v):
                continue
            else:
                log[k] = v

        config_update.pop("callbacks", None)  # Remove callbacks
        return log, config_update
@PublicAPI(stability="alpha")
class WandbLoggerCallback(LoggerCallback):
    """WandbLoggerCallback

    Weights and biases (https://www.wandb.ai/) is a tool for experiment
    tracking, model optimization, and dataset versioning. This Ray Tune
    ``LoggerCallback`` sends metrics to Wandb for automatic tracking and
    visualization.

    Example:

        .. testcode::

            import random

            from ray import train, tune
            from ray.train import RunConfig
            from ray.air.integrations.wandb import WandbLoggerCallback


            def train_func(config):
                offset = random.random() / 5
                for epoch in range(2, config["epochs"]):
                    acc = 1 - (2 + config["lr"]) ** -epoch - random.random() / epoch - offset
                    loss = (2 + config["lr"]) ** -epoch + random.random() / epoch + offset
                    train.report({"acc": acc, "loss": loss})


            tuner = tune.Tuner(
                train_func,
                param_space={
                    "lr": tune.grid_search([0.001, 0.01, 0.1, 1.0]),
                    "epochs": 10,
                },
                run_config=RunConfig(
                    callbacks=[WandbLoggerCallback(project="Optimization_Project")]
                ),
            )
            results = tuner.fit()

        .. testoutput::
            :hide:

            ...

    Args:
        project: Name of the Wandb project. Mandatory.
        group: Name of the Wandb group. Defaults to the trainable
            name.
        api_key_file: Path to file containing the Wandb API KEY. This
            file only needs to be present on the node running the Tune script
            if using the WandbLogger.
        api_key: Wandb API Key. Alternative to setting ``api_key_file``.
        excludes: List of metrics and config that should be excluded from
            the log.
        log_config: Boolean indicating if the ``config`` parameter of
            the ``results`` dict should be logged. This makes sense if
            parameters will change during training, e.g. with
            PopulationBasedTraining. Defaults to False.
        upload_checkpoints: If ``True``, model checkpoints will be uploaded to
            Wandb as artifacts. Defaults to ``False``.
        **kwargs: The keyword arguments will be passed to ``wandb.init()``.

    Wandb's ``group``, ``run_id`` and ``run_name`` are automatically selected
    by Tune, but can be overwritten by filling out the respective configuration
    values.

    Please see here for all other valid configuration settings:
    https://docs.wandb.ai/library/init
    """  # noqa: E501

    # Do not log these result keys
    _exclude_results = ["done", "should_checkpoint"]

    AUTO_CONFIG_KEYS = [
        "trial_id",
        "experiment_tag",
        "node_ip",
        "experiment_id",
        "hostname",
        "pid",
        "date",
    ]
    """Results that are saved with `wandb.config` instead of `wandb.log`."""

    # Actor class used for per-trial logging; overridable in subclasses/tests.
    _logger_actor_cls = _WandbLoggingActor

    def __init__(
        self,
        project: Optional[str] = None,
        group: Optional[str] = None,
        api_key_file: Optional[str] = None,
        api_key: Optional[str] = None,
        excludes: Optional[List[str]] = None,
        log_config: bool = False,
        upload_checkpoints: bool = False,
        save_checkpoints: bool = False,
        upload_timeout: int = DEFAULT_SYNC_TIMEOUT,
        **kwargs,
    ):
        if not wandb:
            raise RuntimeError(
                "Wandb was not found - please install with `pip install wandb`"
            )

        # Deprecated alias: `save_checkpoints` maps onto `upload_checkpoints`.
        if save_checkpoints:
            warnings.warn(
                "`save_checkpoints` is deprecated. Use `upload_checkpoints` instead.",
                DeprecationWarning,
            )
            upload_checkpoints = save_checkpoints

        self.project = project
        self.group = group
        self.api_key_path = api_key_file
        self.api_key = api_key
        self.excludes = excludes or []
        self.log_config = log_config
        self.upload_checkpoints = upload_checkpoints
        self._upload_timeout = upload_timeout
        # Extra kwargs are forwarded to `wandb.init()` per trial.
        self.kwargs = kwargs

        # Lazily-built `ray.remote`-wrapped actor class (see
        # `_start_logging_actor`).
        self._remote_logger_class = None

        # Per-trial bookkeeping: logging actor, its `run()` future, and the
        # queue used to ship results/checkpoints to it.
        self._trial_logging_actors: Dict[
            "Trial", ray.actor.ActorHandle[_WandbLoggingActor]
        ] = {}
        self._trial_logging_futures: Dict["Trial", ray.ObjectRef] = {}
        self._logging_future_to_trial: Dict[ray.ObjectRef, "Trial"] = {}
        self._trial_queues: Dict["Trial", Queue] = {}

    def setup(self, *args, **kwargs):
        # Resolve credentials and project/group before any trial starts.
        self.api_key_file = (
            os.path.expanduser(self.api_key_path) if self.api_key_path else None
        )
        _set_api_key(self.api_key_file, self.api_key)

        self.project = _get_wandb_project(self.project)
        if not self.project:
            raise ValueError(
                "Please pass the project name as argument or through "
                f"the {WANDB_PROJECT_ENV_VAR} environment variable."
            )
        if not self.group and os.environ.get(WANDB_GROUP_ENV_VAR):
            self.group = os.environ.get(WANDB_GROUP_ENV_VAR)

    def log_trial_start(self, trial: "Trial"):
        config = trial.config.copy()

        config.pop("callbacks", None)  # Remove callbacks

        exclude_results = self._exclude_results.copy()

        # Additional excludes
        exclude_results += self.excludes

        # Log config keys on each result?
        if not self.log_config:
            exclude_results += ["config"]

        # Fill trial ID and name
        trial_id = trial.trial_id if trial else None
        trial_name = str(trial) if trial else None

        # Project name for Wandb
        wandb_project = self.project

        # Grouping
        wandb_group = self.group or trial.experiment_dir_name if trial else None

        # remove unpickleable items!
        config = _clean_log(config)
        config = {
            key: value for key, value in config.items() if key not in self.excludes
        }

        wandb_init_kwargs = dict(
            id=trial_id,
            name=trial_name,
            resume=False,
            reinit=True,
            allow_val_change=True,
            group=wandb_group,
            project=wandb_project,
            config=config,
        )
        # User-supplied kwargs take precedence over the defaults above.
        wandb_init_kwargs.update(self.kwargs)

        self._start_logging_actor(trial, exclude_results, **wandb_init_kwargs)

    def _start_logging_actor(
        self, trial: "Trial", exclude_results: List[str], **wandb_init_kwargs
    ):
        # Reuse actor if one already exists.
        # This can happen if the trial is restarted.
        if trial in self._trial_logging_futures:
            return

        if not self._remote_logger_class:
            env_vars = {}
            # API key env variable is not set if authenticating through `wandb login`
            if WANDB_ENV_VAR in os.environ:
                env_vars[WANDB_ENV_VAR] = os.environ[WANDB_ENV_VAR]
            # Pin the actor to the driver node and let Ray restart it
            # indefinitely if it crashes.
            self._remote_logger_class = ray.remote(
                num_cpus=0,
                **_force_on_current_node(),
                runtime_env={"env_vars": env_vars},
                max_restarts=-1,
                max_task_retries=-1,
            )(self._logger_actor_cls)

        self._trial_queues[trial] = Queue(
            actor_options={
                "num_cpus": 0,
                **_force_on_current_node(),
                "max_restarts": -1,
                "max_task_retries": -1,
            }
        )
        self._trial_logging_actors[trial] = self._remote_logger_class.remote(
            logdir=trial.local_path,
            queue=self._trial_queues[trial],
            exclude=exclude_results,
            to_config=self.AUTO_CONFIG_KEYS,
            **wandb_init_kwargs,
        )
        # The future resolves when the actor's queue loop exits (END item).
        logging_future = self._trial_logging_actors[trial].run.remote()
        self._trial_logging_futures[trial] = logging_future
        self._logging_future_to_trial[logging_future] = trial

    def _signal_logging_actor_stop(self, trial: "Trial"):
        self._trial_queues[trial].put((_QueueItem.END, None))

    def log_trial_result(self, iteration: int, trial: "Trial", result: Dict):
        if trial not in self._trial_logging_actors:
            self.log_trial_start(trial)

        result = _clean_log(result)
        self._trial_queues[trial].put((_QueueItem.RESULT, result))

    def log_trial_save(self, trial: "Trial"):
        if self.upload_checkpoints and trial.checkpoint:
            checkpoint_root = None
            # Only local-filesystem checkpoints can be uploaded as artifacts.
            if isinstance(trial.checkpoint.filesystem, pyarrow.fs.LocalFileSystem):
                checkpoint_root = trial.checkpoint.path

            if checkpoint_root:
                self._trial_queues[trial].put((_QueueItem.CHECKPOINT, checkpoint_root))

    def log_trial_end(self, trial: "Trial", failed: bool = False):
        self._signal_logging_actor_stop(trial=trial)
        self._cleanup_logging_actors()

    def _cleanup_logging_actor(self, trial: "Trial"):
        # Drop all bookkeeping for the trial and kill its logging actor.
        del self._trial_queues[trial]
        del self._trial_logging_futures[trial]
        ray.kill(self._trial_logging_actors[trial])
        del self._trial_logging_actors[trial]

    def _cleanup_logging_actors(self, timeout: int = 0, kill_on_timeout: bool = False):
        """Clean up logging actors that have finished uploading to wandb.
        Waits for `timeout` seconds to collect finished logging actors.

        Args:
            timeout: The number of seconds to wait. Defaults to 0 to clean up
                any immediate logging actors during the run.
                This is set to a timeout threshold to wait for pending uploads
                on experiment end.
            kill_on_timeout: Whether or not to kill and cleanup the logging actor if
                it hasn't finished within the timeout.
        """

        futures = list(self._trial_logging_futures.values())
        done, remaining = ray.wait(futures, num_returns=len(futures), timeout=timeout)
        for ready_future in done:
            finished_trial = self._logging_future_to_trial.pop(ready_future)
            self._cleanup_logging_actor(finished_trial)

        if kill_on_timeout:
            # Force-clean actors that did not finish uploading in time.
            for remaining_future in remaining:
                trial = self._logging_future_to_trial.pop(remaining_future)
                self._cleanup_logging_actor(trial)

    def on_experiment_end(self, trials: List["Trial"], **info):
        """Wait for the actors to finish their call to `wandb.finish`.
        This includes uploading all logs + artifacts to wandb."""
        self._cleanup_logging_actors(timeout=self._upload_timeout, kill_on_timeout=True)

    def __del__(self):
        # Best-effort shutdown: signal and reap any remaining actors, then
        # drop all references so Ray can garbage-collect them.
        if ray.is_initialized():
            for trial in list(self._trial_logging_actors):
                self._signal_logging_actor_stop(trial=trial)

            self._cleanup_logging_actors(timeout=2, kill_on_timeout=True)

        self._trial_logging_actors = {}
        self._trial_logging_futures = {}
        self._logging_future_to_trial = {}
        self._trial_queues = {}
0000000000000000000000000000000000000000..fc2fd249edb52d95064eb53d94ee88391800a9df Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/air/util/__pycache__/node.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/air/util/__pycache__/torch_dist.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/air/util/__pycache__/torch_dist.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e097adb5d1d0365a54d74232c893eb7256d3bfe9 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/air/util/__pycache__/torch_dist.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/air/util/__pycache__/transform_pyarrow.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/air/util/__pycache__/transform_pyarrow.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9ebd6add7f93acc364b04a4bd86256167599bf15 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/air/util/__pycache__/transform_pyarrow.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/air/util/check_ingest.py b/.venv/lib/python3.11/site-packages/ray/air/util/check_ingest.py new file mode 100644 index 0000000000000000000000000000000000000000..8f43ee4cf6abbba124493f5efea604f3e710d77b --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/air/util/check_ingest.py @@ -0,0 +1,201 @@ +#!/usr/bin/env python + +import sys +import time +from typing import Optional + +import numpy as np + +import ray +from ray import train +from ray.air.config import DatasetConfig, ScalingConfig +from ray.data import DataIterator, Dataset, Preprocessor +from ray.train import DataConfig +from ray.train.data_parallel_trainer import DataParallelTrainer +from ray.util.annotations import Deprecated, DeveloperAPI + +MAKE_LOCAL_DATA_ITERATOR_DEPRECATION_MSG = """ +make_local_dataset_iterator is deprecated. 
@DeveloperAPI
class DummyTrainer(DataParallelTrainer):
    """A Trainer that does nothing except read the data for a given number of epochs.

    It prints out as much debugging statistics as possible.

    This is useful for debugging data ingest problem. This trainer supports normal
    scaling options same as any other Trainer (e.g., num_workers, use_gpu).

    Args:
        scaling_config: Configuration for how to scale training. This is the same
            as for :class:`~ray.train.base_trainer.BaseTrainer`.
        num_epochs: How many many times to iterate through the datasets for.
        prefetch_batches: The number of batches to prefetch ahead of the
            current block during the scan. This is the same as
            :meth:`~ray.data.Dataset.iter_batches`
    """

    def __init__(
        self,
        *args,
        scaling_config: Optional[ScalingConfig] = None,
        num_epochs: int = 1,
        prefetch_batches: int = 1,
        batch_size: Optional[int] = 4096,
        **kwargs,
    ):
        # Default to a single worker when no scaling config is given.
        if not scaling_config:
            scaling_config = ScalingConfig(num_workers=1)
        super().__init__(
            train_loop_per_worker=DummyTrainer.make_train_loop(
                num_epochs, prefetch_batches, batch_size
            ),
            *args,
            scaling_config=scaling_config,
            **kwargs,
        )

    @staticmethod
    def make_train_loop(
        num_epochs: int,
        prefetch_batches: int,
        batch_size: Optional[int],
    ):
        """Make a debug train loop that runs for the given amount of epochs."""

        def train_loop_per_worker():
            import pandas as pd

            rank = train.get_context().get_world_rank()
            data_shard = train.get_dataset_shard("train")
            start = time.perf_counter()
            epochs_read, batches_read, bytes_read = 0, 0, 0
            # Per-batch wait times; summarized as P50/P95/Max at the end.
            batch_delays = []

            print("Starting train loop on worker", rank)
            for epoch in range(num_epochs):
                epochs_read += 1
                batch_start = time.perf_counter()
                for batch in data_shard.iter_batches(
                    prefetch_batches=prefetch_batches,
                    batch_size=batch_size,
                ):
                    batch_delay = time.perf_counter() - batch_start
                    batch_delays.append(batch_delay)
                    batches_read += 1
                    # Estimate the batch's byte size per batch format.
                    if isinstance(batch, pd.DataFrame):
                        bytes_read += int(
                            batch.memory_usage(index=True, deep=True).sum()
                        )
                    elif isinstance(batch, np.ndarray):
                        bytes_read += batch.nbytes
                    elif isinstance(batch, dict):
                        for arr in batch.values():
                            bytes_read += arr.nbytes
                    else:
                        # NOTE: This isn't recursive and will just return the size of
                        # the object pointers if list of non-primitive types.
                        bytes_read += sys.getsizeof(batch)
                    train.report(
                        dict(
                            bytes_read=bytes_read,
                            batches_read=batches_read,
                            epochs_read=epochs_read,
                            batch_delay=batch_delay,
                        )
                    )
                    batch_start = time.perf_counter()
            delta = time.perf_counter() - start

            print("Time to read all data", delta, "seconds")
            print(
                "P50/P95/Max batch delay (s)",
                np.quantile(batch_delays, 0.5),
                np.quantile(batch_delays, 0.95),
                np.max(batch_delays),
            )
            print("Num epochs read", epochs_read)
            print("Num batches read", batches_read)
            print("Num bytes read", round(bytes_read / (1024 * 1024), 2), "MiB")
            print(
                "Mean throughput", round(bytes_read / (1024 * 1024) / delta, 2), "MiB/s"
            )

            # Only rank 0 prints the (shard-local) ingest statistics.
            if rank == 0:
                print("Ingest stats from rank=0:\n\n{}".format(data_shard.stats()))

        return train_loop_per_worker
+ """ + raise DeprecationWarning(MAKE_LOCAL_DATA_ITERATOR_DEPRECATION_MSG) + + +if __name__ == "__main__": + + import argparse + + parser = argparse.ArgumentParser() + parser.add_argument( + "--num-epochs", "-e", type=int, default=1, help="Number of epochs to read." + ) + parser.add_argument( + "--prefetch-batches", + "-b", + type=int, + default=1, + help="Number of batches to prefetch when reading data.", + ) + + args = parser.parse_args() + + # Generate a synthetic dataset of ~10GiB of float64 data. The dataset is sharded + # into 100 blocks (override_num_blocks=100). + ds = ray.data.range_tensor(50000, shape=(80, 80, 4), override_num_blocks=100) + + # An example preprocessing chain that just scales all values by 4.0 in two stages. + ds = ds.map_batches(lambda df: df * 2, batch_format="pandas") + ds = ds.map_batches(lambda df: df * 2, batch_format="pandas") + + # Setup the dummy trainer that prints ingest stats. + # Run and print ingest stats. + trainer = DummyTrainer( + scaling_config=ScalingConfig(num_workers=1, use_gpu=False), + datasets={"train": ds}, + num_epochs=args.num_epochs, + prefetch_batches=args.prefetch_batches, + dataset_config=DataConfig(), + batch_size=None, + ) + print("Dataset config", trainer.get_dataset_config()) + trainer.fit() + + # Print memory stats (you can also use "ray memory --stats-only" to monitor this + # during the middle of the run. 
+ try: + print( + "Memory stats at end of ingest:\n\n{}".format( + ray._private.internal_api.memory_summary(stats_only=True) + ) + ) + except Exception: + print("Error getting Ray memory stats") diff --git a/.venv/lib/python3.11/site-packages/ray/air/util/data_batch_conversion.py b/.venv/lib/python3.11/site-packages/ray/air/util/data_batch_conversion.py new file mode 100644 index 0000000000000000000000000000000000000000..1bf69b4b93989f8d341d2d5a3fa9423435602aa5 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/air/util/data_batch_conversion.py @@ -0,0 +1,353 @@ +import warnings +from enum import Enum +from typing import TYPE_CHECKING, Dict, List, Union + +import numpy as np + +from ray.air.constants import TENSOR_COLUMN_NAME +from ray.air.data_batch_type import DataBatchType +from ray.util.annotations import Deprecated, DeveloperAPI + +if TYPE_CHECKING: + import pandas as pd + +# TODO: Consolidate data conversion edges for arrow bug workaround. +try: + import pyarrow +except ImportError: + pyarrow = None + +# Lazy import to avoid ray init failures without pandas installed and allow +# dataset to import modules in this file. +_pandas = None + + +def _lazy_import_pandas(): + global _pandas + if _pandas is None: + import pandas + + _pandas = pandas + return _pandas + + +@DeveloperAPI +class BatchFormat(str, Enum): + PANDAS = "pandas" + # TODO: Remove once Arrow is deprecated as user facing batch format + ARROW = "arrow" + NUMPY = "numpy" # Either a single numpy array or a Dict of numpy arrays. + + +@DeveloperAPI +class BlockFormat(str, Enum): + """Internal Dataset block format enum.""" + + PANDAS = "pandas" + ARROW = "arrow" + SIMPLE = "simple" + + +def _convert_batch_type_to_pandas( + data: DataBatchType, + cast_tensor_columns: bool = False, +) -> "pd.DataFrame": + """Convert the provided data to a Pandas DataFrame. + + Args: + data: Data of type DataBatchType + cast_tensor_columns: Whether tensor columns should be cast to NumPy ndarrays. 
+ + Returns: + A pandas Dataframe representation of the input data. + + """ + pd = _lazy_import_pandas() + + if isinstance(data, np.ndarray): + data = pd.DataFrame({TENSOR_COLUMN_NAME: _ndarray_to_column(data)}) + elif isinstance(data, dict): + tensor_dict = {} + for col_name, col in data.items(): + if not isinstance(col, np.ndarray): + raise ValueError( + "All values in the provided dict must be of type " + f"np.ndarray. Found type {type(col)} for key {col_name} " + f"instead." + ) + tensor_dict[col_name] = _ndarray_to_column(col) + data = pd.DataFrame(tensor_dict) + elif pyarrow is not None and isinstance(data, pyarrow.Table): + data = data.to_pandas() + elif not isinstance(data, pd.DataFrame): + raise ValueError( + f"Received data of type: {type(data)}, but expected it to be one " + f"of {DataBatchType}" + ) + if cast_tensor_columns: + data = _cast_tensor_columns_to_ndarrays(data) + return data + + +def _convert_pandas_to_batch_type( + data: "pd.DataFrame", + type: BatchFormat, + cast_tensor_columns: bool = False, +) -> DataBatchType: + """Convert the provided Pandas dataframe to the provided ``type``. + + Args: + data: A Pandas DataFrame + type: The specific ``BatchFormat`` to convert to. + cast_tensor_columns: Whether tensor columns should be cast to our tensor + extension type. + + Returns: + The input data represented with the provided type. + """ + if cast_tensor_columns: + data = _cast_ndarray_columns_to_tensor_extension(data) + if type == BatchFormat.PANDAS: + return data + + elif type == BatchFormat.NUMPY: + if len(data.columns) == 1: + # If just a single column, return as a single numpy array. + return data.iloc[:, 0].to_numpy() + else: + # Else return as a dict of numpy arrays. + output_dict = {} + for column in data: + output_dict[column] = data[column].to_numpy() + return output_dict + + elif type == BatchFormat.ARROW: + if not pyarrow: + raise ValueError( + "Attempted to convert data to Pyarrow Table but Pyarrow " + "is not installed. 
Please do `pip install pyarrow` to " + "install Pyarrow." + ) + return pyarrow.Table.from_pandas(data) + + else: + raise ValueError( + f"Received type {type}, but expected it to be one of {DataBatchType}" + ) + + +@Deprecated +def convert_batch_type_to_pandas( + data: DataBatchType, + cast_tensor_columns: bool = False, +): + """Convert the provided data to a Pandas DataFrame. + + This API is deprecated from Ray 2.4. + + Args: + data: Data of type DataBatchType + cast_tensor_columns: Whether tensor columns should be cast to NumPy ndarrays. + + Returns: + A pandas Dataframe representation of the input data. + + """ + warnings.warn( + "`convert_batch_type_to_pandas` is deprecated as a developer API " + "starting from Ray 2.4. All batch format conversions should be " + "done manually instead of relying on this API.", + PendingDeprecationWarning, + ) + return _convert_batch_type_to_pandas( + data=data, cast_tensor_columns=cast_tensor_columns + ) + + +@Deprecated +def convert_pandas_to_batch_type( + data: "pd.DataFrame", + type: BatchFormat, + cast_tensor_columns: bool = False, +): + """Convert the provided Pandas dataframe to the provided ``type``. + + Args: + data: A Pandas DataFrame + type: The specific ``BatchFormat`` to convert to. + cast_tensor_columns: Whether tensor columns should be cast to our tensor + extension type. + + Returns: + The input data represented with the provided type. + """ + warnings.warn( + "`convert_pandas_to_batch_type` is deprecated as a developer API " + "starting from Ray 2.4. All batch format conversions should be " + "done manually instead of relying on this API.", + PendingDeprecationWarning, + ) + return _convert_pandas_to_batch_type( + data=data, type=type, cast_tensor_columns=cast_tensor_columns + ) + + +def _convert_batch_type_to_numpy( + data: DataBatchType, +) -> Union[np.ndarray, Dict[str, np.ndarray]]: + """Convert the provided data to a NumPy ndarray or dict of ndarrays. 
+ + Args: + data: Data of type DataBatchType + + Returns: + A numpy representation of the input data. + """ + pd = _lazy_import_pandas() + + if isinstance(data, np.ndarray): + return data + elif isinstance(data, dict): + for col_name, col in data.items(): + if not isinstance(col, np.ndarray): + raise ValueError( + "All values in the provided dict must be of type " + f"np.ndarray. Found type {type(col)} for key {col_name} " + f"instead." + ) + return data + elif pyarrow is not None and isinstance(data, pyarrow.Table): + from ray.air.util.tensor_extensions.arrow import ( + get_arrow_extension_fixed_shape_tensor_types, + ) + from ray.data._internal.arrow_ops import transform_pyarrow + + column_values_ndarrays = [] + + for col in data.columns: + # Combine columnar values arrays to make these contiguous + # (making them compatible with numpy format) + combined_array = transform_pyarrow.combine_chunked_array(col) + + column_values_ndarrays.append( + transform_pyarrow.to_numpy(combined_array, zero_copy_only=False) + ) + + arrow_fixed_shape_tensor_types = get_arrow_extension_fixed_shape_tensor_types() + + # NOTE: This branch is here for backwards-compatibility + if data.column_names == [TENSOR_COLUMN_NAME] and ( + isinstance(data.schema.types[0], arrow_fixed_shape_tensor_types) + ): + return column_values_ndarrays[0] + + return dict(zip(data.column_names, column_values_ndarrays)) + elif isinstance(data, pd.DataFrame): + return _convert_pandas_to_batch_type(data, BatchFormat.NUMPY) + else: + raise ValueError( + f"Received data of type: {type(data)}, but expected it to be one " + f"of {DataBatchType}" + ) + + +def _ndarray_to_column(arr: np.ndarray) -> Union["pd.Series", List[np.ndarray]]: + """Convert a NumPy ndarray into an appropriate column format for insertion into a + pandas DataFrame. + + If conversion to a pandas Series fails (e.g. if the ndarray is multi-dimensional), + fall back to a list of NumPy ndarrays. 
+ """ + pd = _lazy_import_pandas() + try: + # Try to convert to Series, falling back to a list conversion if this fails + # (e.g. if the ndarray is multi-dimensional). + return pd.Series(arr) + except ValueError: + return list(arr) + + +def _unwrap_ndarray_object_type_if_needed(arr: np.ndarray) -> np.ndarray: + """Unwrap an object-dtyped NumPy ndarray containing ndarray pointers into a single + contiguous ndarray, if needed/possible. + """ + if arr.dtype.type is np.object_: + try: + # Try to convert the NumPy ndarray to a non-object dtype. + arr = np.array([np.asarray(v) for v in arr]) + except Exception: + # This may fail if the subndarrays are of heterogeneous shape + pass + return arr + + +def _cast_ndarray_columns_to_tensor_extension(df: "pd.DataFrame") -> "pd.DataFrame": + """ + Cast all NumPy ndarray columns in df to our tensor extension type, TensorArray. + """ + pd = _lazy_import_pandas() + try: + SettingWithCopyWarning = pd.core.common.SettingWithCopyWarning + except AttributeError: + # SettingWithCopyWarning was moved to pd.errors in Pandas 1.5.0. + SettingWithCopyWarning = pd.errors.SettingWithCopyWarning + + from ray.air.util.tensor_extensions.pandas import ( + TensorArray, + column_needs_tensor_extension, + ) + + # Try to convert any ndarray columns to TensorArray columns. + # TODO(Clark): Once Pandas supports registering extension types for type + # inference on construction, implement as much for NumPy ndarrays and remove + # this. See https://github.com/pandas-dev/pandas/issues/41848 + # TODO(Clark): Optimize this with propagated DataFrame metadata containing a list of + # column names containing tensor columns, to make this an O(# of tensor columns) + # check rather than the current O(# of columns) check. + for col_name, col in df.items(): + if column_needs_tensor_extension(col): + try: + # Suppress Pandas warnings: + # https://github.com/ray-project/ray/issues/29270 + # We actually want in-place operations so we surpress this warning. 
+ # https://stackoverflow.com/a/74193599 + with warnings.catch_warnings(): + warnings.simplefilter("ignore", category=FutureWarning) + warnings.simplefilter("ignore", category=SettingWithCopyWarning) + df[col_name] = TensorArray(col) + except Exception as e: + raise ValueError( + f"Tried to cast column {col_name} to the TensorArray tensor " + "extension type but the conversion failed. To disable " + "automatic casting to this tensor extension, set " + "ctx = DataContext.get_current(); " + "ctx.enable_tensor_extension_casting = False." + ) from e + return df + + +def _cast_tensor_columns_to_ndarrays(df: "pd.DataFrame") -> "pd.DataFrame": + """Cast all tensor extension columns in df to NumPy ndarrays.""" + pd = _lazy_import_pandas() + try: + SettingWithCopyWarning = pd.core.common.SettingWithCopyWarning + except AttributeError: + # SettingWithCopyWarning was moved to pd.errors in Pandas 1.5.0. + SettingWithCopyWarning = pd.errors.SettingWithCopyWarning + from ray.air.util.tensor_extensions.pandas import TensorDtype + + # Try to convert any tensor extension columns to ndarray columns. + # TODO(Clark): Optimize this with propagated DataFrame metadata containing a list of + # column names containing tensor columns, to make this an O(# of tensor columns) + # check rather than the current O(# of columns) check. + for col_name, col in df.items(): + if isinstance(col.dtype, TensorDtype): + # Suppress Pandas warnings: + # https://github.com/ray-project/ray/issues/29270 + # We actually want in-place operations so we surpress this warning. 
+ # https://stackoverflow.com/a/74193599 + with warnings.catch_warnings(): + warnings.simplefilter("ignore", category=FutureWarning) + warnings.simplefilter("ignore", category=SettingWithCopyWarning) + df[col_name] = list(col.to_numpy()) + return df diff --git a/.venv/lib/python3.11/site-packages/ray/air/util/node.py b/.venv/lib/python3.11/site-packages/ray/air/util/node.py new file mode 100644 index 0000000000000000000000000000000000000000..60ea3558878f3b220f1ef97210193d5b0e626ef4 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/air/util/node.py @@ -0,0 +1,69 @@ +from typing import Dict, Optional, Union + +import ray + + +def _get_node_id_from_node_ip(node_ip: str) -> Optional[str]: + """Returns the node ID for the first alive node with the input IP.""" + for node in ray.nodes(): + if node["Alive"] and node["NodeManagerAddress"] == node_ip: + return node["NodeID"] + + return None + + +def _force_on_node( + node_id: str, + remote_func_or_actor_class: Optional[ + Union[ray.remote_function.RemoteFunction, ray.actor.ActorClass] + ] = None, +) -> Union[Union[ray.remote_function.RemoteFunction, ray.actor.ActorClass], Dict]: + """Schedule a remote function or actor class on a given node. + + Args: + node_id: The node to schedule on. + remote_func_or_actor_class: A Ray remote function or actor class + to schedule on the input node. If None, this function will directly + return the options dict to pass to another remote function or actor class + as remote options. + Returns: + The provided remote function or actor class, but with options modified to force + placement on the input node. If remote_func_or_actor_class is None, + the options dict to pass to another remote function or + actor class as remote options kwargs. 
+ """ + + scheduling_strategy = ray.util.scheduling_strategies.NodeAffinitySchedulingStrategy( + node_id=node_id, soft=False + ) + + options = {"scheduling_strategy": scheduling_strategy} + + if remote_func_or_actor_class is None: + return options + + return remote_func_or_actor_class.options(**options) + + +def _force_on_current_node( + remote_func_or_actor_class: Optional[ + Union[ray.remote_function.RemoteFunction, ray.actor.ActorClass] + ] = None +) -> Union[Union[ray.remote_function.RemoteFunction, ray.actor.ActorClass], Dict]: + """Schedule a remote function or actor class on the current node. + + If using Ray Client, the current node is the client server node. + + Args: + remote_func_or_actor_class: A Ray remote function or actor class + to schedule on the current node. If None, this function will directly + return the options dict to pass to another remote function or actor class + as remote options. + Returns: + The provided remote function or actor class, but with options modified to force + placement on the current node. If remote_func_or_actor_class is None, + the options dict to pass to another remote function or + actor class as remote options kwargs. 
+ """ + current_node_id = ray.get_runtime_context().get_node_id() + return _force_on_node(current_node_id, remote_func_or_actor_class) diff --git a/.venv/lib/python3.11/site-packages/ray/air/util/object_extensions/__init__.py b/.venv/lib/python3.11/site-packages/ray/air/util/object_extensions/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/.venv/lib/python3.11/site-packages/ray/air/util/object_extensions/arrow.py b/.venv/lib/python3.11/site-packages/ray/air/util/object_extensions/arrow.py new file mode 100644 index 0000000000000000000000000000000000000000..a56a04869855e6f162ffd35eb7f8e63d0f854b0e --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/air/util/object_extensions/arrow.py @@ -0,0 +1,119 @@ +import pickle +import typing + +import numpy as np +import pyarrow as pa +from packaging.version import parse as parse_version + +import ray.air.util.object_extensions.pandas +from ray._private.serialization import pickle_dumps +from ray._private.utils import _get_pyarrow_version +from ray.util.annotations import PublicAPI + +MIN_PYARROW_VERSION_SCALAR_SUBCLASS = parse_version("9.0.0") + +_VER = _get_pyarrow_version() +PYARROW_VERSION = None if _VER is None else parse_version(_VER) + + +def _object_extension_type_allowed() -> bool: + return ( + PYARROW_VERSION is not None + and PYARROW_VERSION >= MIN_PYARROW_VERSION_SCALAR_SUBCLASS + ) + + +# Please see https://arrow.apache.org/docs/python/extending_types.html for more info +@PublicAPI(stability="alpha") +class ArrowPythonObjectType(pa.ExtensionType): + """Defines a new Arrow extension type for Python objects. 
+ We do not require a parametrized type, so the constructor does not + take any arguments + """ + + def __init__(self) -> None: + # Defines the underlying storage type as the PyArrow LargeBinary type + super().__init__(pa.large_binary(), "ray.data.arrow_pickled_object") + + def __arrow_ext_serialize__(self) -> bytes: + # Since there are no type parameters, we are free to return empty + return b"" + + @classmethod + def __arrow_ext_deserialize__( + cls, storage_type: pa.DataType, serialized: bytes + ) -> "ArrowPythonObjectType": + return ArrowPythonObjectType() + + def __arrow_ext_scalar_class__(self) -> type: + """Returns the scalar class of the extension type. Indexing out of the + PyArrow extension array will return instances of this type. + """ + return ArrowPythonObjectScalar + + def __arrow_ext_class__(self) -> type: + """Returns the array type of the extension type. Selecting one array + out of the ChunkedArray that makes up a column in a Table with + this custom type will return an instance of this type. + """ + return ArrowPythonObjectArray + + def to_pandas_dtype(self): + """Pandas interoperability type. This describes the Pandas counterpart + to the Arrow type. See https://pandas.pydata.org/docs/development/extending.html + for more information. + """ + return ray.air.util.object_extensions.pandas.PythonObjectDtype() + + def __reduce__(self): + # Earlier PyArrow versions require custom pickling behavior. 
+ return self.__arrow_ext_deserialize__, ( + self.storage_type, + self.__arrow_ext_serialize__(), + ) + + +@PublicAPI(stability="alpha") +class ArrowPythonObjectScalar(pa.ExtensionScalar): + """Scalar class for ArrowPythonObjectType""" + + def as_py(self) -> typing.Any: + if not isinstance(self.value, pa.LargeBinaryScalar): + raise RuntimeError( + f"{type(self.value)} is not the expected LargeBinaryScalar" + ) + return pickle.load(pa.BufferReader(self.value.as_buffer())) + + +@PublicAPI(stability="alpha") +class ArrowPythonObjectArray(pa.ExtensionArray): + """Array class for ArrowPythonObjectType""" + + def from_objects( + objects: typing.Union[np.ndarray, typing.Iterable[typing.Any]] + ) -> "ArrowPythonObjectArray": + if isinstance(objects, np.ndarray): + objects = objects.tolist() + type_ = ArrowPythonObjectType() + all_dumped_bytes = [] + for obj in objects: + dumped_bytes = pickle_dumps( + obj, "Error pickling object to convert to Arrow" + ) + all_dumped_bytes.append(dumped_bytes) + arr = pa.array(all_dumped_bytes, type=type_.storage_type) + return ArrowPythonObjectArray.from_storage(type_, arr) + + def to_numpy( + self, zero_copy_only: bool = False, writable: bool = False + ) -> np.ndarray: + arr = np.empty(len(self), dtype=object) + arr[:] = self.to_pylist() + return arr + + +try: + pa.register_extension_type(ArrowPythonObjectType()) +except pa.ArrowKeyError: + # Already registered + pass diff --git a/.venv/lib/python3.11/site-packages/ray/air/util/object_extensions/pandas.py b/.venv/lib/python3.11/site-packages/ray/air/util/object_extensions/pandas.py new file mode 100644 index 0000000000000000000000000000000000000000..dbc5732f350b8c89d314b8a634c8809ad7e817b3 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/air/util/object_extensions/pandas.py @@ -0,0 +1,146 @@ +import collections.abc +import typing + +import numpy as np +import pandas as pd +import pyarrow as pa +from pandas._libs import lib +from pandas._typing import ArrayLike, Dtype, 
PositionalIndexer, TakeIndexer, npt + +import ray.air.util.object_extensions.arrow +from ray.util.annotations import PublicAPI + + +# See https://pandas.pydata.org/docs/development/extending.html for more information. +@PublicAPI(stability="alpha") +class PythonObjectArray(pd.api.extensions.ExtensionArray): + """Implements the Pandas extension array interface for the Arrow object array""" + + def __init__(self, values: collections.abc.Iterable[typing.Any]): + vals = list(values) + self.values = np.empty(len(vals), dtype=object) + self.values[:] = vals + + @classmethod + def _from_sequence( + cls, + scalars: collections.abc.Sequence[typing.Any], + *, + dtype: typing.Union[Dtype, None] = None, + copy: bool = False, + ) -> "PythonObjectArray": + return PythonObjectArray(scalars) + + @classmethod + def _from_factorized( + cls, values: collections.abc.Sequence[typing.Any], original: "PythonObjectArray" + ) -> "PythonObjectArray": + return PythonObjectArray(values) + + def __getitem__(self, item: PositionalIndexer) -> typing.Any: + return self.values[item] + + def __setitem__(self, key, value) -> None: + self.values[key] = value + + def __len__(self) -> int: + return len(self.values) + + def __eq__(self, other: object) -> ArrayLike: + if isinstance(other, PythonObjectArray): + return self.values == other.values + elif isinstance(other, np.ndarray): + return self.values == other + else: + return NotImplemented + + def to_numpy( + self, + dtype: typing.Union["npt.DTypeLike", None] = None, + copy: bool = False, + na_value: object = lib.no_default, + ) -> np.ndarray: + result = self.values + if copy or na_value is not lib.no_default: + result = result.copy() + if na_value is not lib.no_default: + result[self.isna()] = na_value + return result + + @property + def dtype(self) -> pd.api.extensions.ExtensionDtype: + return PythonObjectDtype() + + @property + def nbytes(self) -> int: + return self.values.nbytes + + def __arrow_array__(self, type=None): + return 
ray.air.util.object_extensions.arrow.ArrowPythonObjectArray.from_objects( + self.values + ) + + def isna(self) -> np.ndarray: + return pd.isnull(self.values) + + def take( + self, + indices: TakeIndexer, + *, + allow_fill: bool = False, + fill_value: typing.Any = None, + ) -> "PythonObjectArray": + if allow_fill and fill_value is None: + fill_value = self.dtype.na_value + + result = pd.core.algorithms.take( + self.values, indices, allow_fill=allow_fill, fill_value=fill_value + ) + return self._from_sequence(result, dtype=self.dtype) + + def copy(self) -> "PythonObjectArray": + return PythonObjectArray(self.values) + + @classmethod + def _concat_same_type( + cls, to_concat: collections.abc.Sequence["PythonObjectArray"] + ) -> "PythonObjectArray": + values_to_concat = [element.values for element in to_concat] + return cls(np.concatenate(values_to_concat)) + + +@PublicAPI(stability="alpha") +@pd.api.extensions.register_extension_dtype +class PythonObjectDtype(pd.api.extensions.ExtensionDtype): + @classmethod + def construct_from_string(cls, string: str): + if string != "python_object()": + raise TypeError(f"Cannot construct a '{cls.__name__}' from '{string}'") + return cls() + + @property + def type(self): + """ + The scalar type for the array, e.g. ``int`` + It's expected ``ExtensionArray[item]`` returns an instance + of ``ExtensionDtype.type`` for scalar ``item``, assuming + that value is valid (not NA). NA values do not need to be + instances of `type`. + """ + return object + + @property + def name(self) -> str: + return "python_object()" + + @classmethod + def construct_array_type(cls: type) -> type: + """ + Return the array type associated with this dtype. 
+ """ + return PythonObjectArray + + def __from_arrow__( + self, array: typing.Union[pa.Array, pa.ChunkedArray] + ) -> PythonObjectArray: + return PythonObjectArray(array.to_pylist()) diff --git a/.venv/lib/python3.11/site-packages/ray/air/util/tensor_extensions/__init__.py b/.venv/lib/python3.11/site-packages/ray/air/util/tensor_extensions/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/.venv/lib/python3.11/site-packages/ray/air/util/tensor_extensions/__pycache__/__init__.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/air/util/tensor_extensions/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f4548521f8cbcb2d763a91a2f1a0eee9fb4bd575 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/air/util/tensor_extensions/__pycache__/__init__.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/air/util/tensor_extensions/__pycache__/arrow.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/air/util/tensor_extensions/__pycache__/arrow.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b097e67a649566d3944f1bafe1148594963d4eac Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/air/util/tensor_extensions/__pycache__/arrow.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/air/util/tensor_extensions/__pycache__/pandas.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/air/util/tensor_extensions/__pycache__/pandas.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ac4b03b06be3cf5f0a7931e35969a247a5fe2928 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/air/util/tensor_extensions/__pycache__/pandas.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/air/util/tensor_extensions/__pycache__/utils.cpython-311.pyc 
b/.venv/lib/python3.11/site-packages/ray/air/util/tensor_extensions/__pycache__/utils.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8e904ade212ecfde4db5a6ff1fa64fe57043fe9e Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/air/util/tensor_extensions/__pycache__/utils.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/air/util/tensor_extensions/arrow.py b/.venv/lib/python3.11/site-packages/ray/air/util/tensor_extensions/arrow.py new file mode 100644 index 0000000000000000000000000000000000000000..b8b62d86fb1ded7ba49900e7ed7c76fdcd3a99ad --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/air/util/tensor_extensions/arrow.py @@ -0,0 +1,1224 @@ +import abc +import itertools +import json +import logging +import sys +from typing import Any, Dict, Iterable, List, Optional, Sequence, Tuple, Union + +import numpy as np +import pyarrow as pa +from packaging.version import parse as parse_version + +from ray._private.utils import _get_pyarrow_version +from ray.air.constants import TENSOR_COLUMN_NAME +from ray.air.util.tensor_extensions.utils import ( + _is_ndarray_tensor, + _is_ndarray_variable_shaped_tensor, + create_ragged_ndarray, +) +from ray.data._internal.util import GiB +from ray.util import log_once +from ray.util.annotations import DeveloperAPI, PublicAPI + +PYARROW_VERSION = _get_pyarrow_version() +if PYARROW_VERSION is not None: + PYARROW_VERSION = parse_version(PYARROW_VERSION) +# Minimum version of Arrow that supports ExtensionScalars. +# TODO(Clark): Remove conditional definition once we only support Arrow 8.0.0+. +MIN_PYARROW_VERSION_SCALAR = parse_version("8.0.0") +# Minimum version of Arrow that supports subclassable ExtensionScalars. +# TODO(Clark): Remove conditional definition once we only support Arrow 9.0.0+. 
+MIN_PYARROW_VERSION_SCALAR_SUBCLASS = parse_version("9.0.0") +# Minimum version supporting `zero_copy_only` flag in `ChunkedArray.to_numpy` +MIN_PYARROW_VERSION_CHUNKED_ARRAY_TO_NUMPY_ZERO_COPY_ONLY = parse_version("13.0.0") + +NUM_BYTES_PER_UNICODE_CHAR = 4 + +# NOTE: Overflow threshold in bytes for most Arrow types using int32 as +# its offsets +INT32_OVERFLOW_THRESHOLD = 2 * GiB + +logger = logging.getLogger(__name__) + + +@DeveloperAPI +class ArrowConversionError(Exception): + """Error raised when there is an issue converting data to Arrow.""" + + MAX_DATA_STR_LEN = 200 + + def __init__(self, data_str: str): + if len(data_str) > self.MAX_DATA_STR_LEN: + data_str = data_str[: self.MAX_DATA_STR_LEN] + "..." + message = f"Error converting data to Arrow: {data_str}" + super().__init__(message) + + +def _arrow_supports_extension_scalars(): + """ + Whether Arrow ExtensionScalars are supported in the current pyarrow version. + + This returns True if the pyarrow version is 8.0.0+, or if the pyarrow version is + unknown. + """ + # TODO(Clark): Remove utility once we only support Arrow 8.0.0+. + return PYARROW_VERSION is None or PYARROW_VERSION >= MIN_PYARROW_VERSION_SCALAR + + +def _arrow_extension_scalars_are_subclassable(): + """ + Whether Arrow ExtensionScalars support subclassing in the current pyarrow version. + + This returns True if the pyarrow version is 9.0.0+, or if the pyarrow version is + unknown. + """ + # TODO(Clark): Remove utility once we only support Arrow 9.0.0+. + return ( + PYARROW_VERSION is None + or PYARROW_VERSION >= MIN_PYARROW_VERSION_SCALAR_SUBCLASS + ) + + +@DeveloperAPI +def pyarrow_table_from_pydict( + pydict: Dict[str, Union[List[Any], pa.Array]], +) -> pa.Table: + """ + Convert a Python dictionary to a pyarrow Table. + + Raises: + ArrowConversionError: if the conversion fails. 
+ """ + try: + return pa.Table.from_pydict(pydict) + except Exception as e: + raise ArrowConversionError(str(pydict)) from e + + +@DeveloperAPI(stability="alpha") +def convert_to_pyarrow_array(column_values: np.ndarray, column_name: str) -> pa.Array: + """Converts provided NumPy `ndarray` into PyArrow's `array` while utilizing + both Arrow's natively supported types as well as custom extension types: + + - ArrowTensorArray (for tensors) + - ArrowPythonObjectArray (for user-defined python class objects, as well as + any python object that aren't represented by a corresponding Arrow's native + scalar type) + """ + + try: + # Since Arrow does NOT support tensors (aka multidimensional arrays) natively, + # we have to make sure that we handle this case utilizing `ArrowTensorArray` + # extension type + if column_name == TENSOR_COLUMN_NAME or _is_ndarray_tensor(column_values): + from ray.data.extensions.tensor_extension import ArrowTensorArray + + return ArrowTensorArray.from_numpy(column_values, column_name) + else: + return _convert_to_pyarrow_native_array(column_values, column_name) + + except ArrowConversionError as ace: + from ray.data import DataContext + from ray.data.extensions.object_extension import ( + ArrowPythonObjectArray, + _object_extension_type_allowed, + ) + + enable_fallback_config: Optional[ + bool + ] = DataContext.get_current().enable_fallback_to_arrow_object_ext_type + + if not _object_extension_type_allowed(): + object_ext_type_fallback_allowed = False + object_ext_type_detail = ( + "skipping fallback to serialize as pickled python" + f" objects (due to unsupported Arrow version {PYARROW_VERSION}, " + f"min required version is {MIN_PYARROW_VERSION_SCALAR_SUBCLASS})" + ) + else: + # NOTE: By default setting is unset which (for compatibility reasons) + # is allowing the fallback + object_ext_type_fallback_allowed = ( + enable_fallback_config is None or enable_fallback_config + ) + + if object_ext_type_fallback_allowed: + object_ext_type_detail = ( + 
"falling back to serialize as pickled python objects" + ) + else: + object_ext_type_detail = ( + "skipping fallback to serialize as pickled python objects " + "(due to DataContext.enable_fallback_to_arrow_object_ext_type " + "= False)" + ) + + if not object_ext_type_fallback_allowed: + # To avoid logging following warning for every block it's + # only going to be logged in following cases + # - When fallback is disallowed, and + # - Fallback configuration is not set or set to false, and + # - It's being logged for the first time + if not enable_fallback_config and log_once( + "_fallback_to_arrow_object_extension_type_warning" + ): + logger.warning( + f"Failed to convert column '{column_name}' into pyarrow " + f"array due to: {ace}; {object_ext_type_detail}", + exc_info=ace, + ) + + # If `ArrowPythonObjectType` is not supported raise original exception + raise + + # Otherwise, attempt to fall back to serialize as python objects + return ArrowPythonObjectArray.from_objects(column_values) + + +def _convert_to_pyarrow_native_array( + column_values: np.ndarray, column_name: str +) -> pa.Array: + """Converts provided NumPy `ndarray` into PyArrow's `array` while only utilizing + Arrow's natively supported types (ie no custom extension types)""" + + try: + # NOTE: We explicitly infer PyArrow `DataType` so that + # we can perform upcasting to be able to accommodate + # blocks that are larger than 2Gb in size (limited + # by int32 offsets used by Arrow internally) + dtype = _infer_pyarrow_type(column_values) + + logger.log( + logging.getLevelName("TRACE"), + f"Inferred dtype of '{dtype}' for column '{column_name}'", + ) + + return pa.array(column_values, type=dtype) + except Exception as e: + raise ArrowConversionError(str(column_values)) from e + + +def _infer_pyarrow_type(column_values: np.ndarray) -> Optional[pa.DataType]: + """Infers target Pyarrow `DataType` based on the provided + columnar values. 
+ + NOTE: This is a wrapper on top of `pa.infer_type(...)` utility + performing up-casting of `binary` and `string` types to + corresponding `large_binary` and `large_string` types in case + any of the array elements exceeds 2Gb in size therefore + making it impossible for original types to accommodate such + values. + + Unfortunately, for unknown reasons PA doesn't perform + that upcasting itself henceforth we have to do perform + it manually + + Args: + column_values: List of columnar values + + Returns: + Instance of PyArrow's `DataType` based on the provided + column values + """ + + if len(column_values) == 0: + return None + + inferred_pa_dtype = pa.infer_type(column_values) + + def _len_gt_overflow_threshold(obj: Any) -> bool: + # NOTE: This utility could be seeing objects other than strings or bytes in + # cases when column contains non-scalar non-homogeneous object types as + # column values, therefore making Arrow unable to infer corresponding + # column type appropriately, therefore falling back to assume the type + # of the first element in the list. + # + # Check out test cases for this method for an additional context. 
+ if isinstance(obj, (str, bytes)): + return len(obj) > INT32_OVERFLOW_THRESHOLD + + return False + + if pa.types.is_binary(inferred_pa_dtype) and any( + [_len_gt_overflow_threshold(v) for v in column_values] + ): + return pa.large_binary() + elif pa.types.is_string(inferred_pa_dtype) and any( + [_len_gt_overflow_threshold(v) for v in column_values] + ): + return pa.large_string() + + return inferred_pa_dtype + + +@DeveloperAPI +def get_arrow_extension_tensor_types(): + """Returns list of extension types of Arrow Array holding + multidimensional tensors + """ + return ( + *get_arrow_extension_fixed_shape_tensor_types(), + *get_arrow_extension_variable_shape_tensor_types(), + ) + + +@DeveloperAPI +def get_arrow_extension_fixed_shape_tensor_types(): + """Returns list of Arrow extension types holding multidimensional + tensors of *fixed* shape + """ + return ArrowTensorType, ArrowTensorTypeV2 + + +@DeveloperAPI +def get_arrow_extension_variable_shape_tensor_types(): + """Returns list of Arrow extension types holding multidimensional + tensors of *fixed* shape + """ + return (ArrowVariableShapedTensorType,) + + +class _BaseFixedShapeArrowTensorType(pa.ExtensionType, abc.ABC): + """ + Arrow ExtensionType for an array of fixed-shaped, homogeneous-typed + tensors. + + This is the Arrow side of TensorDtype. + + See Arrow extension type docs: + https://arrow.apache.org/docs/python/extending_types.html#defining-extension-types-user-defined-types + """ + + def __init__( + self, shape: Tuple[int, ...], tensor_dtype: pa.DataType, ext_type_id: str + ): + self._shape = shape + + super().__init__(tensor_dtype, ext_type_id) + + @property + def shape(self): + """ + Shape of contained tensors. + """ + return self._shape + + @property + def scalar_type(self): + """Returns the type of the underlying tensor elements.""" + return self.storage_type.value_type + + def to_pandas_dtype(self): + """ + Convert Arrow extension type to corresponding Pandas dtype. 
+ + Returns: + An instance of pd.api.extensions.ExtensionDtype. + """ + from ray.air.util.tensor_extensions.pandas import TensorDtype + + return TensorDtype(self._shape, self.scalar_type.to_pandas_dtype()) + + def __reduce__(self): + return self.__arrow_ext_deserialize__, ( + self.storage_type, + self.__arrow_ext_serialize__(), + ) + + def __arrow_ext_serialize__(self): + return json.dumps(self._shape).encode() + + def __arrow_ext_class__(self): + """ + ExtensionArray subclass with custom logic for this array of tensors + type. + + Returns: + A subclass of pd.api.extensions.ExtensionArray. + """ + return ArrowTensorArray + + if _arrow_extension_scalars_are_subclassable(): + # TODO(Clark): Remove this version guard once we only support Arrow 9.0.0+. + def __arrow_ext_scalar_class__(self): + """ + ExtensionScalar subclass with custom logic for this array of tensors type. + """ + return ArrowTensorScalar + + if _arrow_supports_extension_scalars(): + # TODO(Clark): Remove this version guard once we only support Arrow 8.0.0+. + def _extension_scalar_to_ndarray( + self, scalar: pa.ExtensionScalar + ) -> np.ndarray: + """ + Convert an ExtensionScalar to a tensor element. + """ + raw_values = scalar.value.values + shape = scalar.type.shape + value_type = raw_values.type + offset = raw_values.offset + data_buffer = raw_values.buffers()[1] + return _to_ndarray_helper(shape, value_type, offset, data_buffer) + + def __str__(self) -> str: + return ( + f"numpy.ndarray(shape={self.shape}, dtype={self.storage_type.value_type})" + ) + + def __repr__(self) -> str: + return str(self) + + @classmethod + def _need_variable_shaped_tensor_array( + cls, + array_types: Sequence[ + Union[ + "ArrowTensorType", "ArrowTensorTypeV2", "ArrowVariableShapedTensorType" + ] + ], + ) -> bool: + """ + Whether the provided list of tensor types needs a variable-shaped + representation (i.e. `ArrowVariableShapedTensorType`) when concatenating + or chunking. 
If one or more of the tensor types in `array_types` are + variable-shaped and/or any of the tensor arrays have a different shape + than the others, a variable-shaped tensor array representation will be + required and this method will return True. + + Args: + array_types: List of tensor types to check if a variable-shaped + representation is required for concatenation + + Returns: + True if concatenating arrays with types `array_types` requires + a variable-shaped representation + """ + shape = None + for arr_type in array_types: + # If at least one of the arrays is variable-shaped, we can immediately + # short-circuit since we require a variable-shaped representation. + if isinstance(arr_type, ArrowVariableShapedTensorType): + return True + if not isinstance(arr_type, get_arrow_extension_fixed_shape_tensor_types()): + raise ValueError( + "All provided array types must be an instance of either " + "ArrowTensorType or ArrowVariableShapedTensorType, but " + f"got {arr_type}" + ) + # We need variable-shaped representation if any of the tensor arrays have + # different shapes. + if shape is not None and arr_type.shape != shape: + return True + shape = arr_type.shape + return False + + +@PublicAPI(stability="beta") +class ArrowTensorType(_BaseFixedShapeArrowTensorType): + """Arrow ExtensionType (v1) for tensors. + + NOTE: This type does *NOT* support tensors larger than 4Gb (due to + overflow of int32 offsets utilized inside Pyarrow `ListType`) + """ + + OFFSET_DTYPE = np.int32 + + def __init__(self, shape: Tuple[int, ...], dtype: pa.DataType): + """ + Construct the Arrow extension type for array of fixed-shaped tensors. + + Args: + shape: Shape of contained tensors. + dtype: pyarrow dtype of tensor elements. 
+ """ + + super().__init__(shape, pa.list_(dtype), "ray.data.arrow_tensor") + + @classmethod + def __arrow_ext_deserialize__(cls, storage_type, serialized): + shape = tuple(json.loads(serialized)) + return cls(shape, storage_type.value_type) + + +@PublicAPI(stability="alpha") +class ArrowTensorTypeV2(_BaseFixedShapeArrowTensorType): + """Arrow ExtensionType (v2) for tensors (supporting tensors > 4Gb).""" + + OFFSET_DTYPE = np.int64 + + def __init__(self, shape: Tuple[int, ...], dtype: pa.DataType): + """ + Construct the Arrow extension type for array of fixed-shaped tensors. + + Args: + shape: Shape of contained tensors. + dtype: pyarrow dtype of tensor elements. + """ + + super().__init__(shape, pa.large_list(dtype), "ray.data.arrow_tensor_v2") + + @classmethod + def __arrow_ext_deserialize__(cls, storage_type, serialized): + shape = tuple(json.loads(serialized)) + return cls(shape, storage_type.value_type) + + +if _arrow_extension_scalars_are_subclassable(): + # TODO(Clark): Remove this version guard once we only support Arrow 9.0.0+. + @PublicAPI(stability="beta") + class ArrowTensorScalar(pa.ExtensionScalar): + def as_py(self) -> np.ndarray: + return self.type._extension_scalar_to_ndarray(self) + + def __array__(self) -> np.ndarray: + return self.as_py() + + +# TODO(Clark): Remove this mixin once we only support Arrow 9.0.0+. +class _ArrowTensorScalarIndexingMixin: + """ + A mixin providing support for scalar indexing in tensor extension arrays for + Arrow < 9.0.0, before full ExtensionScalar support was added. This mixin overrides + __getitem__, __iter__, and to_pylist. + """ + + # This mixin will be a no-op (no methods added) for Arrow 9.0.0+. + if not _arrow_extension_scalars_are_subclassable(): + # NOTE: These __iter__ and to_pylist definitions are shared for both + # Arrow < 8.0.0 and Arrow 8.*. + def __iter__(self): + # Override pa.Array.__iter__() in order to return an iterator of + # properly shaped tensors instead of an iterator of flattened tensors. 
+ # See comment in above __getitem__ method. + for i in range(len(self)): + # Use overridden __getitem__ method. + yield self.__getitem__(i) + + def to_pylist(self): + # Override pa.Array.to_pylist() due to a lack of ExtensionScalar + # support (see comment in __getitem__). + return list(self) + + if _arrow_supports_extension_scalars(): + # NOTE(Clark): This __getitem__ override is only needed for Arrow 8.*, + # before ExtensionScalar subclassing support was added. + # TODO(Clark): Remove these methods once we only support Arrow 9.0.0+. + def __getitem__(self, key): + # This __getitem__ hook allows us to support proper indexing when + # accessing a single tensor (a "scalar" item of the array). Without this + # hook for integer keys, the indexing will fail on pyarrow < 9.0.0 due + # to a lack of ExtensionScalar subclassing support. + + # NOTE(Clark): We'd like to override the pa.Array.getitem() helper + # instead, which would obviate the need for overriding __iter__(), but + # unfortunately overriding Cython cdef methods with normal Python + # methods isn't allowed. + item = super().__getitem__(key) + if not isinstance(key, slice): + item = item.type._extension_scalar_to_ndarray(item) + return item + + else: + # NOTE(Clark): This __getitem__ override is only needed for Arrow < 8.0.0, + # before any ExtensionScalar support was added. + # TODO(Clark): Remove these methods once we only support Arrow 8.0.0+. + def __getitem__(self, key): + # This __getitem__ hook allows us to support proper indexing when + # accessing a single tensor (a "scalar" item of the array). Without this + # hook for integer keys, the indexing will fail on pyarrow < 8.0.0 due + # to a lack of ExtensionScalar support. + + # NOTE(Clark): We'd like to override the pa.Array.getitem() helper + # instead, which would obviate the need for overriding __iter__(), but + # unfortunately overriding Cython cdef methods with normal Python + # methods isn't allowed. 
+ if isinstance(key, slice): + return super().__getitem__(key) + return self._to_numpy(key) + + +# NOTE: We need to inherit from the mixin before pa.ExtensionArray to ensure that the +# mixin's overriding methods appear first in the MRO. +# TODO(Clark): Remove this mixin once we only support Arrow 9.0.0+. +@PublicAPI(stability="beta") +class ArrowTensorArray(_ArrowTensorScalarIndexingMixin, pa.ExtensionArray): + """ + An array of fixed-shape, homogeneous-typed tensors. + + This is the Arrow side of TensorArray. + + See Arrow docs for customizing extension arrays: + https://arrow.apache.org/docs/python/extending_types.html#custom-extension-array-class + """ + + @classmethod + def from_numpy( + cls, + arr: Union[np.ndarray, Iterable[np.ndarray]], + column_name: Optional[str] = None, + ) -> Union["ArrowTensorArray", "ArrowVariableShapedTensorArray"]: + """ + Convert an ndarray or an iterable of ndarrays to an array of homogeneous-typed + tensors. If given fixed-shape tensor elements, this will return an + ``ArrowTensorArray``; if given variable-shape tensor elements, this will return + an ``ArrowVariableShapedTensorArray``. + + Args: + arr: An ndarray or an iterable of ndarrays. + column_name: Optional. Used only in logging outputs to provide + additional details. + + Returns: + - If fixed-shape tensor elements, an ``ArrowTensorArray`` containing + ``len(arr)`` tensors of fixed shape. + - If variable-shaped tensor elements, an ``ArrowVariableShapedTensorArray`` + containing ``len(arr)`` tensors of variable shape. + - If scalar elements, a ``pyarrow.Array``. + """ + if not isinstance(arr, np.ndarray) and isinstance(arr, Iterable): + arr = list(arr) + + if isinstance(arr, (list, tuple)) and arr and isinstance(arr[0], np.ndarray): + # Stack ndarrays and pass through to ndarray handling logic below. 
+ try: + arr = np.stack(arr, axis=0) + except ValueError as ve: + logger.warning( + f"Failed to stack lists due to: {ve}; " + f"falling back to using np.array(..., dtype=object)", + exc_info=ve, + ) + + # ndarray stacking may fail if the arrays are heterogeneously-shaped. + arr = np.array(arr, dtype=object) + if not isinstance(arr, np.ndarray): + raise ValueError( + f"Must give ndarray or iterable of ndarrays, got {type(arr)} {arr}" + ) + + try: + return cls._from_numpy(arr) + except Exception as e: + data_str = "" + if column_name: + data_str += f"column: '{column_name}', " + data_str += f"shape: {arr.shape}, dtype: {arr.dtype}, data: {arr}" + raise ArrowConversionError(data_str) from e + + @classmethod + def _from_numpy( + cls, + arr: np.ndarray, + ) -> Union["ArrowTensorArray", "ArrowVariableShapedTensorArray"]: + if len(arr) > 0 and np.isscalar(arr[0]): + # Elements are scalar so a plain Arrow Array will suffice. + return pa.array(arr) + if _is_ndarray_variable_shaped_tensor(arr): + # Tensor elements have variable shape, so we delegate to + # ArrowVariableShapedTensorArray. + return ArrowVariableShapedTensorArray.from_numpy(arr) + if not arr.flags.c_contiguous: + # We only natively support C-contiguous ndarrays. + arr = np.ascontiguousarray(arr) + scalar_dtype = pa.from_numpy_dtype(arr.dtype) + if pa.types.is_string(scalar_dtype): + if arr.dtype.byteorder == ">" or ( + arr.dtype.byteorder == "=" and sys.byteorder == "big" + ): + raise ValueError( + "Only little-endian string tensors are supported, " + f"but got: {arr.dtype}", + ) + scalar_dtype = pa.binary(arr.dtype.itemsize) + outer_len = arr.shape[0] + element_shape = arr.shape[1:] + total_num_items = arr.size + num_items_per_element = np.prod(element_shape) if element_shape else 1 + + # Data buffer. + if pa.types.is_boolean(scalar_dtype): + # NumPy doesn't represent boolean arrays as bit-packed, so we manually + # bit-pack the booleans before handing the buffer off to Arrow. 
+ # NOTE: Arrow expects LSB bit-packed ordering. + # NOTE: This creates a copy. + arr = np.packbits(arr, bitorder="little") + data_buffer = pa.py_buffer(arr) + data_array = pa.Array.from_buffers( + scalar_dtype, total_num_items, [None, data_buffer] + ) + + from ray.data import DataContext + + if DataContext.get_current().use_arrow_tensor_v2: + pa_type_ = ArrowTensorTypeV2(element_shape, scalar_dtype) + else: + pa_type_ = ArrowTensorType(element_shape, scalar_dtype) + + # Create Offset buffer + offset_buffer = pa.py_buffer( + pa_type_.OFFSET_DTYPE( + [i * num_items_per_element for i in range(outer_len + 1)] + ) + ) + + storage = pa.Array.from_buffers( + pa_type_.storage_type, + outer_len, + [None, offset_buffer], + children=[data_array], + ) + + return pa.ExtensionArray.from_storage(pa_type_, storage) + + def _to_numpy(self, index: Optional[int] = None, zero_copy_only: bool = False): + """ + Helper for getting either an element of the array of tensors as an + ndarray, or the entire array of tensors as a single ndarray. + + Args: + index: The index of the tensor element that we wish to return as + an ndarray. If not given, the entire array of tensors is + returned as an ndarray. + zero_copy_only: If True, an exception will be raised if the + conversion to a NumPy array would require copying the + underlying data (e.g. in presence of nulls, or for + non-primitive types). This argument is currently ignored, so + zero-copy isn't enforced even if this argument is true. + + Returns: + The corresponding tensor element as an ndarray if an index was + given, or the entire array of tensors as an ndarray otherwise. + """ + # TODO(Clark): Enforce zero_copy_only. + # TODO(Clark): Support strides? 
+ # Buffers schema: + # [None, offset_buffer, None, data_buffer] + buffers = self.buffers() + data_buffer = buffers[3] + storage_list_type = self.storage.type + value_type = storage_list_type.value_type + ext_dtype = value_type.to_pandas_dtype() + shape = self.type.shape + if pa.types.is_boolean(value_type): + # Arrow boolean array buffers are bit-packed, with 8 entries per byte, + # and are accessed via bit offsets. + buffer_item_width = value_type.bit_width + else: + # We assume all other array types are accessed via byte array + # offsets. + buffer_item_width = value_type.bit_width // 8 + # Number of items per inner ndarray. + num_items_per_element = np.prod(shape) if shape else 1 + # Base offset into data buffer, e.g. due to zero-copy slice. + buffer_offset = self.offset * num_items_per_element + # Offset of array data in buffer. + offset = buffer_item_width * buffer_offset + if index is not None: + # Getting a single tensor element of the array. + offset_buffer = buffers[1] + offset_array = np.ndarray( + (len(self),), buffer=offset_buffer, dtype=self.type.OFFSET_DTYPE + ) + # Offset into array to reach logical index. + index_offset = offset_array[index] + # Add the index offset to the base offset. + offset += buffer_item_width * index_offset + else: + # Getting the entire array of tensors. + shape = (len(self),) + shape + if pa.types.is_boolean(value_type): + # Special handling for boolean arrays, since Arrow bit-packs boolean arrays + # while NumPy does not. + # Cast as uint8 array and let NumPy unpack into a boolean view. + # Offset into uint8 array, where each element is a bucket for 8 booleans. + byte_bucket_offset = offset // 8 + # Offset for a specific boolean, within a uint8 array element. + bool_offset = offset % 8 + # The number of uint8 array elements (buckets) that our slice spans. + # Note that, due to the offset for a specific boolean, the slice can span + # byte boundaries even if it contains less than 8 booleans. 
+ num_boolean_byte_buckets = 1 + ((bool_offset + np.prod(shape) - 1) // 8) + # Construct the uint8 array view on the buffer. + arr = np.ndarray( + (num_boolean_byte_buckets,), + dtype=np.uint8, + buffer=data_buffer, + offset=byte_bucket_offset, + ) + # Unpack into a byte per boolean, using LSB bit-packed ordering. + arr = np.unpackbits(arr, bitorder="little") + # Interpret buffer as boolean array. + return np.ndarray(shape, dtype=np.bool_, buffer=arr, offset=bool_offset) + # Special handling of binary/string types. Assumes unicode string tensor columns + if pa.types.is_fixed_size_binary(value_type): + ext_dtype = np.dtype( + f" Union["ArrowTensorArray", "ArrowVariableShapedTensorArray"]: + """ + Concatenate multiple tensor arrays. + + If one or more of the tensor arrays in to_concat are variable-shaped and/or any + of the tensor arrays have a different shape than the others, a variable-shaped + tensor array will be returned. + """ + to_concat_types = [arr.type for arr in to_concat] + if ArrowTensorType._need_variable_shaped_tensor_array(to_concat_types): + # Need variable-shaped tensor array. + # TODO(Clark): Eliminate this NumPy roundtrip by directly constructing the + # underlying storage array buffers (NumPy roundtrip will not be zero-copy + # for e.g. boolean arrays). + # NOTE(Clark): Iterating over a tensor extension array converts each element + # to an ndarray view. + return ArrowVariableShapedTensorArray.from_numpy( + [e for a in to_concat for e in a] + ) + else: + storage = pa.concat_arrays([c.storage for c in to_concat]) + + return ArrowTensorArray.from_storage(to_concat[0].type, storage) + + @classmethod + def _chunk_tensor_arrays( + cls, arrs: Sequence[Union["ArrowTensorArray", "ArrowVariableShapedTensorArray"]] + ) -> pa.ChunkedArray: + """ + Create a ChunkedArray from multiple tensor arrays. 
+ """ + arrs_types = [arr.type for arr in arrs] + if ArrowTensorType._need_variable_shaped_tensor_array(arrs_types): + new_arrs = [] + for a in arrs: + if isinstance(a.type, get_arrow_extension_fixed_shape_tensor_types()): + a = a.to_variable_shaped_tensor_array() + assert isinstance(a.type, ArrowVariableShapedTensorType) + new_arrs.append(a) + arrs = new_arrs + return pa.chunked_array(arrs) + + def to_variable_shaped_tensor_array(self) -> "ArrowVariableShapedTensorArray": + """ + Convert this tensor array to a variable-shaped tensor array. + + This is primarily used when concatenating multiple chunked tensor arrays where + at least one chunked array is already variable-shaped and/or the shapes of the + chunked arrays differ, in which case the resulting concatenated tensor array + will need to be in the variable-shaped representation. + """ + # TODO(Clark): Eliminate this NumPy roundtrip by directly constructing the + # underlying storage array buffers (NumPy roundtrip will not be zero-copy for + # e.g. boolean arrays). + return ArrowVariableShapedTensorArray.from_numpy(self.to_numpy()) + + +@PublicAPI(stability="alpha") +class ArrowVariableShapedTensorType(pa.ExtensionType): + """ + Arrow ExtensionType for an array of heterogeneous-shaped, homogeneous-typed + tensors. + + This is the Arrow side of TensorDtype for tensor elements with different shapes. + Note that this extension only supports non-ragged tensor elements; i.e., when + considering each tensor element in isolation, they must have a well-defined, + non-ragged shape. + + See Arrow extension type docs: + https://arrow.apache.org/docs/python/extending_types.html#defining-extension-types-user-defined-types + """ + + def __init__(self, dtype: pa.DataType, ndim: int): + """ + Construct the Arrow extension type for array of heterogeneous-shaped tensors. + + Args: + dtype: pyarrow dtype of tensor elements. + ndim: The number of dimensions in the tensor elements. 
+ """ + self._ndim = ndim + super().__init__( + pa.struct( + [("data", pa.large_list(dtype)), ("shape", pa.list_(pa.int64()))] + ), + "ray.data.arrow_variable_shaped_tensor", + ) + + def to_pandas_dtype(self): + """ + Convert Arrow extension type to corresponding Pandas dtype. + + Returns: + An instance of pd.api.extensions.ExtensionDtype. + """ + from ray.air.util.tensor_extensions.pandas import TensorDtype + + return TensorDtype( + (None,) * self.ndim, + self.storage_type["data"].type.value_type.to_pandas_dtype(), + ) + + @property + def ndim(self) -> int: + """Return the number of dimensions in the tensor elements.""" + return self._ndim + + @property + def scalar_type(self): + """Returns the type of the underlying tensor elements.""" + data_field_index = self.storage_type.get_field_index("data") + return self.storage_type[data_field_index].type.value_type + + def __reduce__(self): + return self.__arrow_ext_deserialize__, ( + self.storage_type, + self.__arrow_ext_serialize__(), + ) + + def __arrow_ext_serialize__(self): + return json.dumps(self._ndim).encode() + + @classmethod + def __arrow_ext_deserialize__(cls, storage_type, serialized): + ndim = json.loads(serialized) + dtype = storage_type["data"].type.value_type + return cls(dtype, ndim) + + def __arrow_ext_class__(self): + """ + ExtensionArray subclass with custom logic for this array of tensors + type. + + Returns: + A subclass of pd.api.extensions.ExtensionArray. + """ + return ArrowVariableShapedTensorArray + + if _arrow_extension_scalars_are_subclassable(): + # TODO(Clark): Remove this version guard once we only support Arrow 9.0.0+. + def __arrow_ext_scalar_class__(self): + """ + ExtensionScalar subclass with custom logic for this array of tensors type. 
+ """ + return ArrowTensorScalar + + def __str__(self) -> str: + dtype = self.storage_type["data"].type.value_type + return f"numpy.ndarray(ndim={self.ndim}, dtype={dtype})" + + def __repr__(self) -> str: + return str(self) + + if _arrow_supports_extension_scalars(): + # TODO(Clark): Remove this version guard once we only support Arrow 8.0.0+. + def _extension_scalar_to_ndarray( + self, scalar: pa.ExtensionScalar + ) -> np.ndarray: + """ + Convert an ExtensionScalar to a tensor element. + """ + data = scalar.value.get("data") + raw_values = data.values + + shape = tuple(scalar.value.get("shape").as_py()) + value_type = raw_values.type + offset = raw_values.offset + data_buffer = raw_values.buffers()[1] + return _to_ndarray_helper(shape, value_type, offset, data_buffer) + + +# NOTE: We need to inherit from the mixin before pa.ExtensionArray to ensure that the +# mixin's overriding methods appear first in the MRO. +# TODO(Clark): Remove this mixin once we only support Arrow 9.0.0+. +@PublicAPI(stability="alpha") +class ArrowVariableShapedTensorArray( + _ArrowTensorScalarIndexingMixin, pa.ExtensionArray +): + """ + An array of heterogeneous-shaped, homogeneous-typed tensors. + + This is the Arrow side of TensorArray for tensor elements that have differing + shapes. Note that this extension only supports non-ragged tensor elements; i.e., + when considering each tensor element in isolation, they must have a well-defined + shape. This extension also only supports tensor elements that all have the same + number of dimensions. + + See Arrow docs for customizing extension arrays: + https://arrow.apache.org/docs/python/extending_types.html#custom-extension-array-class + """ + + @classmethod + def from_numpy( + cls, arr: Union[np.ndarray, List[np.ndarray], Tuple[np.ndarray]] + ) -> "ArrowVariableShapedTensorArray": + """ + Convert an ndarray or an iterable of heterogeneous-shaped ndarrays to an array + of heterogeneous-shaped, homogeneous-typed tensors. 
+ + Args: + arr: An ndarray or an iterable of heterogeneous-shaped ndarrays. + + Returns: + An ArrowVariableShapedTensorArray containing len(arr) tensors of + heterogeneous shape. + """ + # Implementation note - Arrow representation of ragged tensors: + # + # We represent an array of ragged tensors using a struct array containing two + # fields: + # - data: a variable-sized list array, where each element in the array is a + # tensor element stored in a 1D (raveled) variable-sized list of the + # underlying scalar data type. + # - shape: a variable-sized list array containing the shapes of each tensor + # element. + if not isinstance(arr, (list, tuple, np.ndarray)): + raise ValueError( + "ArrowVariableShapedTensorArray can only be constructed from an " + f"ndarray or a list/tuple of ndarrays, but got: {type(arr)}" + ) + if len(arr) == 0: + # Empty ragged tensor arrays are not supported. + raise ValueError("Creating empty ragged tensor arrays is not supported.") + + # Whether all subndarrays are contiguous views of the same ndarray. + shapes, sizes, raveled = [], [], [] + ndim = None + for a in arr: + a = np.asarray(a) + if ndim is not None and a.ndim != ndim: + raise ValueError( + "ArrowVariableShapedTensorArray only supports tensor elements that " + "all have the same number of dimensions, but got tensor elements " + f"with dimensions: {ndim}, {a.ndim}" + ) + ndim = a.ndim + shapes.append(a.shape) + sizes.append(a.size) + # Convert to 1D array view; this should be zero-copy in the common case. + # NOTE: If array is not in C-contiguous order, this will convert it to + # C-contiguous order, incurring a copy. + a = np.ravel(a, order="C") + raveled.append(a) + # Get size offsets and total size. + sizes = np.array(sizes) + size_offsets = np.cumsum(sizes) + total_size = size_offsets[-1] + # Concatenate 1D views into a contiguous 1D array. 
+ if all(_is_contiguous_view(curr, prev) for prev, curr in _pairwise(raveled)): + # An optimized zero-copy path if raveled tensor elements are already + # contiguous in memory, e.g. if this tensor array has already done a + # roundtrip through our Arrow representation. + np_data_buffer = raveled[-1].base + else: + np_data_buffer = np.concatenate(raveled) + dtype = np_data_buffer.dtype + pa_dtype = pa.from_numpy_dtype(dtype) + if pa.types.is_string(pa_dtype): + if dtype.byteorder == ">" or ( + dtype.byteorder == "=" and sys.byteorder == "big" + ): + raise ValueError( + "Only little-endian string tensors are supported, " + f"but got: {dtype}" + ) + pa_dtype = pa.binary(dtype.itemsize) + if dtype.type is np.bool_: + # NumPy doesn't represent boolean arrays as bit-packed, so we manually + # bit-pack the booleans before handing the buffer off to Arrow. + # NOTE: Arrow expects LSB bit-packed ordering. + # NOTE: This creates a copy. + np_data_buffer = np.packbits(np_data_buffer, bitorder="little") + data_buffer = pa.py_buffer(np_data_buffer) + # Construct underlying data array. + value_array = pa.Array.from_buffers(pa_dtype, total_size, [None, data_buffer]) + # Construct array for offsets into the 1D data array, where each offset + # corresponds to a tensor element. + size_offsets = np.insert(size_offsets, 0, 0) + offset_array = pa.array(size_offsets) + data_array = pa.LargeListArray.from_arrays(offset_array, value_array) + # We store the tensor element shapes so we can reconstruct each tensor when + # converting back to NumPy ndarrays. + shape_array = pa.array(shapes) + # Build storage array containing tensor data and the tensor element shapes. 
+ storage = pa.StructArray.from_arrays( + [data_array, shape_array], + ["data", "shape"], + ) + type_ = ArrowVariableShapedTensorType(pa_dtype, ndim) + return pa.ExtensionArray.from_storage(type_, storage) + + def _to_numpy(self, index: Optional[int] = None, zero_copy_only: bool = False): + """ + Helper for getting either an element of the array of tensors as an ndarray, or + the entire array of tensors as a single ndarray. + + Args: + index: The index of the tensor element that we wish to return as an + ndarray. If not given, the entire array of tensors is returned as an + ndarray. + zero_copy_only: If True, an exception will be raised if the conversion to a + NumPy array would require copying the underlying data (e.g. in presence + of nulls, or for non-primitive types). This argument is currently + ignored, so zero-copy isn't enforced even if this argument is true. + + Returns: + The corresponding tensor element as an ndarray if an index was given, or + the entire array of tensors as an ndarray otherwise. + """ + # TODO(Clark): Enforce zero_copy_only. + # TODO(Clark): Support strides? + if index is None: + # Get individual ndarrays for each tensor element. + arrs = [self._to_numpy(i, zero_copy_only) for i in range(len(self))] + # Return ragged NumPy ndarray in the ndarray of ndarray pointers + # representation. + return create_ragged_ndarray(arrs) + data = self.storage.field("data") + shapes = self.storage.field("shape") + + shape = shapes[index].as_py() + value_type = data.type.value_type + offset = data.offsets[index].as_py() + data_buffer = data.buffers()[3] + return _to_ndarray_helper(shape, value_type, offset, data_buffer) + + def to_numpy(self, zero_copy_only: bool = True): + """ + Convert the entire array of tensors into a single ndarray. + + Args: + zero_copy_only: If True, an exception will be raised if the conversion to a + NumPy array would require copying the underlying data (e.g. in presence + of nulls, or for non-primitive types). 
This argument is currently + ignored, so zero-copy isn't enforced even if this argument is true. + + Returns: + A single ndarray representing the entire array of tensors. + """ + return self._to_numpy(zero_copy_only=zero_copy_only) + + +def _is_contiguous_view(curr: np.ndarray, prev: Optional[np.ndarray]) -> bool: + """Check if the provided tensor element is contiguous with the previous tensor + element. + + Args: + curr: The tensor element whose contiguity that we wish to check. + prev: The previous tensor element in the tensor array. + + Returns: + Whether the provided tensor element is contiguous with the previous tensor + element. + """ + if ( + curr.base is None + or not curr.data.c_contiguous + or (prev is not None and curr.base is not prev.base) + ): + # curr is either: + # - not a view, + # - not in C-contiguous order, + # - a view that does not share its base with the other subndarrays. + return False + else: + # curr is a C-contiguous view that shares the same base with the seen + # subndarrays, but we need to confirm that it is contiguous with the + # previous subndarray. + if prev is not None and ( + _get_buffer_address(curr) - _get_buffer_address(prev) + != prev.base.dtype.itemsize * prev.size + ): + # This view is not contiguous with the previous view. + return False + else: + return True + + +def _get_buffer_address(arr: np.ndarray) -> int: + """Get the address of the buffer underlying the provided NumPy ndarray.""" + return arr.__array_interface__["data"][0] + + +def _pairwise(iterable): + # pairwise('ABCDEFG') --> AB BC CD DE EF FG + # Backport of itertools.pairwise for Python < 3.10. + a, b = itertools.tee(iterable) + next(b, None) + return zip(a, b) + + +def _to_ndarray_helper(shape, value_type, offset, data_buffer): + if pa.types.is_boolean(value_type): + # Arrow boolean array buffers are bit-packed, with 8 entries per byte, + # and are accessed via bit offsets. 
+ buffer_item_width = value_type.bit_width + else: + # We assume all other array types are accessed via byte array + # offsets. + buffer_item_width = value_type.bit_width // 8 + data_offset = buffer_item_width * offset + + if pa.types.is_boolean(value_type): + # Special handling for boolean arrays, since Arrow + # bit-packs boolean arrays while NumPy does not. + # Cast as uint8 array and let NumPy unpack into a boolean view. + # Offset into uint8 array, where each element is + # a bucket for 8 booleans. + byte_bucket_offset = data_offset // 8 + # Offset for a specific boolean, within a uint8 array element. + bool_offset = data_offset % 8 + # The number of uint8 array elements (buckets) that our slice spans. + # Note that, due to the offset for a specific boolean, + # the slice can span byte boundaries even if it contains + # less than 8 booleans. + num_boolean_byte_buckets = 1 + ((bool_offset + np.prod(shape) - 1) // 8) + # Construct the uint8 array view on the buffer. + arr = np.ndarray( + (num_boolean_byte_buckets,), + dtype=np.uint8, + buffer=data_buffer, + offset=byte_bucket_offset, + ) + # Unpack into a byte per boolean, using LSB bit-packed ordering. + arr = np.unpackbits(arr, bitorder="little") + # Interpret buffer as boolean array. + return np.ndarray(shape, dtype=np.bool_, buffer=arr, offset=bool_offset) + ext_dtype = value_type.to_pandas_dtype() + # Special handling of ragged string tensors + if pa.types.is_fixed_size_binary(value_type): + ext_dtype = np.dtype(f" bool: + """Return whether the provided NumPy ndarray is comprised of tensors. 
+ + NOTE: Tensor is defined as a NumPy array such that `len(arr.shape) > 1` + """ + + # Case of uniform-shaped (ie non-ragged) tensor + if arr.ndim > 1: + return True + + # Case of ragged tensor (as produced by `create_ragged_ndarray` utility) + elif ( + arr.dtype.type is np.object_ and len(arr) > 0 and isinstance(arr[0], np.ndarray) + ): + return True + + return False + + +def _is_ndarray_variable_shaped_tensor(arr: np.ndarray) -> bool: + """Return whether the provided NumPy ndarray is comprised of variable-shaped + tensors. + + NOTE: This is an O(rows) check. + """ + if arr.dtype.type is not np.object_: + return False + if len(arr) == 0: + return False + if not isinstance(arr[0], np.ndarray): + return False + shape = arr[0].shape + for a in arr[1:]: + if not isinstance(a, np.ndarray): + return False + if a.shape != shape: + return True + return True + + +def _create_possibly_ragged_ndarray( + values: Union[np.ndarray, "ABCSeries", Sequence[Any]] +) -> np.ndarray: + """ + Create a possibly ragged ndarray. + Using the np.array() constructor will fail to construct a ragged ndarray that has a + uniform first dimension (e.g. uniform channel dimension in imagery). This function + catches this failure and tries a create-and-fill method to construct the ragged + ndarray. + """ + try: + with warnings.catch_warnings(): + # For NumPy < 1.24, constructing a ragged ndarray directly via + # `np.array(...)` without the `dtype=object` parameter will raise a + # VisibleDeprecationWarning which we suppress. 
+ # More details: https://stackoverflow.com/q/63097829 + if np.lib.NumpyVersion(np.__version__) >= "2.0.0": + copy_if_needed = None + warning_type = np.exceptions.VisibleDeprecationWarning + else: + copy_if_needed = False + warning_type = np.VisibleDeprecationWarning + + warnings.simplefilter("ignore", category=warning_type) + arr = np.array(values, copy=copy_if_needed) + return arr + except ValueError as e: + # Constructing a ragged ndarray directly via `np.array(...)` + # without the `dtype=object` parameter will raise a ValueError. + # For NumPy < 1.24, the message is of the form: + # "could not broadcast input array from shape..." + # For NumPy >= 1.24, the message is of the form: + # "The requested array has an inhomogeneous shape..." + # More details: https://github.com/numpy/numpy/pull/22004 + error_str = str(e) + if ( + "could not broadcast input array from shape" in error_str + or "The requested array has an inhomogeneous shape" in error_str + ): + # Fall back to strictly creating a ragged ndarray. + return create_ragged_ndarray(values) + else: + # Re-raise original error if the failure wasn't a broadcast error. + raise e from None + + +@PublicAPI(stability="alpha") +def create_ragged_ndarray(values: Sequence[Any]) -> np.ndarray: + """Create an array that contains arrays of different length + + If you're working with variable-length arrays like images, use this function to + create ragged arrays instead of ``np.array``. + + .. note:: + ``np.array`` fails to construct ragged arrays if the input arrays have a uniform + first dimension: + + .. testsetup:: + + import numpy as np + from ray.air.util.tensor_extensions.utils import create_ragged_ndarray + + .. doctest:: + + >>> values = [np.zeros((3, 1)), np.zeros((3, 2))] + >>> np.array(values, dtype=object) + Traceback (most recent call last): + ... 
+ ValueError: could not broadcast input array from shape (3,1) into shape (3,) + >>> create_ragged_ndarray(values) + array([array([[0.], + [0.], + [0.]]), array([[0., 0.], + [0., 0.], + [0., 0.]])], dtype=object) + + Or if you're creating a ragged array from a single array: + + .. doctest:: + + >>> values = [np.zeros((3, 1))] + >>> np.array(values, dtype=object)[0].dtype + dtype('O') + >>> create_ragged_ndarray(values)[0].dtype + dtype('float64') + + ``create_ragged_ndarray`` avoids the limitations of ``np.array`` by creating an + empty array and filling it with pointers to the variable-length arrays. + """ # noqa: E501 + # Create an empty object-dtyped 1D array. + arr = np.empty(len(values), dtype=object) + # Try to fill the 1D array of pointers with the (ragged) tensors. + arr[:] = list(values) + return arr diff --git a/.venv/lib/python3.11/site-packages/xgrammar/xgrammar_bindings.cpython-311-x86_64-linux-gnu.so b/.venv/lib/python3.11/site-packages/xgrammar/xgrammar_bindings.cpython-311-x86_64-linux-gnu.so new file mode 100644 index 0000000000000000000000000000000000000000..b3346e0f83650fa3ad64918d4435fcc2f3aafc77 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/xgrammar/xgrammar_bindings.cpython-311-x86_64-linux-gnu.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d49891b985816f83db0ca3a2325fde9a6651eafc06b890bee9f096d616f65f7 +size 971744