diff --git a/.venv/lib/python3.11/site-packages/ray/train/__init__.py b/.venv/lib/python3.11/site-packages/ray/train/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..1f8c1343d68b47aa75279c0c7047314cbf1df389 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/train/__init__.py @@ -0,0 +1,90 @@ +# Try import ray[train] core requirements (defined in setup.py) +# isort: off +try: + import fsspec # noqa: F401 + import pandas # noqa: F401 + import pyarrow # noqa: F401 + import requests # noqa: F401 +except ImportError as exc: + raise ImportError( + "Can't import ray.train as some dependencies are missing. " + 'Run `pip install "ray[train]"` to fix.' + ) from exc +# isort: on + + +from ray._private.usage import usage_lib +from ray.air.config import CheckpointConfig, FailureConfig, RunConfig, ScalingConfig +from ray.air.result import Result + +# Import this first so it can be used in other modules +from ray.train._checkpoint import Checkpoint +from ray.train._internal.data_config import DataConfig +from ray.train._internal.session import get_checkpoint, get_dataset_shard, report +from ray.train._internal.syncer import SyncConfig +from ray.train.backend import BackendConfig +from ray.train.constants import TRAIN_DATASET_KEY +from ray.train.context import get_context +from ray.train.trainer import TrainingIterator +from ray.train.v2._internal.constants import is_v2_enabled + +if is_v2_enabled(): + from ray.train.v2.api.callback import UserCallback # noqa: F811 + from ray.train.v2.api.config import ( # noqa: F811 + FailureConfig, + RunConfig, + ScalingConfig, + ) + from ray.train.v2.api.result import Result # noqa: F811 + from ray.train.v2.api.train_fn_utils import ( # noqa: F811 + get_checkpoint, + get_context, + get_dataset_shard, + report, + ) + + +usage_lib.record_library_usage("train") + +Checkpoint.__module__ = "ray.train" + +__all__ = [ + "get_checkpoint", + "get_context", + "get_dataset_shard", + "report", + "BackendConfig", + 
"Checkpoint", + "CheckpointConfig", + "DataConfig", + "FailureConfig", + "Result", + "RunConfig", + "ScalingConfig", + "SyncConfig", + "TrainingIterator", + "TRAIN_DATASET_KEY", +] + +get_checkpoint.__module__ = "ray.train" +get_context.__module__ = "ray.train" +get_dataset_shard.__module__ = "ray.train" +report.__module__ = "ray.train" +BackendConfig.__module__ = "ray.train" +Checkpoint.__module__ = "ray.train" +CheckpointConfig.__module__ = "ray.train" +DataConfig.__module__ = "ray.train" +FailureConfig.__module__ = "ray.train" +Result.__module__ = "ray.train" +RunConfig.__module__ = "ray.train" +ScalingConfig.__module__ = "ray.train" +SyncConfig.__module__ = "ray.train" +TrainingIterator.__module__ = "ray.train" + + +if is_v2_enabled(): + __all__.append("UserCallback") + UserCallback.__module__ = "ray.train" + + +# DO NOT ADD ANYTHING AFTER THIS LINE. diff --git a/.venv/lib/python3.11/site-packages/ray/train/_checkpoint.py b/.venv/lib/python3.11/site-packages/ray/train/_checkpoint.py new file mode 100644 index 0000000000000000000000000000000000000000..5ee65be4f20fe17126b315726bcfb825e9c89e45 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/train/_checkpoint.py @@ -0,0 +1,424 @@ +import contextlib +import glob +import json +import logging +import os +import platform +import shutil +import tempfile +import traceback +import uuid +from pathlib import Path +from typing import Any, Dict, Iterator, List, Optional, Union + +import pyarrow.fs + +from ray.air._internal.filelock import TempFileLock +from ray.train._internal.storage import _download_from_fs_path, _exists_at_fs_path +from ray.util.annotations import PublicAPI + +logger = logging.getLogger(__name__) + +# The filename of the file that stores user metadata set on the checkpoint. +_METADATA_FILE_NAME = ".metadata.json" + +# The prefix of the temp checkpoint directory that `to_directory` downloads to +# on the local filesystem. 
+_CHECKPOINT_TEMP_DIR_PREFIX = "checkpoint_tmp_" + + +class _CheckpointMetaClass(type): + def __getattr__(self, item): + try: + return super().__getattribute__(item) + except AttributeError as exc: + if item in { + "from_dict", + "to_dict", + "from_bytes", + "to_bytes", + "get_internal_representation", + }: + raise _get_migration_error(item) from exc + elif item in { + "from_uri", + "to_uri", + "uri", + }: + raise _get_uri_error(item) from exc + elif item in {"get_preprocessor", "set_preprocessor"}: + raise _get_preprocessor_error(item) from exc + + raise exc + + +@PublicAPI(stability="beta") +class Checkpoint(metaclass=_CheckpointMetaClass): + """A reference to data persisted as a directory in local or remote storage. + + Access the checkpoint contents locally using ``checkpoint.to_directory()`` + or ``checkpoint.as_directory``. + + Attributes + ---------- + path: A path on the filesystem containing the checkpoint contents. + filesystem: PyArrow FileSystem that can be used to access data at the `path`. + + See Also + -------- + ray.train.report : Report a checkpoint during training (with Ray Train/Tune). + ray.train.get_checkpoint : Get the latest checkpoint during training + (for restoration). + + :ref:`train-checkpointing` + :ref:`persistent-storage-guide` + + Examples + -------- + + Creating a checkpoint using ``Checkpoint.from_directory``: + + >>> from ray.train import Checkpoint + >>> checkpoint = Checkpoint.from_directory("/tmp/example_checkpoint_dir") + >>> checkpoint.filesystem # doctest: +ELLIPSIS + >> checkpoint.path + '/tmp/example_checkpoint_dir' + + Creating a checkpoint from a remote URI: + + >>> checkpoint = Checkpoint("s3://bucket/path/to/checkpoint") + >>> checkpoint.filesystem # doctest: +ELLIPSIS + >> checkpoint.path + 'bucket/path/to/checkpoint' + + Creating a checkpoint with a custom filesystem: + + >>> checkpoint = Checkpoint( + ... path="bucket/path/to/checkpoint", + ... filesystem=pyarrow.fs.S3FileSystem(), + ... 
) + >>> checkpoint.filesystem # doctest: +ELLIPSIS + >> checkpoint.path + 'bucket/path/to/checkpoint' + + Accessing a checkpoint's contents: + + >>> import os # doctest: +SKIP + >>> with checkpoint.as_directory() as local_checkpoint_dir: # doctest: +SKIP + ... print(os.listdir(local_checkpoint_dir)) # doctest: +SKIP + ['model.pt', 'optimizer.pt', 'misc.pt'] + """ + + def __init__( + self, + path: Union[str, os.PathLike], + filesystem: Optional["pyarrow.fs.FileSystem"] = None, + ): + """Construct a Checkpoint. + + Args: + path: A local path or remote URI containing the checkpoint data. + If a filesystem is provided, then this path must NOT be a URI. + It should be a path on the filesystem with the prefix already stripped. + filesystem: PyArrow FileSystem to use to access data at the path. + If not specified, this is inferred from the URI scheme. + """ + self.path = str(path) + self.filesystem = filesystem + + if path and not filesystem: + self.filesystem, self.path = pyarrow.fs.FileSystem.from_uri(path) + + # This random UUID is used to create a temporary directory name on the + # local filesystem, which will be used for downloading checkpoint data. + # This ensures that if multiple processes download the same checkpoint object + # only one process performs the actual download while the others wait. + # This prevents duplicated download efforts and data. + # NOTE: Calling `to_directory` from multiple `Checkpoint` objects + # that point to the same (fs, path) will still download the data multiple times. + # This only ensures a canonical temp directory name for a single `Checkpoint`. + self._uuid = uuid.uuid4() + + def __repr__(self): + return f"Checkpoint(filesystem={self.filesystem.type_name}, path={self.path})" + + def get_metadata(self) -> Dict[str, Any]: + """Return the metadata dict stored with the checkpoint. + + If no metadata is stored, an empty dict is returned. 
+ """ + metadata_path = Path(self.path, _METADATA_FILE_NAME).as_posix() + if not _exists_at_fs_path(self.filesystem, metadata_path): + return {} + + with self.filesystem.open_input_file(metadata_path) as f: + return json.loads(f.readall().decode("utf-8")) + + def set_metadata(self, metadata: Dict[str, Any]) -> None: + """Set the metadata stored with this checkpoint. + + This will overwrite any existing metadata stored with this checkpoint. + """ + metadata_path = Path(self.path, _METADATA_FILE_NAME).as_posix() + with self.filesystem.open_output_stream(metadata_path) as f: + f.write(json.dumps(metadata).encode("utf-8")) + + def update_metadata(self, metadata: Dict[str, Any]) -> None: + """Update the metadata stored with this checkpoint. + + This will update any existing metadata stored with this checkpoint. + """ + existing_metadata = self.get_metadata() + existing_metadata.update(metadata) + self.set_metadata(existing_metadata) + + @classmethod + def from_directory(cls, path: Union[str, os.PathLike]) -> "Checkpoint": + """Create checkpoint object from a local directory. + + Args: + path: Local directory containing checkpoint data. + + Returns: + A ray.train.Checkpoint object. + """ + return cls(path, filesystem=pyarrow.fs.LocalFileSystem()) + + def to_directory(self, path: Optional[Union[str, os.PathLike]] = None) -> str: + """Write checkpoint data to a local directory. + + *If multiple processes on the same node call this method simultaneously,* + only a single process will perform the download, while the others + wait for the download to finish. Once the download finishes, all processes + receive the same local directory to read from. + + Args: + path: Target directory to download data to. If not specified, + this method will use a temporary directory. + + Returns: + str: Directory containing checkpoint data. 
+ """ + user_provided_path = path is not None + local_path = ( + path if user_provided_path else self._get_temporary_checkpoint_dir() + ) + local_path = os.path.normpath(os.path.expanduser(str(local_path))) + os.makedirs(local_path, exist_ok=True) + + try: + # Timeout 0 means there will be only one attempt to acquire + # the file lock. If it cannot be acquired, throw a TimeoutError + with TempFileLock(local_path, timeout=0): + _download_from_fs_path( + fs=self.filesystem, fs_path=self.path, local_path=local_path + ) + except TimeoutError: + # if the directory is already locked, then wait but do not do anything. + with TempFileLock(local_path, timeout=-1): + pass + if not os.path.exists(local_path): + raise RuntimeError( + f"Checkpoint directory {local_path} does not exist, " + "even though it should have been created by " + "another process. Please raise an issue on GitHub: " + "https://github.com/ray-project/ray/issues" + ) + + return local_path + + @contextlib.contextmanager + def as_directory(self) -> Iterator[str]: + """Returns checkpoint contents in a local directory as a context. + + This function makes checkpoint data available as a directory while avoiding + unnecessary copies and left-over temporary data. + + *If the checkpoint points to a local directory*, this method just returns the + local directory path without making a copy, and nothing will be cleaned up + after exiting the context. + + *If the checkpoint points to a remote directory*, this method will download the + checkpoint to a local temporary directory and return the path + to the temporary directory. + + *If multiple processes on the same node call this method simultaneously,* + only a single process will perform the download, while the others + wait for the download to finish. Once the download finishes, all processes + receive the same local (temporary) directory to read from. + + Once all processes have finished working with the checkpoint, + the temporary directory is cleaned up. 
+ + Users should treat the returned checkpoint directory as read-only and avoid + changing any data within it, as it may be deleted when exiting the context. + + Example: + + .. testcode:: + :hide: + + from pathlib import Path + import tempfile + + from ray.train import Checkpoint + + temp_dir = tempfile.mkdtemp() + (Path(temp_dir) / "example.txt").write_text("example checkpoint data") + checkpoint = Checkpoint.from_directory(temp_dir) + + .. testcode:: + + with checkpoint.as_directory() as checkpoint_dir: + # Do some read-only processing of files within checkpoint_dir + pass + + # At this point, if a temporary directory was created, it will have + # been deleted. + + """ + if isinstance(self.filesystem, pyarrow.fs.LocalFileSystem): + yield self.path + else: + del_lock_path = _get_del_lock_path(self._get_temporary_checkpoint_dir()) + open(del_lock_path, "a").close() + + temp_dir = self.to_directory() + try: + yield temp_dir + finally: + # Always cleanup the del lock after we're done with the directory. + # This avoids leaving a lock file behind in the case of an exception + # in the user code. + try: + os.remove(del_lock_path) + except Exception: + logger.warning( + f"Could not remove {del_lock_path} deletion file lock. " + f"Traceback:\n{traceback.format_exc()}" + ) + + # If there are no more lock files, that means there are no more + # readers of this directory, and we can safely delete it. + # In the edge case (process crash before del lock file is removed), + # we do not remove the directory at all. + # Since it's in /tmp, this is not that big of a deal. + # check if any lock files are remaining + remaining_locks = _list_existing_del_locks(temp_dir) + if not remaining_locks: + try: + # Timeout 0 means there will be only one attempt to acquire + # the file lock. If it cannot be acquired, a TimeoutError + # will be thrown. 
+ with TempFileLock(temp_dir, timeout=0): + shutil.rmtree(temp_dir, ignore_errors=True) + except TimeoutError: + pass + + def _get_temporary_checkpoint_dir(self) -> str: + """Return the name for the temporary checkpoint dir that this checkpoint + will get downloaded to, if accessing via `to_directory` or `as_directory`. + """ + tmp_dir_path = tempfile.gettempdir() + checkpoint_dir_name = _CHECKPOINT_TEMP_DIR_PREFIX + self._uuid.hex + if platform.system() == "Windows": + # Max path on Windows is 260 chars, -1 for joining \ + # Also leave a little for the del lock + del_lock_name = _get_del_lock_path("") + checkpoint_dir_name = ( + _CHECKPOINT_TEMP_DIR_PREFIX + + self._uuid.hex[ + -259 + + len(_CHECKPOINT_TEMP_DIR_PREFIX) + + len(tmp_dir_path) + + len(del_lock_name) : + ] + ) + if not checkpoint_dir_name.startswith(_CHECKPOINT_TEMP_DIR_PREFIX): + raise RuntimeError( + "Couldn't create checkpoint directory due to length " + "constraints. Try specifying a shorter checkpoint path." + ) + return Path(tmp_dir_path, checkpoint_dir_name).as_posix() + + def __fspath__(self): + raise TypeError( + "You cannot use `Checkpoint` objects directly as paths. " + "Use `Checkpoint.to_directory()` or `Checkpoint.as_directory()` instead." + ) + + +def _get_del_lock_path(path: str, suffix: str = None) -> str: + """Get the path to the deletion lock file for a file/directory at `path`. + + Example: + + >>> _get_del_lock_path("/tmp/checkpoint_tmp") # doctest: +ELLIPSIS + '/tmp/checkpoint_tmp.del_lock_... + >>> _get_del_lock_path("/tmp/checkpoint_tmp/") # doctest: +ELLIPSIS + '/tmp/checkpoint_tmp.del_lock_... + >>> _get_del_lock_path("/tmp/checkpoint_tmp.txt") # doctest: +ELLIPSIS + '/tmp/checkpoint_tmp.txt.del_lock_... + + """ + suffix = suffix if suffix is not None else str(os.getpid()) + return f"{path.rstrip('/')}.del_lock_{suffix}" + + +def _list_existing_del_locks(path: str) -> List[str]: + """List all the deletion lock files for a file/directory at `path`. 
+ + For example, if 2 checkpoints are being read via `as_directory`, + then this should return a list of 2 deletion lock files. + """ + return list(glob.glob(f"{_get_del_lock_path(path, suffix='*')}")) + + +def _get_migration_error(name: str): + return AttributeError( + f"The new `ray.train.Checkpoint` class does not support `{name}()`. " + f"Instead, only directories are supported.\n\n" + f"Example to store a dictionary in a checkpoint:\n\n" + f"import os, tempfile\n" + f"import ray.cloudpickle as pickle\n" + f"from ray import train\n" + f"from ray.train import Checkpoint\n\n" + f"with tempfile.TemporaryDirectory() as checkpoint_dir:\n" + f" with open(os.path.join(checkpoint_dir, 'data.pkl'), 'wb') as fp:\n" + f" pickle.dump({{'data': 'value'}}, fp)\n\n" + f" checkpoint = Checkpoint.from_directory(checkpoint_dir)\n" + f" train.report(..., checkpoint=checkpoint)\n\n" + f"Example to load a dictionary from a checkpoint:\n\n" + f"if train.get_checkpoint():\n" + f" with train.get_checkpoint().as_directory() as checkpoint_dir:\n" + f" with open(os.path.join(checkpoint_dir, 'data.pkl'), 'rb') as fp:\n" + f" data = pickle.load(fp)" + ) + + +def _get_uri_error(name: str): + return AttributeError( + f"The new `ray.train.Checkpoint` class does not support `{name}()`. " + f"To create a checkpoint from remote storage, create a `Checkpoint` using its " + f"constructor instead of `from_directory`.\n" + f'Example: `Checkpoint(path="s3://a/b/c")`.\n' + f"Then, access the contents of the checkpoint with " + f"`checkpoint.as_directory()` / `checkpoint.to_directory()`.\n" + f"To upload data to remote storage, use e.g. `pyarrow.fs.FileSystem` " + f"or your client of choice." + ) + + +def _get_preprocessor_error(name: str): + return AttributeError( + f"The new `ray.train.Checkpoint` class does not support `{name}()`. 
" + f"To include preprocessor information in checkpoints, " + f"pass it as metadata in the Trainer constructor.\n" + f"Example: `TorchTrainer(..., metadata={{...}})`.\n" + f"After training, access it in the checkpoint via `checkpoint.get_metadata()`. " + f"See here: https://docs.ray.io/en/master/train/user-guides/" + f"data-loading-preprocessing.html#preprocessing-structured-data" + ) diff --git a/.venv/lib/python3.11/site-packages/ray/train/backend.py b/.venv/lib/python3.11/site-packages/ray/train/backend.py new file mode 100644 index 0000000000000000000000000000000000000000..b50f5867e7a75f47b36a0778463d7a113be1585d --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/train/backend.py @@ -0,0 +1,59 @@ +import logging +from contextlib import nullcontext +from typing import TypeVar + +from ray.train._internal.utils import Singleton +from ray.train._internal.worker_group import WorkerGroup +from ray.util.annotations import DeveloperAPI +from ray.widgets import make_table_html_repr + +EncodedData = TypeVar("EncodedData") + +logger = logging.getLogger(__name__) + + +@DeveloperAPI +class BackendConfig: + """Parent class for configurations of training backend.""" + + @property + def backend_cls(self): + return Backend + + @property + def train_func_context(self): + return nullcontext + + def _repr_html_(self) -> str: + return make_table_html_repr(obj=self, title=type(self).__name__) + + +@DeveloperAPI +class Backend(metaclass=Singleton): + """Singleton for distributed communication backend. + + Attributes: + share_cuda_visible_devices: If True, each worker + process will have CUDA_VISIBLE_DEVICES set as the visible device + IDs of all workers on the same node for this training instance. + If False, each worker will have CUDA_VISIBLE_DEVICES set to the + device IDs allocated by Ray for that worker. 
+ """ + + share_cuda_visible_devices: bool = False + + def on_start(self, worker_group: WorkerGroup, backend_config: BackendConfig): + """Logic for starting this backend.""" + pass + + def on_shutdown(self, worker_group: WorkerGroup, backend_config: BackendConfig): + """Logic for shutting down the backend.""" + pass + + def on_training_start( + self, worker_group: WorkerGroup, backend_config: BackendConfig + ): + """Logic ran right before training is started. + + Session API is available at this point.""" + pass diff --git a/.venv/lib/python3.11/site-packages/ray/train/base_trainer.py b/.venv/lib/python3.11/site-packages/ray/train/base_trainer.py new file mode 100644 index 0000000000000000000000000000000000000000..04cac51c1ee0633faf58f01a6c61adabbb25234c --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/train/base_trainer.py @@ -0,0 +1,827 @@ +import abc +import copy +import inspect +import json +import logging +import os +import warnings +from functools import partial +from pathlib import Path +from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Type, Union + +import pyarrow.fs + +import ray +import ray.cloudpickle as pickle +from ray._private.dict import deep_update +from ray.air._internal import usage as air_usage +from ray.air._internal.config import ensure_only_allowed_dataclass_keys_updated +from ray.air._internal.usage import AirEntrypoint +from ray.air.config import RunConfig, ScalingConfig +from ray.air.result import Result +from ray.train import Checkpoint +from ray.train._internal.session import get_session +from ray.train._internal.storage import ( + StorageContext, + _exists_at_fs_path, + get_fs_and_path, +) +from ray.util import PublicAPI +from ray.util.annotations import DeveloperAPI + +if TYPE_CHECKING: + from ray.data import Dataset + from ray.tune import Trainable + +_TRAINER_PKL = "trainer.pkl" + +# A type representing either a ray.data.Dataset or a function that returns a +# ray.data.Dataset and accepts no arguments. 
+GenDataset = Union["Dataset", Callable[[], "Dataset"]] + + +logger = logging.getLogger(__name__) + +PREPROCESSOR_DEPRECATION_MESSAGE = ( + "The `preprocessor` argument to Trainers is deprecated as of Ray 2.7. " + "Instead, use the Preprocessor `fit` and `transform` APIs directly on the Ray " + "Dataset. For any state that needs to be saved to the trained checkpoint, pass it " + "in using the `metadata` argument of the `Trainer`. " + "For a full example, see " + "https://docs.ray.io/en/master/train/user-guides/data-loading-preprocessing.html#preprocessing-structured-data " # noqa:E501 +) + + +@PublicAPI(stability="beta") +class TrainingFailedError(RuntimeError): + """An error indicating that training has failed.""" + + _RESTORE_MSG = ( + "The Ray Train run failed. Please inspect the previous error messages for a " + "cause. After fixing the issue (assuming that the error is not caused by " + "your own application logic, but rather an error such as OOM), you can restart " + "the run from scratch or continue this run.\n" + "To continue this run, you can use: " + '`trainer = {trainer_cls_name}.restore("{path}")`.' + ) + + _FAILURE_CONFIG_MSG = ( + "To start a new run that will retry on training failures, set " + "`train.RunConfig(failure_config=train.FailureConfig(max_failures))` " + "in the Trainer's `run_config` with `max_failures > 0`, or `max_failures = -1` " + "for unlimited retries." + ) + + +def _train_coordinator_fn( + config: dict, trainer_cls: Type["BaseTrainer"], metadata: dict +): + """This is the function that defines the logic of the Ray Train coordinator. + This is responsible for setting up a remote instance of the `trainer_cls` + (a different instance than the one calling `trainer.fit` on the driver!) + and running the training loop. + """ + assert metadata is not None, metadata + # Propagate user metadata from the Trainer constructor. + get_session().metadata = metadata + + # config already contains merged values. 
+ # Instantiate new Trainer in Trainable. + trainer = trainer_cls(**config) + + # Get the checkpoint from Tune and pass it to workers later on. + checkpoint = ray.train.get_checkpoint() + if checkpoint: + # Set `starting_checkpoint` for auto-recovery fault-tolerance + # as well as manual restoration. + trainer.starting_checkpoint = checkpoint + # else: Train will restore from the user-provided + # `resume_from_checkpoint` == `starting_checkpoint`. + + # Evaluate datasets if they are wrapped in a factory. + trainer.datasets = { + k: d() if callable(d) else d for k, d in trainer.datasets.items() + } + + trainer.setup() + trainer.training_loop() + + +@DeveloperAPI +class BaseTrainer(abc.ABC): + """Defines interface for distributed training on Ray. + + Note: The base ``BaseTrainer`` class cannot be instantiated directly. Only + one of its subclasses can be used. + + Note to developers: If a new trainer is added, please update + `air/_internal/usage.py`. + + **How does a trainer work?** + + - First, initialize the Trainer. The initialization runs locally, + so heavyweight setup should not be done in ``__init__``. + - Then, when you call ``trainer.fit()``, the Trainer is serialized + and copied to a remote Ray actor. The following methods are then + called in sequence on the remote actor. + - ``trainer.setup()``: Any heavyweight Trainer setup should be + specified here. + - ``trainer.training_loop()``: Executes the main training logic. + - Calling ``trainer.fit()`` will return a ``ray.result.Result`` + object where you can access metrics from your training run, as well + as any checkpoints that may have been saved. + + **How do I create a new Trainer?** + + Subclass ``ray.train.trainer.BaseTrainer``, and override the ``training_loop`` + method, and optionally ``setup``. + + .. 
testcode:: + + import torch + + from ray.train.trainer import BaseTrainer + from ray import train, tune + + + class MyPytorchTrainer(BaseTrainer): + def setup(self): + self.model = torch.nn.Linear(1, 1) + self.optimizer = torch.optim.SGD( + self.model.parameters(), lr=0.1) + + def training_loop(self): + # You can access any Trainer attributes directly in this method. + # self.datasets["train"] has already been + dataset = self.datasets["train"] + + torch_ds = dataset.iter_torch_batches(dtypes=torch.float) + loss_fn = torch.nn.MSELoss() + + for epoch_idx in range(10): + loss = 0 + num_batches = 0 + torch_ds = dataset.iter_torch_batches( + dtypes=torch.float, batch_size=2 + ) + for batch in torch_ds: + X = torch.unsqueeze(batch["x"], 1) + y = torch.unsqueeze(batch["y"], 1) + # Compute prediction error + pred = self.model(X) + batch_loss = loss_fn(pred, y) + + # Backpropagation + self.optimizer.zero_grad() + batch_loss.backward() + self.optimizer.step() + + loss += batch_loss.item() + num_batches += 1 + loss /= num_batches + + # Use Tune functions to report intermediate + # results. + train.report({"loss": loss, "epoch": epoch_idx}) + + + # Initialize the Trainer, and call Trainer.fit() + import ray + train_dataset = ray.data.from_items( + [{"x": i, "y": i} for i in range(10)]) + my_trainer = MyPytorchTrainer(datasets={"train": train_dataset}) + result = my_trainer.fit() + + .. testoutput:: + :hide: + + ... + + Args: + scaling_config: Configuration for how to scale training. + run_config: Configuration for the execution of the training run. + datasets: Any Datasets to use for training. Use the key "train" + to denote which dataset is the training dataset. + metadata: Dict that should be made available via + `train.get_context().get_metadata()` and in `checkpoint.get_metadata()` + for checkpoints saved from this Trainer. Must be JSON-serializable. + resume_from_checkpoint: A checkpoint to resume training from. 
+ """ + + _scaling_config_allowed_keys: List[str] = [ + "trainer_resources", + ] + _handles_checkpoint_freq: bool = False + _handles_checkpoint_at_end: bool = False + + # fields to propagate to Tuner param_space. + # See `BaseTrainer._extract_fields_for_tuner_param_space` for more details. + _fields_for_tuner_param_space = [] + + def __init__( + self, + *, + scaling_config: Optional[ScalingConfig] = None, + run_config: Optional[RunConfig] = None, + datasets: Optional[Dict[str, GenDataset]] = None, + metadata: Optional[Dict[str, Any]] = None, + resume_from_checkpoint: Optional[Checkpoint] = None, + ): + self.scaling_config = ( + scaling_config if scaling_config is not None else ScalingConfig() + ) + self.run_config = ( + copy.copy(run_config) if run_config is not None else RunConfig() + ) + self.metadata = metadata + self.datasets = datasets if datasets is not None else {} + self.starting_checkpoint = resume_from_checkpoint + + # These attributes should only be set through `BaseTrainer.restore` + self._restore_path = None + self._restore_storage_filesystem = None + + self._validate_attributes() + + air_usage.tag_air_trainer(self) + + @PublicAPI(stability="alpha") + @classmethod + def restore( + cls: Type["BaseTrainer"], + path: Union[str, os.PathLike], + storage_filesystem: Optional[pyarrow.fs.FileSystem] = None, + datasets: Optional[Dict[str, GenDataset]] = None, + scaling_config: Optional[ScalingConfig] = None, + **kwargs, + ) -> "BaseTrainer": + """Restores a Train experiment from a previously interrupted/failed run. + + Restore should be used for experiment-level fault tolerance in the event + that the head node crashes (e.g., OOM or some other runtime error) or the + entire cluster goes down (e.g., network error affecting all nodes). + + A run that has already completed successfully will not be resumed from this API. 
+ To continue training from a successful run, launch a new run with the + ``Trainer(resume_from_checkpoint)`` API instead, passing in a + checkpoint from the previous run to start with. + + .. note:: + + Restoring an experiment from a path that's pointing to a *different* + location than the original experiment path is supported. However, Ray Train + assumes that the full experiment directory is available + (including checkpoints) so that it's possible to resume trials from their + latest state. + + For example, if the original experiment path was run locally, then the + results are uploaded to cloud storage, Ray Train expects the full contents + to be available in cloud storage if attempting to resume + via ``Trainer.restore("s3://...")``. The restored run will + continue writing results to the same cloud storage location. + + The following example can be paired with implementing job retry using + :ref:`Ray Jobs ` to produce a Train experiment that will + attempt to resume on both experiment-level and trial-level failures: + + .. testcode:: + + import os + import ray + from ray import train + from ray.train.trainer import BaseTrainer + + experiment_name = "unique_experiment_name" + storage_path = os.path.expanduser("~/ray_results") + experiment_dir = os.path.join(storage_path, experiment_name) + + # Define some dummy inputs for demonstration purposes + datasets = {"train": ray.data.from_items([{"a": i} for i in range(10)])} + + class CustomTrainer(BaseTrainer): + def training_loop(self): + pass + + if CustomTrainer.can_restore(experiment_dir): + trainer = CustomTrainer.restore( + experiment_dir, datasets=datasets + ) + else: + trainer = CustomTrainer( + datasets=datasets, + run_config=train.RunConfig( + name=experiment_name, + storage_path=storage_path, + # Tip: You can also enable retries on failure for + # worker-level fault tolerance + failure_config=train.FailureConfig(max_failures=3), + ), + ) + + result = trainer.fit() + + .. testoutput:: + :hide: + + ... 
+ + Args: + path: The path to the experiment directory of the training run to restore. + This can be a local path or a remote URI if the experiment was + uploaded to the cloud. + storage_filesystem: Custom ``pyarrow.fs.FileSystem`` + corresponding to the ``path``. This may be necessary if the original + experiment passed in a custom filesystem. + datasets: Re-specified datasets used in the original training run. + This must include all the datasets that were passed in the + original trainer constructor. + scaling_config: Optionally re-specified scaling config. This can be + modified to be different from the original spec. + **kwargs: Other optionally re-specified arguments, passed in by subclasses. + + Raises: + ValueError: If all datasets were not re-supplied on restore. + + Returns: + BaseTrainer: A restored instance of the class that is calling this method. + """ + if not cls.can_restore(path, storage_filesystem): + raise ValueError( + f"Invalid restore path: {path}. Make sure that this path exists and " + "is the experiment directory that results from a call to " + "`trainer.fit()`." + ) + fs, fs_path = get_fs_and_path(path, storage_filesystem) + trainer_pkl_path = Path(fs_path, _TRAINER_PKL).as_posix() + with fs.open_input_file(trainer_pkl_path) as f: + trainer_cls, param_dict = pickle.loads(f.readall()) + + if trainer_cls is not cls: + warnings.warn( + f"Invalid trainer type. You are attempting to restore a trainer of type" + f" {trainer_cls} with `{cls.__name__}.restore`, " + "which will most likely fail. " + f"Use `{trainer_cls.__name__}.restore` instead." + ) + + original_datasets = param_dict.pop("datasets", {}) + if original_datasets and not datasets: + raise ValueError( + "The following datasets need to be provided again on restore: " + f"{list(original_datasets.keys())}\n" + f"Use {cls.__name__}.restore(..., datasets=datasets) " + "with the datasets that were provided to the original trainer." 
+ ) + datasets = datasets or {} + if set(original_datasets) != set(datasets): + raise ValueError( + "The provided datasets don't match the original dataset keys.\n" + f" Expected datasets for the keys: {list(original_datasets.keys())}\n" + f" Actual datasets provided: {list(datasets.keys())}" + ) + param_dict["datasets"] = datasets + + if scaling_config: + param_dict["scaling_config"] = scaling_config + + for param_name, val in kwargs.items(): + # Overwrite the old value if something is passed into restore + if val is not None: + param_dict[param_name] = val + + try: + trainer = cls(**param_dict) + except Exception as e: + raise ValueError( + "Trainer restoration failed (see above for the stack trace). " + "Make sure that you use the right trainer class to restore: " + f"`{cls.__name__}.restore`\n" + ) from e + trainer._restore_path = path + trainer._restore_storage_filesystem = storage_filesystem + return trainer + + @PublicAPI(stability="alpha") + @classmethod + def can_restore( + cls: Type["BaseTrainer"], + path: Union[str, os.PathLike], + storage_filesystem: Optional[pyarrow.fs.FileSystem] = None, + ) -> bool: + """Checks whether a given directory contains a restorable Train experiment. + + Args: + path: The path to the experiment directory of the Train experiment. + This can be either a local directory (e.g., ~/ray_results/exp_name) + or a remote URI (e.g., s3://bucket/exp_name). + + Returns: + bool: Whether this path exists and contains the trainer state to resume from + """ + fs, fs_path = get_fs_and_path(path, storage_filesystem) + trainer_pkl_path = Path(fs_path, _TRAINER_PKL).as_posix() + return _exists_at_fs_path(fs, trainer_pkl_path) + + def __repr__(self): + # A dictionary that maps parameters to their default values. 
+ default_values: Dict[str, Any] = { + "scaling_config": ScalingConfig(), + "run_config": RunConfig(), + "datasets": {}, + "starting_checkpoint": None, + } + + non_default_arguments = [] + for parameter, default_value in default_values.items(): + value = getattr(self, parameter) + if value != default_value: + non_default_arguments.append(f"{parameter}={value!r}") + + if non_default_arguments: + return f"<{self.__class__.__name__} {' '.join(non_default_arguments)}>" + + return f"<{self.__class__.__name__}>" + + def __new__(cls, *args, **kwargs): + # Store the init args as attributes so this can be merged with Tune hparams. + trainer = super(BaseTrainer, cls).__new__(cls) + parameters = inspect.signature(cls.__init__).parameters + parameters = list(parameters.keys()) + # Remove self. + parameters = parameters[1:] + arg_dict = dict(zip(parameters, args)) + trainer._param_dict = {**arg_dict, **kwargs} + return trainer + + def _validate_attributes(self): + """Called on __init()__ to validate trainer attributes.""" + # Run config + if not isinstance(self.run_config, RunConfig): + raise ValueError( + f"`run_config` should be an instance of `ray.train.RunConfig`, " + f"found {type(self.run_config)} with value `{self.run_config}`." + ) + # Scaling config + if not isinstance(self.scaling_config, ScalingConfig): + raise ValueError( + "`scaling_config` should be an instance of `ScalingConfig`, " + f"found {type(self.scaling_config)} with value `{self.scaling_config}`." + ) + # Datasets + if not isinstance(self.datasets, dict): + raise ValueError( + f"`datasets` should be a dict mapping from a string to " + f"`ray.data.Dataset` objects, " + f"found {type(self.datasets)} with value `{self.datasets}`." + ) + else: + for key, dataset in self.datasets.items(): + if not isinstance(dataset, ray.data.Dataset) and not callable(dataset): + raise ValueError( + f"The Dataset under '{key}' key is not a " + "`ray.data.Dataset`. " + f"Received {dataset} instead." + ) + # Metadata. 
+ self.metadata = self.metadata or {} + if not isinstance(self.metadata, dict): + raise TypeError( + f"The provided metadata must be a dict, was {type(self.metadata)}." + ) + try: + self.metadata = json.loads(json.dumps(self.metadata)) + except Exception as e: + raise ValueError( + "The provided metadata must be JSON-serializable: " + f"{self.metadata}: {e}" + ) + + if self.starting_checkpoint is not None and not isinstance( + self.starting_checkpoint, Checkpoint + ): + raise ValueError( + f"`resume_from_checkpoint` should be an instance of " + f"`ray.train.Checkpoint`, found {type(self.starting_checkpoint)} " + f"with value `{self.starting_checkpoint}`." + ) + + @classmethod + def _validate_scaling_config(cls, scaling_config: ScalingConfig) -> ScalingConfig: + """Returns scaling config dataclass after validating updated keys.""" + ensure_only_allowed_dataclass_keys_updated( + dataclass=scaling_config, + allowed_keys=cls._scaling_config_allowed_keys, + ) + return scaling_config + + def setup(self) -> None: + """Called during fit() to perform initial setup on the Trainer. + + .. note:: This method is run on a remote process. + + This method will not be called on the driver, so any expensive setup + operations should be placed here and not in ``__init__``. + + This method is called prior to ``preprocess_datasets`` and + ``training_loop``. + """ + pass + + def preprocess_datasets(self) -> None: + """Deprecated.""" + raise DeprecationWarning( + "`preprocess_datasets` is no longer used, since preprocessors " + f"are no longer accepted by Trainers.\n{PREPROCESSOR_DEPRECATION_MESSAGE}" + ) + + @abc.abstractmethod + def training_loop(self) -> None: + """Loop called by fit() to run training and report results to Tune. + + .. note:: This method runs on a remote process. + + ``self.datasets`` have already been evaluated if they were wrapped in a factory. 
+ + You can use the :ref:`Ray Train utilities ` + (:func:`train.report() ` and + :func:`train.get_checkpoint() `) inside + this training loop. + + Example: + + .. testcode:: + + from ray.train.trainer import BaseTrainer + from ray import train + + class MyTrainer(BaseTrainer): + def training_loop(self): + for epoch_idx in range(5): + ... + train.report({"epoch": epoch_idx}) + + """ + raise NotImplementedError + + @PublicAPI(stability="beta") + def fit(self) -> Result: + """Runs training. + + Returns: + A Result object containing the training result. + + Raises: + TrainingFailedError: If any failures during the execution + of ``self.as_trainable()``, or during the Tune execution loop. + """ + from ray.tune import ResumeConfig, TuneError + from ray.tune.tuner import Tuner + + trainable = self.as_trainable() + param_space = self._extract_fields_for_tuner_param_space() + + self.run_config.name = ( + self.run_config.name or StorageContext.get_experiment_dir_name(trainable) + ) + # The storage context here is only used to access the resolved + # storage fs and experiment path, in order to avoid duplicating that logic. + # This is NOT the storage context object that gets passed to remote workers. 
+ storage = StorageContext( + storage_path=self.run_config.storage_path, + experiment_dir_name=self.run_config.name, + storage_filesystem=self.run_config.storage_filesystem, + ) + + if self._restore_path: + tuner = Tuner.restore( + path=self._restore_path, + trainable=trainable, + param_space=param_space, + _resume_config=ResumeConfig( + finished=ResumeConfig.ResumeType.RESUME, + unfinished=ResumeConfig.ResumeType.RESUME, + errored=ResumeConfig.ResumeType.RESUME, + ), + storage_filesystem=self._restore_storage_filesystem, + ) + else: + tuner = Tuner( + trainable=trainable, + param_space=param_space, + run_config=self.run_config, + _entrypoint=AirEntrypoint.TRAINER, + ) + + self._save(storage.storage_filesystem, storage.experiment_fs_path) + + restore_msg = TrainingFailedError._RESTORE_MSG.format( + trainer_cls_name=self.__class__.__name__, + path=str(storage.experiment_fs_path), + ) + + try: + result_grid = tuner.fit() + except TuneError as e: + # Catch any `TuneError`s raised by the `Tuner.fit` call. + # Unwrap the `TuneError` if needed. + parent_error = e.__cause__ or e + + # Raise it to the user as a `TrainingFailedError` with a message to restore. + raise TrainingFailedError(restore_msg) from parent_error + # Other exceptions get passed through directly (ex: on `fail_fast='raise'`) + + assert len(result_grid) == 1 + result = result_grid[0] + if result.error: + # Raise trainable errors to the user with a message to restore + # or configure `FailureConfig` in a new run. + raise TrainingFailedError( + "\n".join([restore_msg, TrainingFailedError._FAILURE_CONFIG_MSG]) + ) from result.error + return result + + def _save(self, fs: pyarrow.fs.FileSystem, experiment_path: str): + """Saves the current trainer's class along with the `param_dict` of + parameters passed to this trainer's constructor. + + This is used to recreate the trainer on restore. 
+ Unless a parameter is re-specified during restoration (only a subset + of parameters can be passed in again), that parameter will be loaded + from the saved copy. + + Datasets should not be saved as part of the state. Instead, we save the + keys and replace the dataset values with dummy functions that will + raise an error if invoked. The error only serves as a guardrail for + misuse (e.g., manually unpickling and constructing the Trainer again) + and is not typically surfaced, since datasets must be re-specified + upon restoration. + """ + param_dict = self._param_dict.copy() + datasets = param_dict.pop("datasets", {}) + + def raise_fn(): + raise RuntimeError + + if datasets: + param_dict["datasets"] = { + dataset_name: raise_fn for dataset_name in datasets + } + + cls_and_param_dict = (self.__class__, param_dict) + + fs.create_dir(experiment_path) + with fs.open_output_stream(Path(experiment_path, _TRAINER_PKL).as_posix()) as f: + f.write(pickle.dumps(cls_and_param_dict)) + + def _extract_fields_for_tuner_param_space(self) -> Dict: + """Extracts fields to be included in `Tuner.param_space`. + + This is needed to leverage the full logging/integration offerings from Tune. + For example, `param_space` is logged automatically to wandb integration. + + Currently only done for `train_loop_config`. + + Returns: + A dictionary that should be passed to Tuner.param_space. + """ + result = {} + for key in self._fields_for_tuner_param_space: + if key in self._param_dict.keys(): + result[key] = copy.deepcopy(self._param_dict[key]) + return result + + def _generate_trainable_cls(self) -> Type["Trainable"]: + """Generates the base Trainable class. + + Returns: + A Trainable class to use for training. 
+ """ + + from ray.tune.execution.placement_groups import PlacementGroupFactory + from ray.tune.trainable import wrap_function + + trainer_cls = self.__class__ + scaling_config = self.scaling_config + metadata = self.metadata + + train_coordinator_fn = partial( + _train_coordinator_fn, trainer_cls=trainer_cls, metadata=metadata + ) + # Change the name of the training function to match the name of the Trainer + # class. This will mean the Tune trial name will match the name of Trainer on + # stdout messages and the results directory. + train_coordinator_fn.__name__ = trainer_cls.__name__ + + trainable_cls = wrap_function(train_coordinator_fn) + has_base_dataset = bool(self.datasets) + if has_base_dataset: + from ray.data.context import DataContext + + dataset_context = DataContext.get_current() + else: + dataset_context = None + + class TrainTrainable(trainable_cls): + """Adds default resources to the Trainable.""" + + _handles_checkpoint_freq = trainer_cls._handles_checkpoint_freq + _handles_checkpoint_at_end = trainer_cls._handles_checkpoint_at_end + + @classmethod + def has_base_dataset(cls) -> bool: + """Whether a dataset is provided through the Trainer.""" + return has_base_dataset + + @classmethod + def base_scaling_config(cls) -> ScalingConfig: + """Returns the unchanged scaling config provided through the Trainer.""" + return scaling_config + + def setup(self, config, **kwargs): + base_config = dict(kwargs) + # Merge Tuner param space hyperparameters in `config` into the + # base config passed to the Trainer constructor, which is `base_config`. + # `base_config` is pulled from the object store from the usage of + # tune.with_parameters in `BaseTrainer.as_trainable`. + + # run_config is not a tunable hyperparameter so it does not need to be + # merged. 
+ run_config = base_config.pop("run_config", None) + self._merged_config = deep_update( + base_config, self.config, new_keys_allowed=True + ) + self._merged_config["run_config"] = run_config + merged_scaling_config = self._merged_config.get( + "scaling_config", ScalingConfig() + ) + if isinstance(merged_scaling_config, dict): + merged_scaling_config = ScalingConfig(**merged_scaling_config) + self._merged_config[ + "scaling_config" + ] = self._reconcile_scaling_config_with_trial_resources( + merged_scaling_config + ) + if self.has_base_dataset(): + # Set the DataContext on the Trainer actor to the DataContext + # specified on the driver. + DataContext._set_current(dataset_context) + super(TrainTrainable, self).setup(config) + + def _reconcile_scaling_config_with_trial_resources( + self, scaling_config: ScalingConfig + ) -> ScalingConfig: + """ + ResourceChangingScheduler workaround. + + Ensures that the scaling config matches trial resources. + + This should be replaced with RCS returning a ScalingConfig + in the future. + """ + + trial_resources = self.trial_resources + # This will be false if the resources are default + if not isinstance(trial_resources, PlacementGroupFactory): + return scaling_config + + # Ignore ResourceChangingScheduler workaround when resource bundles + # are unchanged + if self.trial_resources == scaling_config.as_placement_group_factory(): + return scaling_config + + trainer_cls._validate_scaling_config(scaling_config) + + return ScalingConfig.from_placement_group_factory(trial_resources) + + def _trainable_func(self, config): + # We ignore the config passed by Tune and instead use the merged + # config which includes the initial Trainer args. + super()._trainable_func(self._merged_config) + + @classmethod + def default_resource_request(cls, config): + # `config["scaling_config"] is a dataclass when passed via the + # `scaling_config` argument in `Trainer` and is a dict when passed + # via the `scaling_config` key of `param_spec`. 
+ + # Conversion logic must be duplicated in `TrainTrainable.__init__` + # because this is a class method. + updated_scaling_config = config.get("scaling_config", scaling_config) + if isinstance(updated_scaling_config, dict): + updated_scaling_config = ScalingConfig(**updated_scaling_config) + validated_scaling_config = trainer_cls._validate_scaling_config( + updated_scaling_config + ) + return validated_scaling_config.as_placement_group_factory() + + return TrainTrainable + + def as_trainable(self) -> Type["Trainable"]: + """Converts self to a ``tune.Trainable`` class.""" + from ray import tune + + base_config = self._param_dict + trainable_cls = self._generate_trainable_cls() + + # Wrap with `tune.with_parameters` to handle very large values in base_config + return tune.with_parameters(trainable_cls, **base_config) diff --git a/.venv/lib/python3.11/site-packages/ray/train/constants.py b/.venv/lib/python3.11/site-packages/ray/train/constants.py new file mode 100644 index 0000000000000000000000000000000000000000..4e42e30f4370a729a161a1485ba3a8811320d8af --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/train/constants.py @@ -0,0 +1,118 @@ +from pathlib import Path + +import ray +from ray._private.ray_constants import env_bool +from ray.air.constants import ( # noqa: F401 + COPY_DIRECTORY_CHECKPOINTS_INSTEAD_OF_MOVING_ENV, + EVALUATION_DATASET_KEY, + MODEL_KEY, + PREPROCESSOR_KEY, + TRAIN_DATASET_KEY, +) + + +def _get_ray_train_session_dir() -> str: + assert ray.is_initialized(), "Ray must be initialized to get the session dir." + return Path( + ray._private.worker._global_node.get_session_dir_path(), "artifacts" + ).as_posix() + + +DEFAULT_STORAGE_PATH = Path("~/ray_results").expanduser().as_posix() + +# Autofilled ray.train.report() metrics. Keys should be consistent with Tune. 
+CHECKPOINT_DIR_NAME = "checkpoint_dir_name"
+TIME_TOTAL_S = "_time_total_s"
+WORKER_HOSTNAME = "_hostname"
+WORKER_NODE_IP = "_node_ip"
+WORKER_PID = "_pid"
+
+# These detailed keys are only reported when the
+# ENABLE_DETAILED_AUTOFILLED_METRICS_ENV env var is set to a non-zero value.
+DETAILED_AUTOFILLED_KEYS = {WORKER_HOSTNAME, WORKER_NODE_IP, WORKER_PID, TIME_TOTAL_S}
+
+# Default filename for JSON logger
+RESULT_FILE_JSON = "results.json"
+
+# The name of the subdirectory inside the trainer run_dir to store checkpoints.
+TRAIN_CHECKPOINT_SUBDIR = "checkpoints"
+
+# The key to use to specify the checkpoint id for Tune.
+# This needs to be added to the checkpoint dictionary so if the Tune trial
+# is restarted, the checkpoint_id can continue to increment.
+TUNE_CHECKPOINT_ID = "_current_checkpoint_id"
+
+# Deprecated configs can use this value to detect if the user has set it.
+_DEPRECATED_VALUE = "DEPRECATED"
+
+# ==================================================
+# Environment Variables
+# ==================================================
+
+ENABLE_DETAILED_AUTOFILLED_METRICS_ENV = (
+    "TRAIN_RESULT_ENABLE_DETAILED_AUTOFILLED_METRICS"
+)
+
+# Integer value which if set will override the value of
+# Backend.share_cuda_visible_devices. 1 for True, 0 for False.
+ENABLE_SHARE_CUDA_VISIBLE_DEVICES_ENV = "TRAIN_ENABLE_SHARE_CUDA_VISIBLE_DEVICES"
+
+# Integer value controlling whether ROCR accelerator visible devices are
+# shared across workers. 1 to share (default), 0 to disable sharing.
+ENABLE_SHARE_ROCR_VISIBLE_DEVICES_ENV = "TRAIN_ENABLE_SHARE_ROCR_VISIBLE_DEVICES"
+
+# Integer value controlling whether neuron-core accelerator visible cores are
+# shared across workers. 1 to share (default), 0 to disable sharing.
+ENABLE_SHARE_NEURON_CORES_ACCELERATOR_ENV = (
+    "TRAIN_ENABLE_SHARE_NEURON_CORES_ACCELERATOR"
+)
+
+# Integer value controlling whether npu visible devices are shared across
+# workers. 1 to share (default), 0 to disable sharing.
+ENABLE_SHARE_NPU_RT_VISIBLE_DEVICES_ENV = "TRAIN_ENABLE_SHARE_ASCEND_RT_VISIBLE_DEVICES"
+
+# Integer value which indicates the number of seconds to wait when creating
+# the worker placement group before timing out.
+TRAIN_PLACEMENT_GROUP_TIMEOUT_S_ENV = "TRAIN_PLACEMENT_GROUP_TIMEOUT_S"
+
+# Integer value which if set will change the placement group strategy from
+# PACK to SPREAD. 1 for True, 0 for False.
+TRAIN_ENABLE_WORKER_SPREAD_ENV = "TRAIN_ENABLE_WORKER_SPREAD"
+
+# Set this to 0 to disable changing the working directory of each Tune Trainable
+# or Train worker to the trial directory. Defaults to 1.
+RAY_CHDIR_TO_TRIAL_DIR = "RAY_CHDIR_TO_TRIAL_DIR"
+
+# Set this to 1 to count preemption errors toward `FailureConfig(max_failures)`.
+# Defaults to 0, which always retries on node preemption failures.
+RAY_TRAIN_COUNT_PREEMPTION_AS_FAILURE = "RAY_TRAIN_COUNT_PREEMPTION_AS_FAILURE"
+
+# Set this to 1 to start a StateActor and collect information about Train Runs.
+# Defaults to 0.
+RAY_TRAIN_ENABLE_STATE_TRACKING = "RAY_TRAIN_ENABLE_STATE_TRACKING"
+
+# Set this to 1 to enable deprecation warnings for V2 migration.
+ENABLE_V2_MIGRATION_WARNINGS_ENV_VAR = "RAY_TRAIN_ENABLE_V2_MIGRATION_WARNINGS"
+
+
+def _v2_migration_warnings_enabled() -> bool:
+    return env_bool(ENABLE_V2_MIGRATION_WARNINGS_ENV_VAR, False)  # off by default
+
+
+# NOTE: When adding a new environment variable, please track it in this list. (Review: ENABLE_SHARE_ROCR_VISIBLE_DEVICES_ENV, ENABLE_SHARE_NPU_RT_VISIBLE_DEVICES_ENV, and ENABLE_V2_MIGRATION_WARNINGS_ENV_VAR appear to be missing from TRAIN_ENV_VARS below -- TODO confirm and add.)
+TRAIN_ENV_VARS = { + ENABLE_DETAILED_AUTOFILLED_METRICS_ENV, + ENABLE_SHARE_CUDA_VISIBLE_DEVICES_ENV, + ENABLE_SHARE_NEURON_CORES_ACCELERATOR_ENV, + TRAIN_PLACEMENT_GROUP_TIMEOUT_S_ENV, + TRAIN_ENABLE_WORKER_SPREAD_ENV, + RAY_CHDIR_TO_TRIAL_DIR, + RAY_TRAIN_COUNT_PREEMPTION_AS_FAILURE, + RAY_TRAIN_ENABLE_STATE_TRACKING, +} + +# Key for AIR Checkpoint metadata in TrainingResult metadata +CHECKPOINT_METADATA_KEY = "checkpoint_metadata" + +# Key for AIR Checkpoint world rank in TrainingResult metadata +CHECKPOINT_RANK_KEY = "checkpoint_rank" diff --git a/.venv/lib/python3.11/site-packages/ray/train/context.py b/.venv/lib/python3.11/site-packages/ray/train/context.py new file mode 100644 index 0000000000000000000000000000000000000000..bc447b36f2024dba0c2e88aae26d4551b3c3e23f --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/train/context.py @@ -0,0 +1,139 @@ +import threading +from typing import TYPE_CHECKING, Any, Dict, Optional + +from ray.train._internal import session +from ray.train._internal.storage import StorageContext +from ray.train.constants import _v2_migration_warnings_enabled +from ray.train.utils import _copy_doc, _log_deprecation_warning +from ray.util.annotations import Deprecated, DeveloperAPI, PublicAPI + +if TYPE_CHECKING: + from ray.tune.execution.placement_groups import PlacementGroupFactory + + +# The context singleton on this process. +_default_context: "Optional[TrainContext]" = None +_context_lock = threading.Lock() + + +_GET_METADATA_DEPRECATION_MESSAGE = ( + "`get_metadata` was an experimental API that accessed the metadata passed " + "to `Trainer(metadata=...)`. This API can be replaced by passing " + "the metadata directly to the training function (e.g., via `train_loop_config`)." +) + +_TUNE_SPECIFIC_CONTEXT_DEPRECATION_MESSAGE = ( + "`{}` is deprecated because the concept of a `Trial` will " + "soon be removed in Ray Train (see here: " + "https://github.com/ray-project/enhancements/pull/57). 
" + "Ray Train will no longer assume that it's running within a Ray Tune `Trial` " + "in the future." +) + + +@PublicAPI(stability="stable") +class TrainContext: + """Context containing metadata that can be accessed within Ray Train workers.""" + + @_copy_doc(session.get_experiment_name) + def get_experiment_name(self) -> str: + return session.get_experiment_name() + + @_copy_doc(session.get_world_size) + def get_world_size(self) -> int: + return session.get_world_size() + + @_copy_doc(session.get_world_rank) + def get_world_rank(self) -> int: + return session.get_world_rank() + + @_copy_doc(session.get_local_rank) + def get_local_rank(self) -> int: + return session.get_local_rank() + + @_copy_doc(session.get_local_world_size) + def get_local_world_size(self) -> int: + return session.get_local_world_size() + + @_copy_doc(session.get_node_rank) + def get_node_rank(self) -> int: + return session.get_node_rank() + + @DeveloperAPI + @_copy_doc(session.get_storage) + def get_storage(self) -> StorageContext: + return session.get_storage() + + # Deprecated APIs + + @Deprecated( + message=_GET_METADATA_DEPRECATION_MESSAGE, + warning=_v2_migration_warnings_enabled(), + ) + @_copy_doc(session.get_metadata) + def get_metadata(self) -> Dict[str, Any]: + return session.get_metadata() + + @Deprecated( + message=_TUNE_SPECIFIC_CONTEXT_DEPRECATION_MESSAGE.format("get_trial_name"), + warning=_v2_migration_warnings_enabled(), + ) + @_copy_doc(session.get_trial_name) + def get_trial_name(self) -> str: + return session.get_trial_name() + + @Deprecated( + message=_TUNE_SPECIFIC_CONTEXT_DEPRECATION_MESSAGE.format("get_trial_id"), + warning=_v2_migration_warnings_enabled(), + ) + @_copy_doc(session.get_trial_id) + def get_trial_id(self) -> str: + return session.get_trial_id() + + @Deprecated( + message=_TUNE_SPECIFIC_CONTEXT_DEPRECATION_MESSAGE.format( + "get_trial_resources" + ), + warning=_v2_migration_warnings_enabled(), + ) + @_copy_doc(session.get_trial_resources) + def 
get_trial_resources(self) -> "PlacementGroupFactory": + return session.get_trial_resources() + + @Deprecated( + message=_TUNE_SPECIFIC_CONTEXT_DEPRECATION_MESSAGE.format("get_trial_dir"), + warning=_v2_migration_warnings_enabled(), + ) + @_copy_doc(session.get_trial_dir) + def get_trial_dir(self) -> str: + return session.get_trial_dir() + + +@PublicAPI(stability="stable") +def get_context() -> TrainContext: + """Get or create a singleton training context. + + The context is only available within a function passed to Ray Train. + + See the :class:`~ray.train.TrainContext` API reference to see available methods. + """ + from ray.tune.trainable.trainable_fn_utils import _in_tune_session + + # If we are running in a Tune function, switch to Tune context. + if _in_tune_session(): + from ray.tune import get_context as get_tune_context + + if _v2_migration_warnings_enabled(): + _log_deprecation_warning( + "`ray.train.get_context()` should be switched to " + "`ray.tune.get_context()` when running in a function " + "passed to Ray Tune. This will be an error in the future." 
+ ) + return get_tune_context() + + global _default_context + + with _context_lock: + if _default_context is None: + _default_context = TrainContext() + return _default_context diff --git a/.venv/lib/python3.11/site-packages/ray/train/data_parallel_trainer.py b/.venv/lib/python3.11/site-packages/ray/train/data_parallel_trainer.py new file mode 100644 index 0000000000000000000000000000000000000000..a14dc47d36dd3d238c0945675bf18a9310aa1b60 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/train/data_parallel_trainer.py @@ -0,0 +1,587 @@ +import logging +import uuid +from typing import Any, Callable, Dict, List, Optional, Type, Union + +import ray +from ray._private.ray_constants import env_integer +from ray._private.thirdparty.tabulate.tabulate import tabulate +from ray.air.config import RunConfig, ScalingConfig +from ray.train import BackendConfig, Checkpoint, TrainingIterator +from ray.train._internal import session +from ray.train._internal.backend_executor import BackendExecutor, TrialInfo +from ray.train._internal.data_config import DataConfig +from ray.train._internal.session import _TrainingResult, get_session +from ray.train._internal.utils import construct_train_func, count_required_parameters +from ray.train.constants import RAY_TRAIN_ENABLE_STATE_TRACKING +from ray.train.trainer import BaseTrainer, GenDataset +from ray.util.annotations import DeveloperAPI, PublicAPI +from ray.widgets import Template +from ray.widgets.util import repr_with_fallback + +logger = logging.getLogger(__name__) + + +@DeveloperAPI +class DataParallelTrainer(BaseTrainer): + """A Trainer for data parallel training. + + You should subclass this Trainer if your Trainer follows SPMD (single program, + multiple data) programming paradigm - you want multiple processes to run the same + function, but on different data. + + This Trainer runs the function ``train_loop_per_worker`` on multiple Ray + Actors. 
+ + The ``train_loop_per_worker`` function is expected to take in either 0 or 1 + arguments: + + .. testcode:: + + def train_loop_per_worker(): + ... + + .. testcode:: + + def train_loop_per_worker(config: Dict): + ... + + If ``train_loop_per_worker`` accepts an argument, then + ``train_loop_config`` will be passed in as the argument. This is useful if you + want to tune the values in ``train_loop_config`` as hyperparameters. + + If the ``datasets`` dict contains a training dataset (denoted by + the "train" key), then it will be split into multiple dataset + shards that can then be accessed by ``train.get_dataset_shard("train")`` inside + ``train_loop_per_worker``. All the other datasets will not be split and + ``train.get_dataset_shard(...)`` will return the the entire Dataset. + + Inside the ``train_loop_per_worker`` function, you can use any of the + :ref:`Ray Train loop methods `. + + .. testcode:: + + from ray import train + + def train_loop_per_worker(): + # Report intermediate results for callbacks or logging and + # checkpoint data. + train.report(...) + + # Returns dict of last saved checkpoint. + train.get_checkpoint() + + # Returns the Dataset shard for the given key. + train.get_dataset_shard("my_dataset") + + # Returns the total number of workers executing training. + train.get_context().get_world_size() + + # Returns the rank of this worker. + train.get_context().get_world_rank() + + # Returns the rank of the worker on the current node. + train.get_context().get_local_rank() + + Any returns from the ``train_loop_per_worker`` will be discarded and not + used or persisted anywhere. + + **How do I use DataParallelTrainer or any of its subclasses?** + + Example: + + .. 
testcode:: + + import ray + from ray import train + from ray.train import ScalingConfig + from ray.train.data_parallel_trainer import DataParallelTrainer + + def train_loop_for_worker(): + dataset_shard_for_this_worker = train.get_dataset_shard("train") + + # 3 items for 3 workers, each worker gets 1 item + batches = list(dataset_shard_for_this_worker.iter_batches(batch_size=1)) + assert len(batches) == 1 + + train_dataset = ray.data.from_items([1, 2, 3]) + assert train_dataset.count() == 3 + trainer = DataParallelTrainer( + train_loop_for_worker, + scaling_config=ScalingConfig(num_workers=3), + datasets={"train": train_dataset}, + ) + result = trainer.fit() + + .. testoutput:: + :hide: + + ... + + **How do I develop on top of DataParallelTrainer?** + + In many cases, using DataParallelTrainer directly is sufficient to execute + functions on multiple actors. + + However, you may want to subclass ``DataParallelTrainer`` and create a custom + Trainer for the following 2 use cases: + + - **Use Case 1:** You want to do data parallel training, but want to have + a predefined ``training_loop_per_worker``. + + - **Use Case 2:** You want to implement a custom + :py:class:`~ray.train.backend.Backend` that automatically handles + additional setup or teardown logic on each actor, so that the users of this + new trainer do not have to implement this logic. For example, a + ``TensorflowTrainer`` can be built on top of ``DataParallelTrainer`` + that automatically handles setting the proper environment variables for + distributed Tensorflow on each actor. + + For 1, you can set a predefined training loop in __init__ + + .. 
testcode:: + + from ray.train.data_parallel_trainer import DataParallelTrainer + + class MyDataParallelTrainer(DataParallelTrainer): + def __init__(self, *args, **kwargs): + predefined_train_loop_per_worker = lambda: 1 + super().__init__(predefined_train_loop_per_worker, *args, **kwargs) + + + For 2, you can implement the ``ray.train.Backend`` and ``ray.train.BackendConfig`` + interfaces. + + .. testcode:: + + from dataclasses import dataclass + from ray.train.backend import Backend, BackendConfig + + class MyBackend(Backend): + def on_start(self, worker_group, backend_config): + def set_env_var(env_var_value): + import os + os.environ["MY_ENV_VAR"] = env_var_value + + worker_group.execute(set_env_var, backend_config.env_var) + + @dataclass + class MyBackendConfig(BackendConfig): + env_var: str = "default_value" + + def backend_cls(self): + return MyBackend + + class MyTrainer(DataParallelTrainer): + def __init__(self, train_loop_per_worker, my_backend_config: + MyBackendConfig, **kwargs): + + super().__init__( + train_loop_per_worker, + backend_config=my_backend_config, **kwargs) + + Args: + train_loop_per_worker: The training function to execute. + This can either take in no arguments or a ``config`` dict. + train_loop_config: Configurations to pass into + ``train_loop_per_worker`` if it accepts an argument. + backend_config: Configuration for setting up a Backend (e.g. Torch, + Tensorflow, Horovod) on each worker to enable distributed + communication. If no Backend should be set up, then set this to None. + scaling_config: Configuration for how to scale data parallel training. + dataset_config: Configuration for dataset ingest. This is merged with the + default dataset config for the given trainer (`cls._dataset_config`). + run_config: Configuration for the execution of the training run. + datasets: Ray Datasets to use for training and evaluation. 
+ This is a dict where the key is the name of the dataset, which + can be accessed from within the ``train_loop_per_worker`` by calling + ``train.get_dataset_shard(dataset_key)``. + By default, all datasets are sharded equally across workers. + This can be configured via ``dataset_config``. + metadata: Dict that should be made available via + `train.get_context().get_metadata()` and in `checkpoint.get_metadata()` + for checkpoints saved from this Trainer. Must be JSON-serializable. + resume_from_checkpoint: A checkpoint to resume training from. + """ + + # Exposed here for testing purposes. Should never need + # to be overriden. + _backend_executor_cls: Type[BackendExecutor] = BackendExecutor + _training_iterator_cls: Type[TrainingIterator] = TrainingIterator + + _scaling_config_allowed_keys = BaseTrainer._scaling_config_allowed_keys + [ + "num_workers", + "resources_per_worker", + "use_gpu", + "placement_strategy", + "accelerator_type", + ] + + # For backwards compatibility with the legacy dataset config API. 
+ _dataset_config = None + + _fields_for_tuner_param_space = BaseTrainer._fields_for_tuner_param_space + [ + "train_loop_config" + ] + + def __init__( + self, + train_loop_per_worker: Union[Callable[[], None], Callable[[Dict], None]], + *, + train_loop_config: Optional[Dict] = None, + backend_config: Optional[BackendConfig] = None, + scaling_config: Optional[ScalingConfig] = None, + dataset_config: Optional[DataConfig] = None, + run_config: Optional[RunConfig] = None, + datasets: Optional[Dict[str, GenDataset]] = None, + metadata: Optional[Dict[str, Any]] = None, + resume_from_checkpoint: Optional[Checkpoint] = None, + ): + self._train_loop_per_worker = train_loop_per_worker + self._train_loop_config = train_loop_config + + if dataset_config is None: + dataset_config = DataConfig() + + if not isinstance(dataset_config, DataConfig): + raise ValueError( + "`dataset_config` must be an instance of ray.train.DataConfig, " + f"was: {dataset_config}" + ) + self._data_config = dataset_config + + backend_config = ( + backend_config if backend_config is not None else BackendConfig() + ) + self._backend_config = backend_config + + super(DataParallelTrainer, self).__init__( + scaling_config=scaling_config, + run_config=run_config, + datasets=datasets, + metadata=metadata, + resume_from_checkpoint=resume_from_checkpoint, + ) + + train_total_resources = self.scaling_config.total_resources + self._data_config.set_train_total_resources( + train_total_resources.get("CPU", 0), + train_total_resources.get("GPU", 0), + ) + + if env_integer(RAY_TRAIN_ENABLE_STATE_TRACKING, 0): + from ray.train._internal.state.state_actor import get_or_create_state_actor + + get_or_create_state_actor() + + @PublicAPI(stability="beta") + @classmethod + def restore( + cls: Type["DataParallelTrainer"], + path: str, + train_loop_per_worker: Optional[ + Union[Callable[[], None], Callable[[Dict], None]] + ] = None, + train_loop_config: Optional[Dict] = None, + **kwargs, + ) -> "DataParallelTrainer": + 
"""Restores a DataParallelTrainer from a previously interrupted/failed run. + + Args: + train_loop_per_worker: Optionally re-specified train loop function. + This should be used to re-specify a function that is not + restorable in a new Ray cluster (e.g., it holds onto outdated + object references). This should be the same training loop + that was passed to the original trainer constructor. + train_loop_config: Optionally re-specified train config. + This should similarly be used if the original `train_loop_config` + contained outdated object references, and it should not be modified + from what was originally passed in. + + See :meth:`BaseTrainer.restore() ` + for descriptions of the other arguments. + + Returns: + DataParallelTrainer: A restored instance of the `DataParallelTrainer` + subclass that is calling this method. + """ + return super(DataParallelTrainer, cls).restore( + path=path, + train_loop_per_worker=train_loop_per_worker, + train_loop_config=train_loop_config, + **kwargs, + ) + + def _validate_attributes(self): + super()._validate_attributes() + + self._validate_train_loop_per_worker( + self._train_loop_per_worker, "train_loop_per_worker" + ) + + def _validate_train_loop_per_worker( + self, train_loop_per_worker: Callable, fn_name: str + ) -> None: + num_required_params = count_required_parameters(train_loop_per_worker) + if num_required_params > 1: + raise ValueError( + f"{fn_name} should take in 0 or 1 arguments, " + f"but it accepts {num_required_params} arguments instead." 
+ ) + + @classmethod + def _validate_scaling_config(cls, scaling_config: ScalingConfig) -> ScalingConfig: + scaling_config = super(DataParallelTrainer, cls)._validate_scaling_config( + scaling_config + ) + + # This validation happens after the scaling config is updated from + # its specification in the Tuner `param_space` + if not scaling_config.use_gpu and "GPU" in ray.available_resources(): + logger.info( + "GPUs are detected in your Ray cluster, but GPU " + "training is not enabled for this trainer. To enable " + "GPU training, make sure to set `use_gpu` to True " + "in your scaling config." + ) + + if scaling_config.num_workers is None: + raise ValueError( + "You must specify the 'num_workers' in `scaling_config` as either an " + f"argument of `{cls.__name__}` or through the `param_space` of a " + "`Tuner` (if performing hyperparameter tuning)." + ) + + if scaling_config.num_workers <= 0: + raise ValueError( + "'num_workers' in `scaling_config` must be a positive " + f"integer. Received {scaling_config.num_workers}" + ) + + return scaling_config + + def _run_training(self, training_iterator: TrainingIterator) -> None: + """This method loops over the `TrainingIterator`: + The actual iteration (for ... in ...) waits for the training function + on each worker to report a result and supplies it as a list of results. + Afterwards (in the body of the loop), it will report the result + to the Tune session. + The iterator ends after the training function on each worker has finished. + """ + for training_results in training_iterator: + # TODO(ml-team): add ability to report results from multiple workers. + self._propagate_results(training_results) + + def _propagate_results(self, training_results: List[_TrainingResult]): + first_worker_result = training_results[0] + assert all(isinstance(result, _TrainingResult) for result in training_results) + + tune_session = get_session() + + # Check if any workers reported a checkpoint. 
+ # If so, report a checkpoint pointing to the persisted location + # to Tune for book-keeping. + # NOTE: This removes the restriction for any individual worker + # (ex: global rank 0 worker) from needing to report a checkpoint. + # All workers reported a checkpoint to the same fs path, so there's + # no need to report multiple checkpoints to Tune. + worker_checkpoints = [ + result.checkpoint + for result in training_results + if result.checkpoint is not None + ] + at_least_one_reported_checkpoint = len(worker_checkpoints) > 0 + + if at_least_one_reported_checkpoint: + # Update the coordinator's checkpoint index to the latest. + # This is what keeps the checkpoint index in line with the workers. + tune_session.storage._update_checkpoint_index(first_worker_result.metrics) + + # Make sure that all workers uploaded to the same location. + assert all( + checkpoint.path == tune_session.storage.checkpoint_fs_path + for checkpoint in worker_checkpoints + ) + + checkpoint = ( + Checkpoint( + filesystem=tune_session.storage.storage_filesystem, + path=tune_session.storage.checkpoint_fs_path, + ) + if at_least_one_reported_checkpoint + else None + ) + + tracked_training_result = _TrainingResult( + checkpoint=checkpoint, + metrics=first_worker_result.metrics, + ) + + logger.debug( + "Report (metrics, checkpoint) to the Tune session:\n" + f" metrics={tracked_training_result.metrics}\n" + f" checkpoint={tracked_training_result.checkpoint}" + ) + + # Report the metrics and checkpoint to Tune. 
+ tune_session._report_training_result(tracked_training_result) + + def training_loop(self) -> None: + scaling_config = self._validate_scaling_config(self.scaling_config) + + train_loop_per_worker = construct_train_func( + self._train_loop_per_worker, + self._train_loop_config, + train_func_context=self._backend_config.train_func_context, + fn_arg_name="train_loop_per_worker", + discard_returns=True, + ) + + trial_info = TrialInfo( + name=session.get_trial_name(), + id=session.get_trial_id(), + resources=session.get_trial_resources(), + logdir=session.get_trial_dir(), + driver_ip=ray.util.get_node_ip_address(), + driver_node_id=ray.get_runtime_context().get_node_id(), + experiment_name=session.get_experiment_name(), + run_id=uuid.uuid4().hex, + ) + + backend_executor = self._backend_executor_cls( + backend_config=self._backend_config, + trial_info=trial_info, + num_workers=scaling_config.num_workers, + resources_per_worker=scaling_config._resources_per_worker_not_none, + max_retries=0, + ) + + # Start the remote actors. + backend_executor.start() + + training_iterator = self._training_iterator_cls( + backend_executor=backend_executor, + backend_config=self._backend_config, + train_func=train_loop_per_worker, + datasets=self.datasets, + metadata=self.metadata, + data_config=self._data_config, + checkpoint=self.starting_checkpoint, + ) + + self._run_training(training_iterator) + + # Shutdown workers. + backend_executor.shutdown() + + def get_dataset_config(self) -> DataConfig: + """Returns a copy of this Trainer's final dataset configs. + + Returns: + The merged default + user-supplied dataset config. + """ + + return self._data_config + + @repr_with_fallback(["ipywidgets", "8"]) + def _repr_mimebundle_(self, **kwargs): + """Returns a mimebundle with an ipywidget repr and a simple text repr. + + Depending on the frontend where the data is being displayed, + different mimetypes will be used from this bundle. 
+ See https://ipython.readthedocs.io/en/stable/config/integrating.html + for information about this method, and + https://ipywidgets.readthedocs.io/en/latest/embedding.html + for more information about the jupyter widget mimetype. + + Returns: + A mimebundle containing an ipywidget repr and a simple text repr. + """ + from ipywidgets import HTML, Layout, Tab, VBox + + title = HTML(f"

{self.__class__.__name__}

") + + children = [] + titles = [] + + if self.datasets: + children.append(self._datasets_repr_()) + titles.append("Datasets") + + children.append(HTML(self._data_config_repr_html_())) + titles.append("Data Config") + + if self._train_loop_config: + children.append(HTML(self._train_loop_config_repr_html_())) + titles.append("Train Loop Config") + + if self.scaling_config: + children.append(HTML(self.scaling_config._repr_html_())) + titles.append("Scaling Config") + + if self.run_config: + children.append(HTML(self.run_config._repr_html_())) + titles.append("Run Config") + + if self._backend_config: + children.append(HTML(self._backend_config._repr_html_())) + titles.append("Backend Config") + + tab = Tab(children, titles=titles) + widget = VBox([title, tab], layout=Layout(width="100%")) + bundle = widget._repr_mimebundle_(**kwargs) + bundle.update( + { + "text/plain": repr(self), + } + ) + return bundle + + def _train_loop_config_repr_html_(self) -> str: + if self._train_loop_config: + table_data = {} + for k, v in self._train_loop_config.items(): + if isinstance(v, str) or str(v).isnumeric(): + table_data[k] = v + elif hasattr(v, "_repr_html_"): + table_data[k] = v._repr_html_() + else: + table_data[k] = str(v) + + return Template("title_data.html.j2").render( + title="Train Loop Config", + data=Template("scrollableTable.html.j2").render( + table=tabulate( + table_data.items(), + headers=["Setting", "Value"], + showindex=False, + tablefmt="unsafehtml", + ), + max_height="none", + ), + ) + else: + return "" + + def _data_config_repr_html_(self) -> str: + # TODO make this rendering nicer. 
+ content = [str(self._data_config)] + return Template("rendered_html_common.html.j2").render(content=content) + + def _datasets_repr_(self) -> str: + from ipywidgets import HTML, Layout, VBox + + content = [] + if self.datasets: + for name, config in self.datasets.items(): + tab = config._tab_repr_() + if tab: + content.append( + HTML( + Template("title_data.html.j2").render( + title=f"Dataset - {name}", data=None + ) + ) + ) + content.append(config._tab_repr_()) + + return VBox(content, layout=Layout(width="100%")) diff --git a/.venv/lib/python3.11/site-packages/ray/train/error.py b/.venv/lib/python3.11/site-packages/ray/train/error.py new file mode 100644 index 0000000000000000000000000000000000000000..1aa8c82471bbe8c800b2415c8af3b1aef601d00d --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/train/error.py @@ -0,0 +1,6 @@ +from ray.util.annotations import PublicAPI + + +@PublicAPI(stability="beta") +class SessionMisuseError(Exception): + """Indicates a method or function was used outside of a session.""" diff --git a/.venv/lib/python3.11/site-packages/ray/train/examples/__init__.py b/.venv/lib/python3.11/site-packages/ray/train/examples/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/.venv/lib/python3.11/site-packages/ray/train/examples/mlflow_simple_example.py b/.venv/lib/python3.11/site-packages/ray/train/examples/mlflow_simple_example.py new file mode 100644 index 0000000000000000000000000000000000000000..5e1a49f83bb22eb166f6aabba76b4e9968a00296 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/train/examples/mlflow_simple_example.py @@ -0,0 +1,55 @@ +from pathlib import Path + +from ray import train +from ray.train import RunConfig, ScalingConfig +from ray.train.torch import TorchTrainer +from ray.tune.logger import TBXLoggerCallback +from ray.tune.logger.mlflow import MLflowLoggerCallback + + +def train_func(): + for i in range(3): + 
train.report(dict(epoch=i)) + + +trainer = TorchTrainer( + train_func, + scaling_config=ScalingConfig(num_workers=2), + run_config=RunConfig( + callbacks=[ + MLflowLoggerCallback(experiment_name="train_experiment"), + TBXLoggerCallback(), + ], + ), +) + +# Run the training function, logging all the intermediate results +# to MLflow and Tensorboard. +result = trainer.fit() + +# For MLFLow logs: + +# MLFlow logs will by default be saved in an `mlflow` directory +# in the current working directory. + +# $ cd mlflow +# # View the MLflow UI. +# $ mlflow ui + +# You can change the directory by setting the `tracking_uri` argument +# in `MLflowLoggerCallback`. + +# For TensorBoard logs: + +# Print the latest run directory and keep note of it. +# For example: /home/ubuntu/ray_results/TorchTrainer_2022-06-13_20-31-06 +print("Run directory:", Path(result.path).parent) # TensorBoard is saved in parent dir + +# How to visualize the logs + +# Navigate to the run directory of the trainer. +# For example `cd /home/ubuntu/ray_results/TorchTrainer_2022-06-13_20-31-06` +# $ cd +# +# # View the tensorboard UI. +# $ tensorboard --logdir . 
diff --git a/.venv/lib/python3.11/site-packages/ray/train/examples/tf/tune_tensorflow_autoencoder_example.py b/.venv/lib/python3.11/site-packages/ray/train/examples/tf/tune_tensorflow_autoencoder_example.py new file mode 100644 index 0000000000000000000000000000000000000000..3c2d90b1887671e8dc35820ca900af10425e64a2 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/train/examples/tf/tune_tensorflow_autoencoder_example.py @@ -0,0 +1,77 @@ +import argparse + +import ray +from ray import tune +from ray.train import ScalingConfig +from ray.train.examples.tf.tensorflow_mnist_example import train_func +from ray.train.tensorflow import TensorflowTrainer +from ray.tune.tune_config import TuneConfig +from ray.tune.tuner import Tuner + + +def tune_tensorflow_mnist( + num_workers: int = 2, num_samples: int = 2, use_gpu: bool = False +): + scaling_config = ScalingConfig(num_workers=num_workers, use_gpu=use_gpu) + trainer = TensorflowTrainer( + train_loop_per_worker=train_func, + scaling_config=scaling_config, + ) + tuner = Tuner( + trainer, + tune_config=TuneConfig( + num_samples=num_samples, metric="binary_crossentropy", mode="min" + ), + param_space={ + "train_loop_config": { + "lr": tune.loguniform(1e-4, 1e-1), + "batch_size": tune.choice([32, 64, 128]), + "epochs": 3, + } + }, + ) + best_accuracy = tuner.fit().get_best_result().metrics["binary_crossentropy"] + print(f"Best accuracy config: {best_accuracy}") + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument( + "--smoke-test", + action="store_true", + default=False, + help="Finish quickly for testing.", + ) + parser.add_argument( + "--address", required=False, type=str, help="the address to use for Ray" + ) + parser.add_argument( + "--num-workers", + "-n", + type=int, + default=2, + help="Sets number of workers for training.", + ) + parser.add_argument( + "--num-samples", + type=int, + default=2, + help="Sets number of samples for training.", + ) + parser.add_argument( + 
"--use-gpu", action="store_true", default=False, help="Enables GPU training" + ) + + args = parser.parse_args() + + if args.smoke_test: + num_gpus = args.num_workers if args.use_gpu else 0 + ray.init(num_cpus=8, num_gpus=num_gpus) + tune_tensorflow_mnist(num_workers=2, num_samples=2, use_gpu=args.use_gpu) + else: + ray.init(address=args.address) + tune_tensorflow_mnist( + num_workers=args.num_workers, + num_samples=args.num_samples, + use_gpu=args.use_gpu, + ) diff --git a/.venv/lib/python3.11/site-packages/ray/train/huggingface/__init__.py b/.venv/lib/python3.11/site-packages/ray/train/huggingface/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/.venv/lib/python3.11/site-packages/ray/train/huggingface/__pycache__/__init__.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/train/huggingface/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..435c3182eb6e3e02060407a0f991438e3fda3e2d Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/train/huggingface/__pycache__/__init__.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/train/huggingface/transformers/__init__.py b/.venv/lib/python3.11/site-packages/ray/train/huggingface/transformers/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..870fe4ee94b873e077a7bf0d3bf48429e1bf8df0 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/train/huggingface/transformers/__init__.py @@ -0,0 +1,12 @@ +from ray.train.huggingface.transformers._transformers_utils import ( + RayTrainReportCallback, + prepare_trainer, +) + +__all__ = [ + "RayTrainReportCallback", + "prepare_trainer", +] + + +# DO NOT ADD ANYTHING AFTER THIS LINE. 
diff --git a/.venv/lib/python3.11/site-packages/ray/train/huggingface/transformers/__pycache__/__init__.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/train/huggingface/transformers/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9d8f3b5bb0092adf51d6535936227f272ad43cf2 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/train/huggingface/transformers/__pycache__/__init__.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/train/huggingface/transformers/__pycache__/_transformers_utils.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/train/huggingface/transformers/__pycache__/_transformers_utils.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..42c345d0216f293ddf88613000734d36017a57e8 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/train/huggingface/transformers/__pycache__/_transformers_utils.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/train/huggingface/transformers/_transformers_utils.py b/.venv/lib/python3.11/site-packages/ray/train/huggingface/transformers/_transformers_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..c522b81cfbf1c8756e26fd42c2692f3bdb421964 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/train/huggingface/transformers/_transformers_utils.py @@ -0,0 +1,143 @@ +import logging +import shutil +from pathlib import Path +from tempfile import TemporaryDirectory +from typing import Iterator, Optional, Type + +from torch.utils.data import DataLoader, Dataset, IterableDataset + +import ray +from ray._private.usage.usage_lib import TagKey, record_extra_usage_tag +from ray.data.iterator import _IterableFromIterator +from ray.train import Checkpoint +from ray.util import PublicAPI + +logger = logging.getLogger(__name__) + + +TRANSFORMERS_IMPORT_ERROR: Optional[ImportError] = None + +try: + import transformers.trainer + 
from transformers import Trainer + from transformers.trainer_callback import TrainerCallback +except ImportError as e: + TRANSFORMERS_IMPORT_ERROR = e + TrainerCallback = object + + +@PublicAPI(stability="beta") +class RayTrainReportCallback(TrainerCallback): + """A simple callback to report checkpoints and metrics to Ray Train. + + This callback is a subclass of `transformers.TrainerCallback + `_ + and overrides the `TrainerCallback.on_save()` method. After + a new checkpoint get saved, it fetches the latest metric dictionary + from `TrainerState.log_history` and reports it with the latest checkpoint + to Ray Train. + + Checkpoints will be saved in the following structure:: + + checkpoint_00000*/ Ray Train Checkpoint + └─ checkpoint/ Hugging Face Transformers Checkpoint + + For customized reporting and checkpointing logic, implement your own + `transformers.TrainerCallback` following this user + guide: :ref:`Saving and Loading Checkpoints `. + + Note that users should ensure that the logging, evaluation, and saving frequencies + are properly configured so that the monitoring metric is always up-to-date + when `transformers.Trainer` saves a checkpoint. 
+ + Suppose the monitoring metric is reported from evaluation stage: + + Some valid configurations: + - evaluation_strategy == save_strategy == "epoch" + - evaluation_strategy == save_strategy == "steps", save_steps % eval_steps == 0 + + Some invalid configurations: + - evaluation_strategy != save_strategy + - evaluation_strategy == save_strategy == "steps", save_steps % eval_steps != 0 + + """ + + CHECKPOINT_NAME = "checkpoint" + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + record_extra_usage_tag(TagKey.TRAIN_TRANSFORMERS_RAYTRAINREPORTCALLBACK, "1") + + def on_save(self, args, state, control, **kwargs): + """Event called after a checkpoint save.""" + with TemporaryDirectory() as tmpdir: + # Aggregate all the logged metrics + metrics = {} + for log in state.log_history: + metrics.update(log) + + # Copy ckpt files and construct a Ray Train Checkpoint + source_ckpt_path = transformers.trainer.get_last_checkpoint(args.output_dir) + if source_ckpt_path is not None: + target_ckpt_path = Path(tmpdir, self.CHECKPOINT_NAME).as_posix() + shutil.copytree(source_ckpt_path, target_ckpt_path) + checkpoint = Checkpoint.from_directory(tmpdir) + else: + checkpoint = None + + # Report latest metrics and checkpoint to Ray Train + ray.train.report(metrics=metrics, checkpoint=checkpoint) + + +class RayTorchIterableDataset(IterableDataset): + """Wrapper class for ray data iterables.""" + + def __init__(self, data_iterable) -> None: + super().__init__() + self.data_iterable = data_iterable + + def __iter__(self) -> Iterator: + return iter(self.data_iterable) + + +@PublicAPI(stability="beta") +def prepare_trainer(trainer: "Trainer") -> "Trainer": + """Prepare your HuggingFace Transformer Trainer for Ray Train. + + This utility function enable the trainer integrates with Ray Data Integration. 
+ Internally, it overrides the `get_train_dataloader` and `get_eval_dataloader` + methods and inject the data integration logics if the `train_dataset` and + `eval_dataset` are Ray Data Iterables. + """ + + if TRANSFORMERS_IMPORT_ERROR is not None: + raise TRANSFORMERS_IMPORT_ERROR + + base_trainer_class: Type[transformers.trainer.Trainer] = trainer.__class__ + + class RayTransformersTrainer(base_trainer_class): + """A Wrapper of `transformers.Trainer` for Ray Data Integration.""" + + def get_train_dataloader(self) -> DataLoader: + if isinstance(self.train_dataset, _IterableFromIterator): + dataset = RayTorchIterableDataset(self.train_dataset) + return DataLoader(dataset, batch_size=1, collate_fn=lambda x: x[0]) + else: + return super().get_train_dataloader() + + def get_eval_dataloader( + self, eval_dataset: Optional[Dataset] = None + ) -> DataLoader: + if eval_dataset is None: + eval_dataset = self.eval_dataset + + if isinstance(eval_dataset, _IterableFromIterator): + dataset = RayTorchIterableDataset(eval_dataset) + return DataLoader(dataset, batch_size=1, collate_fn=lambda x: x[0]) + else: + return super().get_eval_dataloader(eval_dataset) + + trainer.__class__ = RayTransformersTrainer + + record_extra_usage_tag(TagKey.TRAIN_TRANSFORMERS_PREPARE_TRAINER, "1") + return trainer diff --git a/.venv/lib/python3.11/site-packages/ray/train/lightgbm/__init__.py b/.venv/lib/python3.11/site-packages/ray/train/lightgbm/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..531165e7c8463f873af787dfc495b02764d3f91c --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/train/lightgbm/__init__.py @@ -0,0 +1,18 @@ +from ray.train.lightgbm._lightgbm_utils import RayTrainReportCallback +from ray.train.lightgbm.lightgbm_checkpoint import LightGBMCheckpoint +from ray.train.lightgbm.lightgbm_predictor import LightGBMPredictor +from ray.train.lightgbm.lightgbm_trainer import LightGBMTrainer +from ray.train.v2._internal.constants import is_v2_enabled + 
+if is_v2_enabled(): + from ray.train.v2.lightgbm.lightgbm_trainer import LightGBMTrainer # noqa: F811 + +__all__ = [ + "RayTrainReportCallback", + "LightGBMCheckpoint", + "LightGBMPredictor", + "LightGBMTrainer", +] + + +# DO NOT ADD ANYTHING AFTER THIS LINE. diff --git a/.venv/lib/python3.11/site-packages/ray/train/lightgbm/__pycache__/__init__.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/train/lightgbm/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..473c3709ebe869c07987213745c0fbeef54a13df Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/train/lightgbm/__pycache__/__init__.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/train/lightgbm/__pycache__/_lightgbm_utils.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/train/lightgbm/__pycache__/_lightgbm_utils.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d2f8264c58b34f09357e4cb7dd28fd623973cedb Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/train/lightgbm/__pycache__/_lightgbm_utils.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/train/lightgbm/__pycache__/config.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/train/lightgbm/__pycache__/config.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6054fcef950d199be9e161ee64a01aca4b935401 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/train/lightgbm/__pycache__/config.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/train/lightgbm/__pycache__/lightgbm_checkpoint.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/train/lightgbm/__pycache__/lightgbm_checkpoint.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..eeb047d8b5bb93098fd99fb5bab6f3ae96ac6ea0 Binary files /dev/null and 
b/.venv/lib/python3.11/site-packages/ray/train/lightgbm/__pycache__/lightgbm_checkpoint.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/train/lightgbm/__pycache__/lightgbm_predictor.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/train/lightgbm/__pycache__/lightgbm_predictor.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b4f8394effebbdc16d43742eb0eaef69a89d8e05 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/train/lightgbm/__pycache__/lightgbm_predictor.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/train/lightgbm/__pycache__/lightgbm_trainer.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/train/lightgbm/__pycache__/lightgbm_trainer.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cedfb623bbc1076f57319cffc1192d5dd419752c Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/train/lightgbm/__pycache__/lightgbm_trainer.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/train/lightgbm/__pycache__/v2.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/train/lightgbm/__pycache__/v2.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1b2b4be343c58ecdb9ba5d5cbd62c0f18afceb67 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/train/lightgbm/__pycache__/v2.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/train/lightgbm/_lightgbm_utils.py b/.venv/lib/python3.11/site-packages/ray/train/lightgbm/_lightgbm_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..15c4e344bd16dd19e84b3863eb4614bcff5fbf44 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/train/lightgbm/_lightgbm_utils.py @@ -0,0 +1,170 @@ +import tempfile +from contextlib import contextmanager +from pathlib import Path +from typing import Callable, Dict, List, Optional, Union + +from lightgbm.basic 
import Booster +from lightgbm.callback import CallbackEnv + +import ray.train +from ray.train import Checkpoint +from ray.tune.utils import flatten_dict +from ray.util.annotations import PublicAPI + + +@PublicAPI(stability="beta") +class RayTrainReportCallback: + """Creates a callback that reports metrics and checkpoints model. + + Args: + metrics: Metrics to report. If this is a list, + each item should be a metric key reported by LightGBM, + and it will be reported to Ray Train/Tune under the same name. + This can also be a dict of {: }, + which can be used to rename LightGBM default metrics. + filename: Customize the saved checkpoint file type by passing + a filename. Defaults to "model.txt". + frequency: How often to save checkpoints, in terms of iterations. + Defaults to 0 (no checkpoints are saved during training). + checkpoint_at_end: Whether or not to save a checkpoint at the end of training. + results_postprocessing_fn: An optional Callable that takes in + the metrics dict that will be reported (after it has been flattened) + and returns a modified dict. + + Examples + -------- + + Reporting checkpoints and metrics to Ray Tune when running many + independent xgboost trials (without data parallelism within a trial). + + .. testcode:: + :skipif: True + + import lightgbm + + from ray.train.lightgbm import RayTrainReportCallback + + config = { + # ... + "metric": ["binary_logloss", "binary_error"], + } + + # Report only log loss to Tune after each validation epoch. + bst = lightgbm.train( + ..., + callbacks=[ + RayTrainReportCallback( + metrics={"loss": "eval-binary_logloss"}, frequency=1 + ) + ], + ) + + Loading a model from a checkpoint reported by this callback. + + .. testcode:: + :skipif: True + + from ray.train.lightgbm import RayTrainReportCallback + + # Get a `Checkpoint` object that is saved by the callback during training. 
+ result = trainer.fit() + booster = RayTrainReportCallback.get_model(result.checkpoint) + + """ + + CHECKPOINT_NAME = "model.txt" + + def __init__( + self, + metrics: Optional[Union[str, List[str], Dict[str, str]]] = None, + filename: str = CHECKPOINT_NAME, + frequency: int = 0, + checkpoint_at_end: bool = True, + results_postprocessing_fn: Optional[ + Callable[[Dict[str, Union[float, List[float]]]], Dict[str, float]] + ] = None, + ): + if isinstance(metrics, str): + metrics = [metrics] + self._metrics = metrics + self._filename = filename + self._frequency = frequency + self._checkpoint_at_end = checkpoint_at_end + self._results_postprocessing_fn = results_postprocessing_fn + + @classmethod + def get_model( + cls, checkpoint: Checkpoint, filename: str = CHECKPOINT_NAME + ) -> Booster: + """Retrieve the model stored in a checkpoint reported by this callback. + + Args: + checkpoint: The checkpoint object returned by a training run. + The checkpoint should be saved by an instance of this callback. + filename: The filename to load the model from, which should match + the filename used when creating the callback. 
+ """ + with checkpoint.as_directory() as checkpoint_path: + return Booster(model_file=Path(checkpoint_path, filename).as_posix()) + + def _get_report_dict(self, evals_log: Dict[str, Dict[str, list]]) -> dict: + result_dict = flatten_dict(evals_log, delimiter="-") + if not self._metrics: + report_dict = result_dict + else: + report_dict = {} + for key in self._metrics: + if isinstance(self._metrics, dict): + metric = self._metrics[key] + else: + metric = key + report_dict[key] = result_dict[metric] + if self._results_postprocessing_fn: + report_dict = self._results_postprocessing_fn(report_dict) + return report_dict + + def _get_eval_result(self, env: CallbackEnv) -> dict: + eval_result = {} + for entry in env.evaluation_result_list: + data_name, eval_name, result = entry[0:3] + if len(entry) > 4: + stdv = entry[4] + suffix = "-mean" + else: + stdv = None + suffix = "" + if data_name not in eval_result: + eval_result[data_name] = {} + eval_result[data_name][eval_name + suffix] = result + if stdv is not None: + eval_result[data_name][eval_name + "-stdv"] = stdv + return eval_result + + @contextmanager + def _get_checkpoint(self, model: Booster) -> Optional[Checkpoint]: + if ray.train.get_context().get_world_rank() in (0, None): + with tempfile.TemporaryDirectory() as temp_checkpoint_dir: + model.save_model(Path(temp_checkpoint_dir, self._filename).as_posix()) + yield Checkpoint.from_directory(temp_checkpoint_dir) + else: + yield None + + def __call__(self, env: CallbackEnv) -> None: + eval_result = self._get_eval_result(env) + report_dict = self._get_report_dict(eval_result) + + # Ex: if frequency=2, checkpoint_at_end=True and num_boost_rounds=11, + # you will checkpoint at iterations 1, 3, 5, ..., 9, and 10 (checkpoint_at_end) + # (iterations count from 0) + on_last_iter = env.iteration == env.end_iteration - 1 + should_checkpoint_at_end = on_last_iter and self._checkpoint_at_end + should_checkpoint_with_frequency = ( + self._frequency != 0 and (env.iteration + 1) 
% self._frequency == 0 + ) + should_checkpoint = should_checkpoint_at_end or should_checkpoint_with_frequency + + if should_checkpoint: + with self._get_checkpoint(model=env.model) as checkpoint: + ray.train.report(report_dict, checkpoint=checkpoint) + else: + ray.train.report(report_dict) diff --git a/.venv/lib/python3.11/site-packages/ray/train/lightgbm/config.py b/.venv/lib/python3.11/site-packages/ray/train/lightgbm/config.py new file mode 100644 index 0000000000000000000000000000000000000000..c57f4b6d17c71c563b25e59af6b4ecda1cc7cbeb --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/train/lightgbm/config.py @@ -0,0 +1,89 @@ +import logging +import threading +from dataclasses import dataclass +from typing import Any, Dict, Optional + +import ray +from ray.train._internal.utils import get_address_and_port +from ray.train._internal.worker_group import WorkerGroup +from ray.train.backend import Backend, BackendConfig + +logger = logging.getLogger(__name__) + + +# Global LightGBM distributed network configuration for each worker process. +_lightgbm_network_params: Optional[Dict[str, Any]] = None +_lightgbm_network_params_lock = threading.Lock() + + +def get_network_params() -> Dict[str, Any]: + """Returns the network parameters to enable LightGBM distributed training.""" + global _lightgbm_network_params + + with _lightgbm_network_params_lock: + if not _lightgbm_network_params: + logger.warning( + "`ray.train.lightgbm.get_network_params` was called outside " + "the context of a `ray.train.lightgbm.LightGBMTrainer`. " + "The current process has no knowledge of the distributed training " + "worker group, so this method will return an empty dict. " + "Please call this within the training loop of a " + "`ray.train.lightgbm.LightGBMTrainer`. " + "If you are in fact calling this within a `LightGBMTrainer`, " + "this is unexpected: please file a bug report to the Ray Team." 
+ ) + return {} + + return _lightgbm_network_params.copy() + + +def _set_network_params( + num_machines: int, + local_listen_port: int, + machines: str, +): + global _lightgbm_network_params + + with _lightgbm_network_params_lock: + assert ( + _lightgbm_network_params is None + ), "LightGBM network params are already initialized." + _lightgbm_network_params = dict( + num_machines=num_machines, + local_listen_port=local_listen_port, + machines=machines, + ) + + +@dataclass +class LightGBMConfig(BackendConfig): + """Configuration for LightGBM distributed data-parallel training setup. + + See the LightGBM docs for more information on the "network parameters" + that Ray Train sets up for you: + https://lightgbm.readthedocs.io/en/latest/Parameters.html#network-parameters + """ + + @property + def backend_cls(self): + return _LightGBMBackend + + +class _LightGBMBackend(Backend): + def on_training_start( + self, worker_group: WorkerGroup, backend_config: LightGBMConfig + ): + node_ips_and_ports = worker_group.execute(get_address_and_port) + ports = [port for _, port in node_ips_and_ports] + machines = ",".join( + [f"{node_ip}:{port}" for node_ip, port in node_ips_and_ports] + ) + num_machines = len(worker_group) + ray.get( + [ + worker_group.execute_single_async( + rank, _set_network_params, num_machines, ports[rank], machines + ) + for rank in range(len(worker_group)) + ] + ) diff --git a/.venv/lib/python3.11/site-packages/ray/train/lightgbm/lightgbm_checkpoint.py b/.venv/lib/python3.11/site-packages/ray/train/lightgbm/lightgbm_checkpoint.py new file mode 100644 index 0000000000000000000000000000000000000000..786b411d8d0dffeb8c438f4d16dfa74bc0e9898a --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/train/lightgbm/lightgbm_checkpoint.py @@ -0,0 +1,70 @@ +import tempfile +from pathlib import Path +from typing import TYPE_CHECKING, Optional + +import lightgbm + +from ray.train._internal.framework_checkpoint import FrameworkCheckpoint +from ray.util.annotations 
import PublicAPI + +if TYPE_CHECKING: + from ray.data.preprocessor import Preprocessor + + +@PublicAPI(stability="beta") +class LightGBMCheckpoint(FrameworkCheckpoint): + """A :py:class:`~ray.train.Checkpoint` with LightGBM-specific functionality.""" + + MODEL_FILENAME = "model.txt" + + @classmethod + def from_model( + cls, + booster: lightgbm.Booster, + *, + preprocessor: Optional["Preprocessor"] = None, + path: Optional[str] = None, + ) -> "LightGBMCheckpoint": + """Create a :py:class:`~ray.train.Checkpoint` that stores a LightGBM model. + + Args: + booster: The LightGBM model to store in the checkpoint. + preprocessor: A fitted preprocessor to be applied before inference. + path: The path to the directory where the checkpoint file will be saved. + This should start as an empty directory, since the *entire* + directory will be treated as the checkpoint when reported. + By default, a temporary directory will be created. + + Returns: + An :py:class:`LightGBMCheckpoint` containing the specified ``Estimator``. 
+ + Examples: + >>> import lightgbm + >>> import numpy as np + >>> from ray.train.lightgbm import LightGBMCheckpoint + >>> + >>> train_X = np.array([[1, 2], [3, 4]]) + >>> train_y = np.array([0, 1]) + >>> + >>> model = lightgbm.LGBMClassifier().fit(train_X, train_y) + >>> checkpoint = LightGBMCheckpoint.from_model(model.booster_) + """ + checkpoint_path = Path(path or tempfile.mkdtemp()) + + if not checkpoint_path.is_dir(): + raise ValueError(f"`path` must be a directory, but got: {checkpoint_path}") + + booster.save_model(checkpoint_path.joinpath(cls.MODEL_FILENAME).as_posix()) + + checkpoint = cls.from_directory(checkpoint_path.as_posix()) + if preprocessor: + checkpoint.set_preprocessor(preprocessor) + + return checkpoint + + def get_model(self) -> lightgbm.Booster: + """Retrieve the LightGBM model stored in this checkpoint.""" + with self.as_directory() as checkpoint_path: + return lightgbm.Booster( + model_file=Path(checkpoint_path, self.MODEL_FILENAME).as_posix() + ) diff --git a/.venv/lib/python3.11/site-packages/ray/train/lightgbm/lightgbm_predictor.py b/.venv/lib/python3.11/site-packages/ray/train/lightgbm/lightgbm_predictor.py new file mode 100644 index 0000000000000000000000000000000000000000..62c5237d00760617f0cf2a48ea996eda4552e32f --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/train/lightgbm/lightgbm_predictor.py @@ -0,0 +1,152 @@ +from typing import TYPE_CHECKING, List, Optional, Union + +import lightgbm +import pandas as pd +from pandas.api.types import is_object_dtype + +from ray.air.constants import TENSOR_COLUMN_NAME +from ray.air.data_batch_type import DataBatchType +from ray.air.util.data_batch_conversion import _unwrap_ndarray_object_type_if_needed +from ray.train.lightgbm import LightGBMCheckpoint +from ray.train.predictor import Predictor +from ray.util.annotations import PublicAPI + +if TYPE_CHECKING: + from ray.data.preprocessor import Preprocessor + + +@PublicAPI(stability="beta") +class LightGBMPredictor(Predictor): + """A 
predictor for LightGBM models. + + Args: + model: The LightGBM booster to use for predictions. + preprocessor: A preprocessor used to transform data batches prior + to prediction. + """ + + def __init__( + self, model: lightgbm.Booster, preprocessor: Optional["Preprocessor"] = None + ): + self.model = model + super().__init__(preprocessor) + + def __repr__(self): + return ( + f"{self.__class__.__name__}(model={self.model!r}, " + f"preprocessor={self._preprocessor!r})" + ) + + @classmethod + def from_checkpoint(cls, checkpoint: LightGBMCheckpoint) -> "LightGBMPredictor": + """Instantiate the predictor from a LightGBMCheckpoint. + + Args: + checkpoint: The checkpoint to load the model and preprocessor from. + + """ + model = checkpoint.get_model() + preprocessor = checkpoint.get_preprocessor() + return cls(model=model, preprocessor=preprocessor) + + def predict( + self, + data: DataBatchType, + feature_columns: Optional[Union[List[str], List[int]]] = None, + **predict_kwargs, + ) -> DataBatchType: + """Run inference on data batch. + + Args: + data: A batch of input data. + feature_columns: The names or indices of the columns in the + data to use as features to predict on. If None, then use + all columns in ``data``. + **predict_kwargs: Keyword arguments passed to + ``lightgbm.Booster.predict``. 
+ + Examples: + >>> import numpy as np + >>> import lightgbm as lgbm + >>> from ray.train.lightgbm import LightGBMPredictor + >>> + >>> train_X = np.array([[1, 2], [3, 4]]) + >>> train_y = np.array([0, 1]) + >>> + >>> model = lgbm.LGBMClassifier().fit(train_X, train_y) + >>> predictor = LightGBMPredictor(model=model.booster_) + >>> + >>> data = np.array([[1, 2], [3, 4]]) + >>> predictions = predictor.predict(data) + >>> + >>> # Only use first and second column as the feature + >>> data = np.array([[1, 2, 8], [3, 4, 9]]) + >>> predictions = predictor.predict(data, feature_columns=[0, 1]) + + >>> import pandas as pd + >>> import lightgbm as lgbm + >>> from ray.train.lightgbm import LightGBMPredictor + >>> + >>> train_X = pd.DataFrame([[1, 2], [3, 4]], columns=["A", "B"]) + >>> train_y = pd.Series([0, 1]) + >>> + >>> model = lgbm.LGBMClassifier().fit(train_X, train_y) + >>> predictor = LightGBMPredictor(model=model.booster_) + >>> + >>> # Pandas dataframe. + >>> data = pd.DataFrame([[1, 2], [3, 4]], columns=["A", "B"]) + >>> predictions = predictor.predict(data) + >>> + >>> # Only use first and second column as the feature + >>> data = pd.DataFrame([[1, 2, 8], [3, 4, 9]], columns=["A", "B", "C"]) + >>> predictions = predictor.predict(data, feature_columns=["A", "B"]) + + + Returns: + Prediction result. 
+ + """ + return Predictor.predict( + self, data, feature_columns=feature_columns, **predict_kwargs + ) + + def _predict_pandas( + self, + data: "pd.DataFrame", + feature_columns: Optional[Union[List[str], List[int]]] = None, + **predict_kwargs, + ) -> pd.DataFrame: + feature_names = None + if TENSOR_COLUMN_NAME in data: + data = data[TENSOR_COLUMN_NAME].to_numpy() + data = _unwrap_ndarray_object_type_if_needed(data) + if feature_columns: + # In this case feature_columns is a list of integers + data = data[:, feature_columns] + # Turn into dataframe to make dtype resolution easy + data = pd.DataFrame(data, columns=feature_names) + data = data.infer_objects() + + # Pandas does not detect categorical dtypes. Any remaining object + # dtypes are probably categories, so convert them. + # This will fail if we have a category composed entirely of + # integers, but this is the best we can do here. + update_dtypes = {} + for column in data.columns: + dtype = data.dtypes[column] + if is_object_dtype(dtype): + update_dtypes[column] = pd.CategoricalDtype() + + if update_dtypes: + data = data.astype(update_dtypes, copy=False) + elif feature_columns: + # feature_columns is a list of integers or strings + data = data[feature_columns] + + df = pd.DataFrame(self.model.predict(data, **predict_kwargs)) + df.columns = ( + ["predictions"] + if len(df.columns) == 1 + else [f"predictions_{i}" for i in range(len(df.columns))] + ) + return df diff --git a/.venv/lib/python3.11/site-packages/ray/train/lightgbm/lightgbm_trainer.py b/.venv/lib/python3.11/site-packages/ray/train/lightgbm/lightgbm_trainer.py new file mode 100644 index 0000000000000000000000000000000000000000..7057100cc571286cd26d231899806f0d95a1dc34 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/train/lightgbm/lightgbm_trainer.py @@ -0,0 +1,221 @@ +import logging +from functools import partial +from typing import Any, Dict, Optional + +import lightgbm + +import ray +from ray.train import Checkpoint +from 
ray.train.constants import _DEPRECATED_VALUE, TRAIN_DATASET_KEY +from ray.train.lightgbm import RayTrainReportCallback +from ray.train.lightgbm.v2 import LightGBMTrainer as SimpleLightGBMTrainer +from ray.train.trainer import GenDataset +from ray.util.annotations import PublicAPI + +logger = logging.getLogger(__name__) + + +def _lightgbm_train_fn_per_worker( + config: dict, + label_column: str, + num_boost_round: int, + dataset_keys: set, + lightgbm_train_kwargs: dict, +): + checkpoint = ray.train.get_checkpoint() + starting_model = None + remaining_iters = num_boost_round + if checkpoint: + starting_model = RayTrainReportCallback.get_model(checkpoint) + starting_iter = starting_model.current_iteration() + remaining_iters = num_boost_round - starting_iter + logger.info( + f"Model loaded from checkpoint will train for " + f"additional {remaining_iters} iterations (trees) in order " + "to achieve the target number of iterations " + f"({num_boost_round=})." + ) + + train_ds_iter = ray.train.get_dataset_shard(TRAIN_DATASET_KEY) + train_df = train_ds_iter.materialize().to_pandas() + + eval_ds_iters = { + k: ray.train.get_dataset_shard(k) + for k in dataset_keys + if k != TRAIN_DATASET_KEY + } + eval_dfs = {k: d.materialize().to_pandas() for k, d in eval_ds_iters.items()} + + train_X, train_y = train_df.drop(label_column, axis=1), train_df[label_column] + train_set = lightgbm.Dataset(train_X, label=train_y) + + # NOTE: Include the training dataset in the evaluation datasets. + # This allows `train-*` metrics to be calculated and reported. + valid_sets = [train_set] + valid_names = [TRAIN_DATASET_KEY] + + for eval_name, eval_df in eval_dfs.items(): + eval_X, eval_y = eval_df.drop(label_column, axis=1), eval_df[label_column] + valid_sets.append(lightgbm.Dataset(eval_X, label=eval_y)) + valid_names.append(eval_name) + + # Add network params of the worker group to enable distributed training. 
+ config.update(ray.train.lightgbm.v2.get_network_params()) + + lightgbm.train( + params=config, + train_set=train_set, + num_boost_round=remaining_iters, + valid_sets=valid_sets, + valid_names=valid_names, + init_model=starting_model, + **lightgbm_train_kwargs, + ) + + +@PublicAPI(stability="beta") +class LightGBMTrainer(SimpleLightGBMTrainer): + """A Trainer for data parallel LightGBM training. + + This Trainer runs the LightGBM training loop in a distributed manner + using multiple Ray Actors. + + If you would like to take advantage of LightGBM's built-in handling + for features with the categorical data type, consider applying the + :class:`Categorizer` preprocessor to set the dtypes in the dataset. + + .. note:: + ``LightGBMTrainer`` does not modify or otherwise alter the working + of the LightGBM distributed training algorithm. + Ray only provides orchestration, data ingest and fault tolerance. + For more information on LightGBM distributed training, refer to + `LightGBM documentation `__. + + Example: + .. testcode:: + + import ray + + from ray.train.lightgbm import LightGBMTrainer + from ray.train import ScalingConfig + + train_dataset = ray.data.from_items( + [{"x": x, "y": x + 1} for x in range(32)] + ) + trainer = LightGBMTrainer( + label_column="y", + params={"objective": "regression"}, + scaling_config=ScalingConfig(num_workers=3), + datasets={"train": train_dataset}, + ) + result = trainer.fit() + + .. testoutput:: + :hide: + + ... + + Args: + datasets: The Ray Datasets to use for training and validation. Must include a + "train" key denoting the training dataset. All non-training datasets will + be used as separate validation sets, each reporting a separate metric. + label_column: Name of the label column. A column with this name + must be present in the training dataset. + params: LightGBM training parameters passed to ``lightgbm.train()``. + Refer to `LightGBM documentation `_ + for a list of possible parameters. 
+ num_boost_round: Target number of boosting iterations (trees in the model). + Note that unlike in ``lightgbm.train``, this is the target number + of trees, meaning that if you set ``num_boost_round=10`` and pass a model + that has already been trained for 5 iterations, it will be trained for 5 + iterations more, instead of 10 more. + scaling_config: Configuration for how to scale data parallel training. + run_config: Configuration for the execution of the training run. + resume_from_checkpoint: A checkpoint to resume training from. + metadata: Dict that should be made available in `checkpoint.get_metadata()` + for checkpoints saved from this Trainer. Must be JSON-serializable. + **train_kwargs: Additional kwargs passed to ``lightgbm.train()`` function. + """ + + _handles_checkpoint_freq = True + _handles_checkpoint_at_end = True + + def __init__( + self, + *, + datasets: Dict[str, GenDataset], + label_column: str, + params: Dict[str, Any], + num_boost_round: int = 10, + scaling_config: Optional[ray.train.ScalingConfig] = None, + run_config: Optional[ray.train.RunConfig] = None, + dataset_config: Optional[ray.train.DataConfig] = None, + resume_from_checkpoint: Optional[Checkpoint] = None, + metadata: Optional[Dict[str, Any]] = None, + dmatrix_params: Optional[Dict[str, Dict[str, Any]]] = _DEPRECATED_VALUE, + **train_kwargs, + ): + # TODO(justinvyu): [Deprecated] Remove in 2.11 + if dmatrix_params != _DEPRECATED_VALUE: + raise DeprecationWarning( + "`dmatrix_params` is deprecated, since XGBoostTrainer no longer " + "depends on the `xgboost_ray.RayDMatrix` utility. " + "You can remove this argument and use `dataset_config` instead " + "to customize Ray Dataset ingestion." 
+ ) + + # Initialize a default Ray Train metrics/checkpoint reporting callback if needed + callbacks = train_kwargs.get("callbacks", []) + user_supplied_callback = any( + isinstance(callback, RayTrainReportCallback) for callback in callbacks + ) + callback_kwargs = {} + if run_config: + checkpoint_frequency = run_config.checkpoint_config.checkpoint_frequency + checkpoint_at_end = run_config.checkpoint_config.checkpoint_at_end + + callback_kwargs["frequency"] = checkpoint_frequency + # Default `checkpoint_at_end=True` unless the user explicitly sets it. + callback_kwargs["checkpoint_at_end"] = ( + checkpoint_at_end if checkpoint_at_end is not None else True + ) + + if not user_supplied_callback: + callbacks.append(RayTrainReportCallback(**callback_kwargs)) + train_kwargs["callbacks"] = callbacks + + train_fn_per_worker = partial( + _lightgbm_train_fn_per_worker, + label_column=label_column, + num_boost_round=num_boost_round, + dataset_keys=set(datasets), + lightgbm_train_kwargs=train_kwargs, + ) + + super(LightGBMTrainer, self).__init__( + train_loop_per_worker=train_fn_per_worker, + train_loop_config=params, + scaling_config=scaling_config, + run_config=run_config, + datasets=datasets, + dataset_config=dataset_config, + resume_from_checkpoint=resume_from_checkpoint, + metadata=metadata, + ) + + @classmethod + def get_model( + cls, + checkpoint: Checkpoint, + ) -> lightgbm.Booster: + """Retrieve the LightGBM model stored in this checkpoint.""" + return RayTrainReportCallback.get_model(checkpoint) + + def _validate_attributes(self): + super()._validate_attributes() + + if TRAIN_DATASET_KEY not in self.datasets: + raise KeyError( + f"'{TRAIN_DATASET_KEY}' key must be preset in `datasets`. 
" + f"Got {list(self.datasets.keys())}" + ) diff --git a/.venv/lib/python3.11/site-packages/ray/train/lightgbm/v2.py b/.venv/lib/python3.11/site-packages/ray/train/lightgbm/v2.py new file mode 100644 index 0000000000000000000000000000000000000000..2b943cdf3a1dcf28453966e5636329dcd1821679 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/train/lightgbm/v2.py @@ -0,0 +1,132 @@ +import logging +from typing import Any, Callable, Dict, Optional, Union + +import ray.train +from ray.train import Checkpoint +from ray.train.data_parallel_trainer import DataParallelTrainer +from ray.train.lightgbm.config import LightGBMConfig, get_network_params # noqa: F401 +from ray.train.trainer import GenDataset + +logger = logging.getLogger(__name__) + + +class LightGBMTrainer(DataParallelTrainer): + """A Trainer for distributed data-parallel LightGBM training. + + Example + ------- + + .. testcode:: + + import lightgbm as lgb + + import ray.data + import ray.train + from ray.train.lightgbm import RayTrainReportCallback + from ray.train.lightgbm.v2 import LightGBMTrainer + + + def train_fn_per_worker(config: dict): + # (Optional) Add logic to resume training state from a checkpoint. + # ray.train.get_checkpoint() + + # 1. Get the dataset shard for the worker and convert to a `lgb.Dataset` + train_ds_iter, eval_ds_iter = ( + ray.train.get_dataset_shard("train"), + ray.train.get_dataset_shard("validation"), + ) + train_ds, eval_ds = train_ds_iter.materialize(), eval_ds_iter.materialize() + train_df, eval_df = train_ds.to_pandas(), eval_ds.to_pandas() + train_X, train_y = train_df.drop("y", axis=1), train_df["y"] + eval_X, eval_y = eval_df.drop("y", axis=1), eval_df["y"] + + train_set = lgb.Dataset(train_X, label=train_y) + eval_set = lgb.Dataset(eval_X, label=eval_y) + + # 2. Run distributed data-parallel training. + # `get_network_params` sets up the necessary configurations for LightGBM + # to set up the data parallel training worker group on your Ray cluster. 
+ params = { + "objective": "regression", + # Adding the line below is the only change needed + # for your `lgb.train` call! + **ray.train.lightgbm.v2.get_network_params(), + } + lgb.train( + params, + train_set, + valid_sets=[eval_set], + valid_names=["eval"], + callbacks=[RayTrainReportCallback()], + ) + + train_ds = ray.data.from_items([{"x": x, "y": x + 1} for x in range(32)]) + eval_ds = ray.data.from_items( + [{"x": x, "y": x + 1} for x in range(32, 32 + 16)] + ) + trainer = LightGBMTrainer( + train_fn_per_worker, + datasets={"train": train_ds, "validation": eval_ds}, + scaling_config=ray.train.ScalingConfig(num_workers=4), + ) + result = trainer.fit() + booster = RayTrainReportCallback.get_model(result.checkpoint) + + .. testoutput:: + :hide: + + ... + + Args: + train_loop_per_worker: The training function to execute on each worker. + This function can either take in zero arguments or a single ``Dict`` + argument which is set by defining ``train_loop_config``. + Within this function you can use any of the + :ref:`Ray Train Loop utilities `. + train_loop_config: A configuration ``Dict`` to pass in as an argument to + ``train_loop_per_worker``. + This is typically used for specifying hyperparameters. + lightgbm_config: The configuration for setting up the distributed lightgbm + backend. See :class:`~ray.train.lightgbm.LightGBMConfig` for more info. + datasets: The Ray Datasets to use for training and validation. + dataset_config: The configuration for ingesting the input ``datasets``. + By default, all the Ray Dataset are split equally across workers. + See :class:`~ray.train.DataConfig` for more details. + scaling_config: The configuration for how to scale data parallel training. + ``num_workers`` determines how many Python processes are used for training, + and ``use_gpu`` determines whether or not each process should use GPUs. + See :class:`~ray.train.ScalingConfig` for more info. + run_config: The configuration for the execution of the training run. 
+ See :class:`~ray.train.RunConfig` for more info. + resume_from_checkpoint: A checkpoint to resume training from. + This checkpoint can be accessed from within ``train_loop_per_worker`` + by calling ``ray.train.get_checkpoint()``. + metadata: Dict that should be made available via + `ray.train.get_context().get_metadata()` and in `checkpoint.get_metadata()` + for checkpoints saved from this Trainer. Must be JSON-serializable. + """ + + def __init__( + self, + train_loop_per_worker: Union[Callable[[], None], Callable[[Dict], None]], + *, + train_loop_config: Optional[Dict] = None, + lightgbm_config: Optional[LightGBMConfig] = None, + scaling_config: Optional[ray.train.ScalingConfig] = None, + run_config: Optional[ray.train.RunConfig] = None, + datasets: Optional[Dict[str, GenDataset]] = None, + dataset_config: Optional[ray.train.DataConfig] = None, + metadata: Optional[Dict[str, Any]] = None, + resume_from_checkpoint: Optional[Checkpoint] = None, + ): + super(LightGBMTrainer, self).__init__( + train_loop_per_worker=train_loop_per_worker, + train_loop_config=train_loop_config, + backend_config=lightgbm_config or LightGBMConfig(), + scaling_config=scaling_config, + dataset_config=dataset_config, + run_config=run_config, + datasets=datasets, + resume_from_checkpoint=resume_from_checkpoint, + metadata=metadata, + ) diff --git a/.venv/lib/python3.11/site-packages/ray/train/predictor.py b/.venv/lib/python3.11/site-packages/ray/train/predictor.py new file mode 100644 index 0000000000000000000000000000000000000000..7e25bdaa8022bbd5b1cd1cc88397d32fe726f1dd --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/train/predictor.py @@ -0,0 +1,254 @@ +import abc +from typing import Callable, Dict, Optional, Type, Union + +import numpy as np +import pandas as pd + +from ray.air.data_batch_type import DataBatchType +from ray.air.util.data_batch_conversion import ( + BatchFormat, + _convert_batch_type_to_numpy, + _convert_batch_type_to_pandas, +) +from ray.data import 
Preprocessor +from ray.train import Checkpoint +from ray.util.annotations import DeveloperAPI, PublicAPI + +try: + import pyarrow + + pa_table = pyarrow.Table +except ImportError: + pa_table = None + +# Reverse mapping from data batch type to batch format. +TYPE_TO_ENUM: Dict[Type[DataBatchType], BatchFormat] = { + np.ndarray: BatchFormat.NUMPY, + dict: BatchFormat.NUMPY, + pd.DataFrame: BatchFormat.PANDAS, +} + + +@PublicAPI(stability="beta") +class PredictorNotSerializableException(RuntimeError): + """Error raised when trying to serialize a Predictor instance.""" + + pass + + +@PublicAPI(stability="beta") +class Predictor(abc.ABC): + """Predictors load models from checkpoints to perform inference. + + .. note:: + The base ``Predictor`` class cannot be instantiated directly. Only one of + its subclasses can be used. + + **How does a Predictor work?** + + Predictors expose a ``predict`` method that accepts an input batch of type + ``DataBatchType`` and outputs predictions of the same type as the input batch. + + When the ``predict`` method is called the following occurs: + + - The input batch is converted into a pandas DataFrame. Tensor input (like a + ``np.ndarray``) will be converted into a single column Pandas Dataframe. + - If there is a :ref:`Preprocessor ` saved in the provided + :class:`Checkpoint `, the preprocessor will be used to + transform the DataFrame. + - The transformed DataFrame will be passed to the model for inference (via the + ``predictor._predict_pandas`` method). + - The predictions will be outputted by ``predict`` in the same type as the + original input. + + **How do I create a new Predictor?** + + To implement a new Predictor for your particular framework, you should subclass + the base ``Predictor`` and implement the following two methods: + + 1. ``_predict_pandas``: Given a pandas.DataFrame input, return a + pandas.DataFrame containing predictions. + 2. ``from_checkpoint``: Logic for creating a Predictor from a + :class:`Checkpoint `. 
+ 3. Optionally ``_predict_numpy`` for better performance when working with + tensor data to avoid extra copies from Pandas conversions. + """ + + def __init__(self, preprocessor: Optional[Preprocessor] = None): + """Subclasseses must call Predictor.__init__() to set a preprocessor.""" + self._preprocessor: Optional[Preprocessor] = preprocessor + # Whether tensor columns should be automatically cast from/to the tensor + # extension type at UDF boundaries. This can be overridden by subclasses. + self._cast_tensor_columns = False + + @classmethod + @abc.abstractmethod + def from_checkpoint(cls, checkpoint: Checkpoint, **kwargs) -> "Predictor": + """Create a specific predictor from a checkpoint. + + Args: + checkpoint: Checkpoint to load predictor data from. + kwargs: Arguments specific to predictor implementations. + + Returns: + Predictor: Predictor object. + """ + raise NotImplementedError + + @classmethod + def from_pandas_udf( + cls, pandas_udf: Callable[[pd.DataFrame], pd.DataFrame] + ) -> "Predictor": + """Create a Predictor from a Pandas UDF. + + Args: + pandas_udf: A function that takes a pandas.DataFrame and other + optional kwargs and returns a pandas.DataFrame. + """ + + class PandasUDFPredictor(Predictor): + @classmethod + def from_checkpoint(cls, checkpoint: Checkpoint, **kwargs) -> "Predictor": + return PandasUDFPredictor() + + def _predict_pandas(self, df, **kwargs) -> "pd.DataFrame": + return pandas_udf(df, **kwargs) + + return PandasUDFPredictor() + + def get_preprocessor(self) -> Optional[Preprocessor]: + """Get the preprocessor to use prior to executing predictions.""" + return self._preprocessor + + def set_preprocessor(self, preprocessor: Optional[Preprocessor]) -> None: + """Set the preprocessor to use prior to executing predictions.""" + self._preprocessor = preprocessor + + @classmethod + @DeveloperAPI + def preferred_batch_format(cls) -> BatchFormat: + """Batch format hint for upstream producers to try yielding best block format. 
+ + The preferred batch format to use if both `_predict_pandas` and + `_predict_numpy` are implemented. Defaults to Pandas. + + Can be overriden by predictor classes depending on the framework type, + e.g. TorchPredictor prefers Numpy and XGBoostPredictor prefers Pandas as + native batch format. + + """ + return BatchFormat.PANDAS + + @classmethod + def _batch_format_to_use(cls) -> BatchFormat: + """Determine the batch format to use for the predictor.""" + has_pandas_implemented = cls._predict_pandas != Predictor._predict_pandas + has_numpy_implemented = cls._predict_numpy != Predictor._predict_numpy + if has_pandas_implemented and has_numpy_implemented: + return cls.preferred_batch_format() + elif has_pandas_implemented: + return BatchFormat.PANDAS + elif has_numpy_implemented: + return BatchFormat.NUMPY + else: + raise NotImplementedError( + f"Predictor {cls.__name__} must implement at least one of " + "`_predict_pandas` and `_predict_numpy`." + ) + + def _set_cast_tensor_columns(self): + """Enable automatic tensor column casting. + + If this is called on a predictor, the predictor will cast tensor columns to + NumPy ndarrays in the input to the preprocessors and cast tensor columns back to + the tensor extension type in the prediction outputs. + """ + self._cast_tensor_columns = True + + def predict(self, data: DataBatchType, **kwargs) -> DataBatchType: + """Perform inference on a batch of data. + + Args: + data: A batch of input data of type ``DataBatchType``. + kwargs: Arguments specific to predictor implementations. These are passed + directly to ``_predict_numpy`` or ``_predict_pandas``. + + Returns: + DataBatchType: + Prediction result. The return type will be the same as the input type. + """ + if not hasattr(self, "_preprocessor"): + raise NotImplementedError( + "Subclasses of Predictor must call Predictor.__init__(preprocessor)." 
+ ) + try: + batch_format = TYPE_TO_ENUM[type(data)] + except KeyError: + raise RuntimeError( + f"Invalid input data type of {type(data)}, supported " + f"types: {list(TYPE_TO_ENUM.keys())}" + ) + + if self._preprocessor: + data = self._preprocessor.transform_batch(data) + + batch_format_to_use = self._batch_format_to_use() + + # We can finish prediction as long as one predict method is implemented. + # For prediction, we have to return back in the same format as the input. + if batch_format == BatchFormat.PANDAS: + if batch_format_to_use == BatchFormat.PANDAS: + return self._predict_pandas( + _convert_batch_type_to_pandas(data), **kwargs + ) + elif batch_format_to_use == BatchFormat.NUMPY: + return _convert_batch_type_to_pandas( + self._predict_numpy(_convert_batch_type_to_numpy(data), **kwargs) + ) + elif batch_format == BatchFormat.NUMPY: + if batch_format_to_use == BatchFormat.PANDAS: + return _convert_batch_type_to_numpy( + self._predict_pandas(_convert_batch_type_to_pandas(data), **kwargs) + ) + elif batch_format_to_use == BatchFormat.NUMPY: + return self._predict_numpy(_convert_batch_type_to_numpy(data), **kwargs) + + @DeveloperAPI + def _predict_pandas(self, data: "pd.DataFrame", **kwargs) -> "pd.DataFrame": + """Perform inference on a Pandas DataFrame. + + Args: + data: A pandas DataFrame to perform predictions on. + kwargs: Arguments specific to the predictor implementation. + + Returns: + A pandas DataFrame containing the prediction result. + + """ + raise NotImplementedError + + @DeveloperAPI + def _predict_numpy( + self, data: Union[np.ndarray, Dict[str, np.ndarray]], **kwargs + ) -> Union[np.ndarray, Dict[str, np.ndarray]]: + """Perform inference on a Numpy data. + + All Predictors working with tensor data (like deep learning predictors) + should implement this method. + + Args: + data: A Numpy ndarray or dictionary of ndarrays to perform predictions on. + kwargs: Arguments specific to the predictor implementation. 
+ + Returns: + A Numpy ndarray or dictionary of ndarray containing the prediction result. + + """ + raise NotImplementedError + + def __reduce__(self): + raise PredictorNotSerializableException( + "Predictor instances are not serializable. Instead, you may want " + "to serialize a checkpoint and initialize the Predictor with " + "Predictor.from_checkpoint." + ) diff --git a/.venv/lib/python3.11/site-packages/ray/train/session.py b/.venv/lib/python3.11/site-packages/ray/train/session.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/.venv/lib/python3.11/site-packages/ray/train/trainer.py b/.venv/lib/python3.11/site-packages/ray/train/trainer.py new file mode 100644 index 0000000000000000000000000000000000000000..725ba029d766c37d838e456f58abd2182e836a8d --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/train/trainer.py @@ -0,0 +1,194 @@ +import logging +import traceback +from pathlib import Path +from typing import Any, Callable, Dict, List, Optional, TypeVar, Union + +from ray.air._internal.util import ( + StartTraceback, + StartTracebackWithWorkerRank, + skip_exceptions, +) +from ray.data import Dataset +from ray.train import Checkpoint, DataConfig +from ray.train._internal.backend_executor import ( + BackendExecutor, + InactiveWorkerGroupError, + TrainBackendError, + TrainingWorkerError, +) +from ray.train._internal.session import _TrainingResult, _TrainSession, get_session +from ray.train._internal.utils import ActorWrapper +from ray.train.backend import BackendConfig +from ray.train.base_trainer import ( # noqa: F401 + BaseTrainer, + GenDataset, + TrainingFailedError, +) +from ray.util.annotations import DeveloperAPI + +T = TypeVar("T") +S = TypeVar("S") + +logger = logging.getLogger(__name__) + + +@DeveloperAPI +class TrainingIterator: + """An iterator over Train results. 
Returned by ``trainer.run_iterator``.""" + + def __init__( + self, + backend_executor: Union[BackendExecutor, ActorWrapper], + backend_config: BackendConfig, + train_func: Union[Callable[[], T], Callable[[Dict[str, Any]], T]], + datasets: Dict[str, Dataset], + metadata: Dict[str, Any], + data_config: DataConfig, + checkpoint: Optional[Union[Dict, str, Path, Checkpoint]], + ): + self._backend_executor = backend_executor + self._backend = backend_config.backend_cls() + self._train_func = train_func + self._datasets = datasets + self._metadata = metadata + self._data_config = data_config + + self._start_training( + train_func=train_func, + datasets=self._datasets, + metadata=self._metadata, + data_config=self._data_config, + checkpoint=checkpoint, + ) + + self._finished_training = False + + def __iter__(self): + return self + + def _start_training( + self, + train_func, + datasets, + metadata, + data_config, + checkpoint: Optional[Checkpoint] = None, + ): + tune_session: _TrainSession = get_session() + assert tune_session, "`_start_training` should only be called from within Tune" + storage = tune_session.storage + + self._run_with_error_handling( + lambda: self._backend_executor.start_training( + train_func=train_func, + datasets=datasets, + metadata=metadata, + data_config=data_config, + storage=storage, + checkpoint=checkpoint, + ) + ) + + def _run_with_error_handling(self, func: Callable): + try: + return func() + except TrainingWorkerError: + # TODO(ml-team): This Train fault-tolerance code doesn't get used + # since max_retries=0 + # Workers have already been restarted. + logger.info( + "Workers have been successfully restarted. Resuming " + "training from latest checkpoint." + ) + self._start_training( + self._train_func, + self._datasets, + self._metadata, + self._data_config, + ) + return self._run_with_error_handling(func) + except InactiveWorkerGroupError: + raise RuntimeError( + "This Trainer is not active. 
It is either shutdown " + "already or never started in the first place. " + "Either create a new Trainer or start this one." + ) from None + except TrainBackendError: + raise RuntimeError( + "Training failed. You should not be seeing " + "this error and this is a bug. Please create " + "a new issue at " + "https://github.com/ray-project/ray." + ) from None + + def __next__(self): + if self.is_finished(): + self._backend_executor.report_final_run_status(errored=False) + raise StopIteration + try: + next_results = self._run_with_error_handling(self._fetch_next_result) + if next_results is None: + self._backend_executor.report_final_run_status(errored=False) + self._run_with_error_handling(self._finish_training) + self._finished_training = True + raise StopIteration + else: + return next_results + except StartTraceback as e: + # If this is a StartTraceback, then this is a user error. + # We raise it directly + if isinstance(e, StartTracebackWithWorkerRank): + failed_rank = e.worker_rank + else: + failed_rank = None + + # Extract the stack trace from the exception + e = skip_exceptions(e) + stack_trace = "".join( + traceback.format_exception(type(e), e, e.__traceback__) + ) + + self._backend_executor.report_final_run_status( + errored=True, stack_trace=stack_trace, failed_rank=failed_rank + ) + try: + # Exception raised in at least one training worker. Immediately raise + # this error to the user and do not attempt to terminate gracefully. + self._backend_executor.shutdown(graceful_termination=False) + self._finished_training = True + except Exception: + pass + raise + + def _fetch_next_result(self) -> Optional[List[Dict]]: + """Fetch next results produced by ``session.report()`` from each worker. + + Assumes ``start_training`` has already been called. + + Returns: + A list of dictionaries of values passed to ``session.report()`` from + each worker. Each item corresponds to an intermediate result + a single worker. If there are no more items to fetch, + returns None. 
+ """ + results = self._backend_executor.get_next_results() + if results is None: + return None + assert all(isinstance(result, _TrainingResult) for result in results) + return results + + def _finish_training(self): + """Finish training and return final results. Propagate any exceptions. + + Blocks until training is finished on all workers. + + Assumes `start_training` has already been called. + + Returns: + A list of return values from calling ``train_func`` on each worker. + Each item corresponds to the return value from a single worker. + """ + return self._backend_executor.finish_training() + + def is_finished(self) -> bool: + return self._finished_training diff --git a/.venv/lib/python3.11/site-packages/ray/train/utils.py b/.venv/lib/python3.11/site-packages/ray/train/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..98b11f1f6091fba061679af2e08bec556315e7d0 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/train/utils.py @@ -0,0 +1,19 @@ +import warnings + +from ray.util.annotations import RayDeprecationWarning + + +def _copy_doc(copy_func): + def wrapped(func): + func.__doc__ = copy_func.__doc__ + return func + + return wrapped + + +def _log_deprecation_warning(message): + warnings.warn( + message, + RayDeprecationWarning, + stacklevel=2, + ) diff --git a/.venv/lib/python3.11/site-packages/ray/train/xgboost/__init__.py b/.venv/lib/python3.11/site-packages/ray/train/xgboost/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..aa2d1c88d11b0b1049cf834fb75a39e669f5fbb2 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/train/xgboost/__init__.py @@ -0,0 +1,20 @@ +from ray.train.v2._internal.constants import is_v2_enabled +from ray.train.xgboost._xgboost_utils import RayTrainReportCallback +from ray.train.xgboost.config import XGBoostConfig +from ray.train.xgboost.xgboost_checkpoint import XGBoostCheckpoint +from ray.train.xgboost.xgboost_predictor import XGBoostPredictor +from 
ray.train.xgboost.xgboost_trainer import XGBoostTrainer + +if is_v2_enabled(): + from ray.train.v2.xgboost.xgboost_trainer import XGBoostTrainer # noqa: F811 + +__all__ = [ + "RayTrainReportCallback", + "XGBoostCheckpoint", + "XGBoostConfig", + "XGBoostPredictor", + "XGBoostTrainer", +] + + +# DO NOT ADD ANYTHING AFTER THIS LINE. diff --git a/.venv/lib/python3.11/site-packages/ray/train/xgboost/__pycache__/__init__.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/train/xgboost/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..60d4dab19bf2a5ea8173eef5178246c33e8472c6 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/train/xgboost/__pycache__/__init__.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/train/xgboost/__pycache__/_xgboost_utils.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/train/xgboost/__pycache__/_xgboost_utils.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c5e0834c8e82f32975a4f8fe70874b638be9489f Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/train/xgboost/__pycache__/_xgboost_utils.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/train/xgboost/__pycache__/config.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/train/xgboost/__pycache__/config.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..435ff698a807dc7145325124a90b7867ba87cf3b Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/train/xgboost/__pycache__/config.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/train/xgboost/__pycache__/v2.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/train/xgboost/__pycache__/v2.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..836811088ecf5a449f67c93c226d6651a1baa0fb Binary files /dev/null and 
b/.venv/lib/python3.11/site-packages/ray/train/xgboost/__pycache__/v2.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/train/xgboost/__pycache__/xgboost_checkpoint.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/train/xgboost/__pycache__/xgboost_checkpoint.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..11bcd06e38caa845848b237bc2a4bc1c56ac7d88 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/train/xgboost/__pycache__/xgboost_checkpoint.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/train/xgboost/__pycache__/xgboost_predictor.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/train/xgboost/__pycache__/xgboost_predictor.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4eb74ce28f5b08090636ed88a57595fe986c8599 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/train/xgboost/__pycache__/xgboost_predictor.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/train/xgboost/__pycache__/xgboost_trainer.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/train/xgboost/__pycache__/xgboost_trainer.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..30f8a3da6ebd238947ba3b0fcdb3c8ee43747e9e Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/train/xgboost/__pycache__/xgboost_trainer.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/train/xgboost/_xgboost_utils.py b/.venv/lib/python3.11/site-packages/ray/train/xgboost/_xgboost_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..459dfcf07a2208659f469eb6769f5f3c5e2e8fbc --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/train/xgboost/_xgboost_utils.py @@ -0,0 +1,210 @@ +import tempfile +from collections import OrderedDict +from contextlib import contextmanager +from pathlib import Path +from typing import Callable, Dict, 
List, Optional, Union + +from xgboost.core import Booster + +import ray.train +from ray.train import Checkpoint +from ray.tune.utils import flatten_dict +from ray.util.annotations import PublicAPI + +try: + from xgboost.callback import TrainingCallback +except ImportError: + + class TrainingCallback: + pass + + +class TuneCallback(TrainingCallback): + # TODO(justinvyu): [code_removal] Remove this after enforcing min xgboost version. + """Base class for Tune's XGBoost callbacks.""" + + def __call__(self, env): + """Compatibility with xgboost<1.3""" + return self.after_iteration( + env.model, env.iteration, env.evaluation_result_list + ) + + def after_iteration(self, model: Booster, epoch: int, evals_log: Dict): + raise NotImplementedError + + +@PublicAPI(stability="beta") +class RayTrainReportCallback(TuneCallback): + """XGBoost callback to save checkpoints and report metrics. + + Args: + metrics: Metrics to report. If this is a list, + each item describes the metric key reported to XGBoost, + and it will be reported under the same name. + This can also be a dict of {: }, + which can be used to rename xgboost default metrics. + filename: Customize the saved checkpoint file type by passing + a filename. Defaults to "model.ubj". + frequency: How often to save checkpoints, in terms of iterations. + Defaults to 0 (no checkpoints are saved during training). + checkpoint_at_end: Whether or not to save a checkpoint at the end of training. + results_postprocessing_fn: An optional Callable that takes in + the metrics dict that will be reported (after it has been flattened) + and returns a modified dict. For example, this can be used to + average results across CV fold when using ``xgboost.cv``. + + Examples + -------- + + Reporting checkpoints and metrics to Ray Tune when running many + independent xgboost trials (without data parallelism within a trial). + + .. 
testcode:: + :skipif: True + + import xgboost + + from ray.tune import Tuner + from ray.train.xgboost import RayTrainReportCallback + + def train_fn(config): + # Report log loss to Ray Tune after each validation epoch. + bst = xgboost.train( + ..., + callbacks=[ + RayTrainReportCallback( + metrics={"loss": "eval-logloss"}, frequency=1 + ) + ], + ) + + tuner = Tuner(train_fn) + results = tuner.fit() + + Loading a model from a checkpoint reported by this callback. + + .. testcode:: + :skipif: True + + from ray.train.xgboost import RayTrainReportCallback + + # Get a `Checkpoint` object that is saved by the callback during training. + result = trainer.fit() + booster = RayTrainReportCallback.get_model(result.checkpoint) + + """ + + CHECKPOINT_NAME = "model.ubj" + + def __init__( + self, + metrics: Optional[Union[str, List[str], Dict[str, str]]] = None, + filename: str = CHECKPOINT_NAME, + frequency: int = 0, + checkpoint_at_end: bool = True, + results_postprocessing_fn: Optional[ + Callable[[Dict[str, Union[float, List[float]]]], Dict[str, float]] + ] = None, + ): + if isinstance(metrics, str): + metrics = [metrics] + self._metrics = metrics + self._filename = filename + self._frequency = frequency + self._checkpoint_at_end = checkpoint_at_end + self._results_postprocessing_fn = results_postprocessing_fn + + # Keeps track of the eval metrics from the last iteration, + # so that the latest metrics can be reported with the checkpoint + # at the end of training. + self._evals_log = None + # Keep track of the last checkpoint iteration to avoid double-checkpointing + # when using `checkpoint_at_end=True`. + self._last_checkpoint_iteration = None + + @classmethod + def get_model( + cls, checkpoint: Checkpoint, filename: str = CHECKPOINT_NAME + ) -> Booster: + """Retrieve the model stored in a checkpoint reported by this callback. + + Args: + checkpoint: The checkpoint object returned by a training run. + The checkpoint should be saved by an instance of this callback. 
+ filename: The filename to load the model from, which should match + the filename used when creating the callback. + """ + with checkpoint.as_directory() as checkpoint_path: + booster = Booster() + booster.load_model(Path(checkpoint_path, filename).as_posix()) + return booster + + def _get_report_dict(self, evals_log): + if isinstance(evals_log, OrderedDict): + # xgboost>=1.3 + result_dict = flatten_dict(evals_log, delimiter="-") + for k in list(result_dict): + result_dict[k] = result_dict[k][-1] + else: + # xgboost<1.3 + result_dict = dict(evals_log) + if not self._metrics: + report_dict = result_dict + else: + report_dict = {} + for key in self._metrics: + if isinstance(self._metrics, dict): + metric = self._metrics[key] + else: + metric = key + report_dict[key] = result_dict[metric] + + if self._results_postprocessing_fn: + report_dict = self._results_postprocessing_fn(report_dict) + + return report_dict + + @contextmanager + def _get_checkpoint(self, model: Booster) -> Optional[Checkpoint]: + # NOTE: The world rank returns None for Tune usage without Train. + if ray.train.get_context().get_world_rank() in (0, None): + with tempfile.TemporaryDirectory() as temp_checkpoint_dir: + model.save_model(Path(temp_checkpoint_dir, self._filename).as_posix()) + yield Checkpoint(temp_checkpoint_dir) + else: + yield None + + def after_iteration(self, model: Booster, epoch: int, evals_log: Dict): + self._evals_log = evals_log + + checkpointing_disabled = self._frequency == 0 + # Ex: if frequency=2, checkpoint at epoch 1, 3, 5, ... 
(counting from 0) + should_checkpoint = ( + not checkpointing_disabled and (epoch + 1) % self._frequency == 0 + ) + + report_dict = self._get_report_dict(evals_log) + if should_checkpoint: + self._last_checkpoint_iteration = epoch + with self._get_checkpoint(model=model) as checkpoint: + ray.train.report(report_dict, checkpoint=checkpoint) + else: + ray.train.report(report_dict) + + def after_training(self, model: Booster) -> Booster: + if not self._checkpoint_at_end: + return model + + if ( + self._last_checkpoint_iteration is not None + and model.num_boosted_rounds() - 1 == self._last_checkpoint_iteration + ): + # Avoids a duplicate checkpoint if the checkpoint frequency happens + # to align with the last iteration. + return model + + report_dict = self._get_report_dict(self._evals_log) if self._evals_log else {} + with self._get_checkpoint(model=model) as checkpoint: + ray.train.report(report_dict, checkpoint=checkpoint) + + return model diff --git a/.venv/lib/python3.11/site-packages/ray/train/xgboost/config.py b/.venv/lib/python3.11/site-packages/ray/train/xgboost/config.py new file mode 100644 index 0000000000000000000000000000000000000000..725326c70ffbff9d679c7498fe2f01c77e0c0531 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/train/xgboost/config.py @@ -0,0 +1,202 @@ +import json +import logging +import os +import threading +from contextlib import contextmanager +from dataclasses import dataclass +from typing import Optional + +import xgboost +from packaging.version import Version +from xgboost import RabitTracker +from xgboost.collective import CommunicatorContext + +import ray +from ray.train._internal.worker_group import WorkerGroup +from ray.train.backend import Backend, BackendConfig + +logger = logging.getLogger(__name__) + + +@dataclass +class XGBoostConfig(BackendConfig): + """Configuration for xgboost collective communication setup. 
+ + Ray Train will set up the necessary coordinator processes and environment + variables for your workers to communicate with each other. + Additional configuration options can be passed into the + `xgboost.collective.CommunicatorContext` that wraps your own `xgboost.train` code. + + See the `xgboost.collective` module for more information: + https://github.com/dmlc/xgboost/blob/master/python-package/xgboost/collective.py + + Args: + xgboost_communicator: The backend to use for collective communication for + distributed xgboost training. For now, only "rabit" is supported. + """ + + xgboost_communicator: str = "rabit" + + @property + def train_func_context(self): + @contextmanager + def collective_communication_context(): + with CommunicatorContext(**_get_xgboost_args()): + yield + + return collective_communication_context + + @property + def backend_cls(self): + if self.xgboost_communicator == "rabit": + return ( + _XGBoostRabitBackend + if Version(xgboost.__version__) >= Version("2.1.0") + else _XGBoostRabitBackend_pre_xgb210 + ) + + raise NotImplementedError(f"Unsupported backend: {self.xgboost_communicator}") + + +class _XGBoostRabitBackend(Backend): + def __init__(self): + self._tracker: Optional[RabitTracker] = None + self._wait_thread: Optional[threading.Thread] = None + + def _setup_xgboost_distributed_backend(self, worker_group: WorkerGroup): + # Set up the rabit tracker on the Train driver. + num_workers = len(worker_group) + rabit_args = {"n_workers": num_workers} + train_driver_ip = ray.util.get_node_ip_address() + + # NOTE: sortby="task" is needed to ensure that the xgboost worker ranks + # align with Ray Train worker ranks. + # The worker ranks will be sorted by `dmlc_task_id`, + # which is defined below. 
+ self._tracker = RabitTracker( + n_workers=num_workers, host_ip=train_driver_ip, sortby="task" + ) + self._tracker.start() + + # The RabitTracker is started in a separate thread, and the + # `wait_for` method must be called for `worker_args` to return. + self._wait_thread = threading.Thread(target=self._tracker.wait_for, daemon=True) + self._wait_thread.start() + + rabit_args.update(self._tracker.worker_args()) + + start_log = ( + "RabitTracker coordinator started with parameters:\n" + f"{json.dumps(rabit_args, indent=2)}" + ) + logger.debug(start_log) + + def set_xgboost_communicator_args(args): + import ray.train + + args["dmlc_task_id"] = ( + f"[xgboost.ray-rank={ray.train.get_context().get_world_rank():08}]:" + f"{ray.get_runtime_context().get_actor_id()}" + ) + + _set_xgboost_args(args) + + worker_group.execute(set_xgboost_communicator_args, rabit_args) + + def on_training_start( + self, worker_group: WorkerGroup, backend_config: XGBoostConfig + ): + assert backend_config.xgboost_communicator == "rabit" + self._setup_xgboost_distributed_backend(worker_group) + + def on_shutdown(self, worker_group: WorkerGroup, backend_config: XGBoostConfig): + timeout = 5 + + if self._wait_thread is not None: + self._wait_thread.join(timeout=timeout) + + if self._wait_thread.is_alive(): + logger.warning( + "During shutdown, the RabitTracker thread failed to join " + f"within {timeout} seconds. " + "The process will still be terminated as part of Ray actor cleanup." + ) + + +class _XGBoostRabitBackend_pre_xgb210(Backend): + def __init__(self): + self._tracker: Optional[RabitTracker] = None + + def _setup_xgboost_distributed_backend(self, worker_group: WorkerGroup): + # Set up the rabit tracker on the Train driver. + num_workers = len(worker_group) + rabit_args = {"DMLC_NUM_WORKER": num_workers} + train_driver_ip = ray.util.get_node_ip_address() + + # NOTE: sortby="task" is needed to ensure that the xgboost worker ranks + # align with Ray Train worker ranks. 
+ # The worker ranks will be sorted by `DMLC_TASK_ID`, + # which is defined below. + self._tracker = RabitTracker( + n_workers=num_workers, host_ip=train_driver_ip, sortby="task" + ) + self._tracker.start(n_workers=num_workers) + + worker_args = self._tracker.worker_envs() + rabit_args.update(worker_args) + + start_log = ( + "RabitTracker coordinator started with parameters:\n" + f"{json.dumps(rabit_args, indent=2)}" + ) + logger.debug(start_log) + + def set_xgboost_env_vars(): + import ray.train + + for k, v in rabit_args.items(): + os.environ[k] = str(v) + + # Ranks are assigned in increasing order of the worker's task id. + # This task id will be sorted by increasing world rank. + os.environ["DMLC_TASK_ID"] = ( + f"[xgboost.ray-rank={ray.train.get_context().get_world_rank():08}]:" + f"{ray.get_runtime_context().get_actor_id()}" + ) + + worker_group.execute(set_xgboost_env_vars) + + def on_training_start( + self, worker_group: WorkerGroup, backend_config: XGBoostConfig + ): + assert backend_config.xgboost_communicator == "rabit" + self._setup_xgboost_distributed_backend(worker_group) + + def on_shutdown(self, worker_group: WorkerGroup, backend_config: XGBoostConfig): + if not self._tracker: + return + + timeout = 5 + self._tracker.thread.join(timeout=timeout) + + if self._tracker.thread.is_alive(): + logger.warning( + "During shutdown, the RabitTracker thread failed to join " + f"within {timeout} seconds. " + "The process will still be terminated as part of Ray actor cleanup." 
+ ) + + +_xgboost_args: dict = {} +_xgboost_args_lock = threading.Lock() + + +def _set_xgboost_args(args): + with _xgboost_args_lock: + global _xgboost_args + _xgboost_args = args + + +def _get_xgboost_args() -> dict: + with _xgboost_args_lock: + return _xgboost_args diff --git a/.venv/lib/python3.11/site-packages/ray/train/xgboost/v2.py b/.venv/lib/python3.11/site-packages/ray/train/xgboost/v2.py new file mode 100644 index 0000000000000000000000000000000000000000..2494b479eb12c933a921cd927904a6d38ff34b49 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/train/xgboost/v2.py @@ -0,0 +1,133 @@ +import logging +from typing import Any, Callable, Dict, Optional, Union + +import ray.train +from ray.train import Checkpoint +from ray.train.data_parallel_trainer import DataParallelTrainer +from ray.train.trainer import GenDataset +from ray.train.xgboost import XGBoostConfig + +logger = logging.getLogger(__name__) + + +class XGBoostTrainer(DataParallelTrainer): + """A Trainer for distributed data-parallel XGBoost training. + + Example + ------- + + .. testcode:: + + import xgboost + + import ray.data + import ray.train + from ray.train.xgboost import RayTrainReportCallback + from ray.train.xgboost.v2 import XGBoostTrainer + + def train_fn_per_worker(config: dict): + # (Optional) Add logic to resume training state from a checkpoint. + # ray.train.get_checkpoint() + + # 1. 
Get the dataset shard for the worker and convert to a `xgboost.DMatrix` + train_ds_iter, eval_ds_iter = ( + ray.train.get_dataset_shard("train"), + ray.train.get_dataset_shard("validation"), + ) + train_ds, eval_ds = train_ds_iter.materialize(), eval_ds_iter.materialize() + + train_df, eval_df = train_ds.to_pandas(), eval_ds.to_pandas() + train_X, train_y = train_df.drop("y", axis=1), train_df["y"] + eval_X, eval_y = eval_df.drop("y", axis=1), eval_df["y"] + + dtrain = xgboost.DMatrix(train_X, label=train_y) + deval = xgboost.DMatrix(eval_X, label=eval_y) + + params = { + "tree_method": "approx", + "objective": "reg:squarederror", + "eta": 1e-4, + "subsample": 0.5, + "max_depth": 2, + } + + # 2. Do distributed data-parallel training. + # Ray Train sets up the necessary coordinator processes and + # environment variables for your workers to communicate with each other. + bst = xgboost.train( + params, + dtrain=dtrain, + evals=[(deval, "validation")], + num_boost_round=10, + callbacks=[RayTrainReportCallback()], + ) + + train_ds = ray.data.from_items([{"x": x, "y": x + 1} for x in range(32)]) + eval_ds = ray.data.from_items([{"x": x, "y": x + 1} for x in range(16)]) + trainer = XGBoostTrainer( + train_fn_per_worker, + datasets={"train": train_ds, "validation": eval_ds}, + scaling_config=ray.train.ScalingConfig(num_workers=4), + ) + result = trainer.fit() + booster = RayTrainReportCallback.get_model(result.checkpoint) + + .. testoutput:: + :hide: + + ... + + Args: + train_loop_per_worker: The training function to execute on each worker. + This function can either take in zero arguments or a single ``Dict`` + argument which is set by defining ``train_loop_config``. + Within this function you can use any of the + :ref:`Ray Train Loop utilities `. + train_loop_config: A configuration ``Dict`` to pass in as an argument to + ``train_loop_per_worker``. + This is typically used for specifying hyperparameters. 
+ xgboost_config: The configuration for setting up the distributed xgboost + backend. Defaults to using the "rabit" backend. + See :class:`~ray.train.xgboost.XGBoostConfig` for more info. + datasets: The Ray Datasets to use for training and validation. + dataset_config: The configuration for ingesting the input ``datasets``. + By default, all the Ray Datasets are split equally across workers. + See :class:`~ray.train.DataConfig` for more details. + scaling_config: The configuration for how to scale data parallel training. + ``num_workers`` determines how many Python processes are used for training, + and ``use_gpu`` determines whether or not each process should use GPUs. + See :class:`~ray.train.ScalingConfig` for more info. + run_config: The configuration for the execution of the training run. + See :class:`~ray.train.RunConfig` for more info. + resume_from_checkpoint: A checkpoint to resume training from. + This checkpoint can be accessed from within ``train_loop_per_worker`` + by calling ``ray.train.get_checkpoint()``. + metadata: Dict that should be made available via + `ray.train.get_context().get_metadata()` and in `checkpoint.get_metadata()` + for checkpoints saved from this Trainer. Must be JSON-serializable. 
+ """ + + def __init__( + self, + train_loop_per_worker: Union[Callable[[], None], Callable[[Dict], None]], + *, + train_loop_config: Optional[Dict] = None, + xgboost_config: Optional[XGBoostConfig] = None, + scaling_config: Optional[ray.train.ScalingConfig] = None, + run_config: Optional[ray.train.RunConfig] = None, + datasets: Optional[Dict[str, GenDataset]] = None, + dataset_config: Optional[ray.train.DataConfig] = None, + metadata: Optional[Dict[str, Any]] = None, + resume_from_checkpoint: Optional[Checkpoint] = None, + ): + super(XGBoostTrainer, self).__init__( + train_loop_per_worker=train_loop_per_worker, + train_loop_config=train_loop_config, + backend_config=xgboost_config or XGBoostConfig(), + scaling_config=scaling_config, + dataset_config=dataset_config, + run_config=run_config, + datasets=datasets, + resume_from_checkpoint=resume_from_checkpoint, + metadata=metadata, + ) diff --git a/.venv/lib/python3.11/site-packages/ray/train/xgboost/xgboost_checkpoint.py b/.venv/lib/python3.11/site-packages/ray/train/xgboost/xgboost_checkpoint.py new file mode 100644 index 0000000000000000000000000000000000000000..fc8b70465b5c01f85d8f16f841eda2c0969748b8 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/train/xgboost/xgboost_checkpoint.py @@ -0,0 +1,75 @@ +import tempfile +from pathlib import Path +from typing import TYPE_CHECKING, Optional + +import xgboost + +from ray.train._internal.framework_checkpoint import FrameworkCheckpoint +from ray.util.annotations import PublicAPI + +if TYPE_CHECKING: + from ray.data.preprocessor import Preprocessor + + +@PublicAPI(stability="beta") +class XGBoostCheckpoint(FrameworkCheckpoint): + """A :py:class:`~ray.train.Checkpoint` with XGBoost-specific functionality.""" + + MODEL_FILENAME = "model.json" + + @classmethod + def from_model( + cls, + booster: xgboost.Booster, + *, + preprocessor: Optional["Preprocessor"] = None, + path: Optional[str] = None, + ) -> "XGBoostCheckpoint": + """Create a 
:py:class:`~ray.train.Checkpoint` that stores an XGBoost + model. + + Args: + booster: The XGBoost model to store in the checkpoint. + preprocessor: A fitted preprocessor to be applied before inference. + path: The path to the directory where the checkpoint file will be saved. + This should start as an empty directory, since the *entire* + directory will be treated as the checkpoint when reported. + By default, a temporary directory will be created. + + Returns: + An :py:class:`XGBoostCheckpoint` containing the specified ``Estimator``. + + Examples: + + ... testcode:: + + import numpy as np + import ray + from ray.train.xgboost import XGBoostCheckpoint + import xgboost + + train_X = np.array([[1, 2], [3, 4]]) + train_y = np.array([0, 1]) + + model = xgboost.XGBClassifier().fit(train_X, train_y) + checkpoint = XGBoostCheckpoint.from_model(model.get_booster()) + + """ + checkpoint_path = Path(path or tempfile.mkdtemp()) + + if not checkpoint_path.is_dir(): + raise ValueError(f"`path` must be a directory, but got: {checkpoint_path}") + + booster.save_model(checkpoint_path.joinpath(cls.MODEL_FILENAME).as_posix()) + + checkpoint = cls.from_directory(checkpoint_path.as_posix()) + if preprocessor: + checkpoint.set_preprocessor(preprocessor) + return checkpoint + + def get_model(self) -> xgboost.Booster: + """Retrieve the XGBoost model stored in this checkpoint.""" + with self.as_directory() as checkpoint_path: + booster = xgboost.Booster() + booster.load_model(Path(checkpoint_path, self.MODEL_FILENAME).as_posix()) + return booster diff --git a/.venv/lib/python3.11/site-packages/ray/train/xgboost/xgboost_predictor.py b/.venv/lib/python3.11/site-packages/ray/train/xgboost/xgboost_predictor.py new file mode 100644 index 0000000000000000000000000000000000000000..1b319b93b299bc02a5b83a2f1cdcfa1e8fab6e8e --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/train/xgboost/xgboost_predictor.py @@ -0,0 +1,160 @@ +from typing import TYPE_CHECKING, Any, Dict, List, Optional, 
Union + +import pandas as pd +import xgboost + +from ray.air.constants import TENSOR_COLUMN_NAME +from ray.air.data_batch_type import DataBatchType +from ray.air.util.data_batch_conversion import _unwrap_ndarray_object_type_if_needed +from ray.train.predictor import Predictor +from ray.train.xgboost import XGBoostCheckpoint +from ray.util.annotations import PublicAPI + +if TYPE_CHECKING: + from ray.data.preprocessor import Preprocessor + + +@PublicAPI(stability="beta") +class XGBoostPredictor(Predictor): + """A predictor for XGBoost models. + + Args: + model: The XGBoost booster to use for predictions. + preprocessor: A preprocessor used to transform data batches prior + to prediction. + """ + + def __init__( + self, model: xgboost.Booster, preprocessor: Optional["Preprocessor"] = None + ): + self.model = model + super().__init__(preprocessor) + + def __repr__(self): + return ( + f"{self.__class__.__name__}(model={self.model!r}, " + f"preprocessor={self._preprocessor!r})" + ) + + @classmethod + def from_checkpoint(cls, checkpoint: XGBoostCheckpoint) -> "XGBoostPredictor": + """Instantiate the predictor from a Checkpoint. + + This is a helper constructor that instantiates the predictor from a + framework-specific XGBoost checkpoint. + + Args: + checkpoint: The checkpoint to load the model and preprocessor from. + + """ + model = checkpoint.get_model() + preprocessor = checkpoint.get_preprocessor() + return cls(model=model, preprocessor=preprocessor) + + def predict( + self, + data: DataBatchType, + feature_columns: Optional[Union[List[str], List[int]]] = None, + dmatrix_kwargs: Optional[Dict[str, Any]] = None, + **predict_kwargs, + ) -> DataBatchType: + """Run inference on data batch. + + The data is converted into an XGBoost DMatrix before being inputted to + the model. + + Args: + data: A batch of input data. + feature_columns: The names or indices of the columns in the + data to use as features to predict on. If None, then use + all columns in ``data``. 
+ dmatrix_kwargs: Dict of keyword arguments passed to ``xgboost.DMatrix``. + **predict_kwargs: Keyword arguments passed to ``xgboost.Booster.predict``. + + + Examples: + + .. testcode:: + + import numpy as np + import xgboost as xgb + from ray.train.xgboost import XGBoostPredictor + train_X = np.array([[1, 2], [3, 4]]) + train_y = np.array([0, 1]) + model = xgb.XGBClassifier().fit(train_X, train_y) + predictor = XGBoostPredictor(model=model.get_booster()) + data = np.array([[1, 2], [3, 4]]) + predictions = predictor.predict(data) + # Only use first and second column as the feature + data = np.array([[1, 2, 8], [3, 4, 9]]) + predictions = predictor.predict(data, feature_columns=[0, 1]) + + .. testcode:: + + import pandas as pd + import xgboost as xgb + from ray.train.xgboost import XGBoostPredictor + train_X = pd.DataFrame([[1, 2], [3, 4]], columns=["A", "B"]) + train_y = pd.Series([0, 1]) + model = xgb.XGBClassifier().fit(train_X, train_y) + predictor = XGBoostPredictor(model=model.get_booster()) + # Pandas dataframe. + data = pd.DataFrame([[1, 2], [3, 4]], columns=["A", "B"]) + predictions = predictor.predict(data) + # Only use first and second column as the feature + data = pd.DataFrame([[1, 2, 8], [3, 4, 9]], columns=["A", "B", "C"]) + predictions = predictor.predict(data, feature_columns=["A", "B"]) + + + Returns: + Prediction result. 
+ + """ + return Predictor.predict( + self, + data, + feature_columns=feature_columns, + dmatrix_kwargs=dmatrix_kwargs, + **predict_kwargs, + ) + + def _predict_pandas( + self, + data: "pd.DataFrame", + feature_columns: Optional[Union[List[str], List[int]]] = None, + dmatrix_kwargs: Optional[Dict[str, Any]] = None, + **predict_kwargs, + ) -> "pd.DataFrame": + dmatrix_kwargs = dmatrix_kwargs or {} + + feature_names = None + if TENSOR_COLUMN_NAME in data: + data = data[TENSOR_COLUMN_NAME].to_numpy() + data = _unwrap_ndarray_object_type_if_needed(data) + if feature_columns: + # In this case feature_columns is a list of integers + data = data[:, feature_columns] + elif feature_columns: + # feature_columns is a list of integers or strings + data = data[feature_columns].to_numpy() + # Only set the feature names if they are strings + if all(isinstance(fc, str) for fc in feature_columns): + feature_names = feature_columns + else: + feature_columns = data.columns.tolist() + data = data.to_numpy() + + if all(isinstance(fc, str) for fc in feature_columns): + feature_names = feature_columns + + if feature_names: + dmatrix_kwargs["feature_names"] = feature_names + + matrix = xgboost.DMatrix(data, **dmatrix_kwargs) + df = pd.DataFrame(self.model.predict(matrix, **predict_kwargs)) + df.columns = ( + ["predictions"] + if len(df.columns) == 1 + else [f"predictions_{i}" for i in range(len(df.columns))] + ) + return df diff --git a/.venv/lib/python3.11/site-packages/ray/train/xgboost/xgboost_trainer.py b/.venv/lib/python3.11/site-packages/ray/train/xgboost/xgboost_trainer.py new file mode 100644 index 0000000000000000000000000000000000000000..8b4f49b0f9ba6c3f0c60c06b346ea1514381fcab --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/train/xgboost/xgboost_trainer.py @@ -0,0 +1,222 @@ +import logging +from functools import partial +from typing import Any, Dict, Optional + +import xgboost +from packaging.version import Version + +import ray.train +from ray.train import 
Checkpoint +from ray.train.constants import _DEPRECATED_VALUE, TRAIN_DATASET_KEY +from ray.train.trainer import GenDataset +from ray.train.xgboost import RayTrainReportCallback +from ray.train.xgboost.v2 import XGBoostTrainer as SimpleXGBoostTrainer +from ray.util.annotations import PublicAPI + +logger = logging.getLogger(__name__) + + +def _xgboost_train_fn_per_worker( + config: dict, + label_column: str, + num_boost_round: int, + dataset_keys: set, + xgboost_train_kwargs: dict, +): + checkpoint = ray.train.get_checkpoint() + starting_model = None + remaining_iters = num_boost_round + if checkpoint: + starting_model = RayTrainReportCallback.get_model(checkpoint) + starting_iter = starting_model.num_boosted_rounds() + remaining_iters = num_boost_round - starting_iter + logger.info( + f"Model loaded from checkpoint will train for " + f"additional {remaining_iters} iterations (trees) in order " + "to achieve the target number of iterations " + f"({num_boost_round=})." + ) + + train_ds_iter = ray.train.get_dataset_shard(TRAIN_DATASET_KEY) + train_df = train_ds_iter.materialize().to_pandas() + + eval_ds_iters = { + k: ray.train.get_dataset_shard(k) + for k in dataset_keys + if k != TRAIN_DATASET_KEY + } + eval_dfs = {k: d.materialize().to_pandas() for k, d in eval_ds_iters.items()} + + train_X, train_y = train_df.drop(label_column, axis=1), train_df[label_column] + dtrain = xgboost.DMatrix(train_X, label=train_y) + + # NOTE: Include the training dataset in the evaluation datasets. + # This allows `train-*` metrics to be calculated and reported. 
+ evals = [(dtrain, TRAIN_DATASET_KEY)] + + for eval_name, eval_df in eval_dfs.items(): + eval_X, eval_y = eval_df.drop(label_column, axis=1), eval_df[label_column] + evals.append((xgboost.DMatrix(eval_X, label=eval_y), eval_name)) + + evals_result = {} + xgboost.train( + config, + dtrain=dtrain, + evals=evals, + evals_result=evals_result, + num_boost_round=remaining_iters, + xgb_model=starting_model, + **xgboost_train_kwargs, + ) + + +@PublicAPI(stability="beta") +class XGBoostTrainer(SimpleXGBoostTrainer): + """A Trainer for data parallel XGBoost training. + + This Trainer runs the XGBoost training loop in a distributed manner + using multiple Ray Actors. + + .. note:: + ``XGBoostTrainer`` does not modify or otherwise alter the working + of the XGBoost distributed training algorithm. + Ray only provides orchestration, data ingest and fault tolerance. + For more information on XGBoost distributed training, refer to + `XGBoost documentation `__. + + Example: + .. testcode:: + + import ray + + from ray.train.xgboost import XGBoostTrainer + from ray.train import ScalingConfig + + train_dataset = ray.data.from_items( + [{"x": x, "y": x + 1} for x in range(32)]) + trainer = XGBoostTrainer( + label_column="y", + params={"objective": "reg:squarederror"}, + scaling_config=ScalingConfig(num_workers=3), + datasets={"train": train_dataset}, + ) + result = trainer.fit() + + .. testoutput:: + :hide: + + ... + + Args: + datasets: The Ray Datasets to use for training and validation. Must include a + "train" key denoting the training dataset. All non-training datasets will + be used as separate validation sets, each reporting a separate metric. + label_column: Name of the label column. A column with this name + must be present in the training dataset. + params: XGBoost training parameters. + Refer to `XGBoost documentation `_ + for a list of possible parameters. + num_boost_round: Target number of boosting iterations (trees in the model). 
+ Note that unlike in ``xgboost.train``, this is the target number + of trees, meaning that if you set ``num_boost_round=10`` and pass a model + that has already been trained for 5 iterations, it will be trained for 5 + iterations more, instead of 10 more. + scaling_config: Configuration for how to scale data parallel training. + run_config: Configuration for the execution of the training run. + dataset_config: The configuration for ingesting the input ``datasets``. + By default, all the Ray Datasets are split equally across workers. + See :class:`~ray.train.DataConfig` for more details. + resume_from_checkpoint: A checkpoint to resume training from. + metadata: Dict that should be made available in `checkpoint.get_metadata()` + for checkpoints saved from this Trainer. Must be JSON-serializable. + **train_kwargs: Additional kwargs passed to ``xgboost.train()`` function. + """ + + _handles_checkpoint_freq = True + _handles_checkpoint_at_end = True + + def __init__( + self, + *, + datasets: Dict[str, GenDataset], + label_column: str, + params: Dict[str, Any], + dmatrix_params: Optional[Dict[str, Dict[str, Any]]] = _DEPRECATED_VALUE, + num_boost_round: int = 10, + scaling_config: Optional[ray.train.ScalingConfig] = None, + run_config: Optional[ray.train.RunConfig] = None, + dataset_config: Optional[ray.train.DataConfig] = None, + resume_from_checkpoint: Optional[Checkpoint] = None, + metadata: Optional[Dict[str, Any]] = None, + **train_kwargs, + ): + if Version(xgboost.__version__) < Version("1.7.0"): + raise ImportError( + "`XGBoostTrainer` requires the `xgboost` version to be >= 1.7.0. " + 'Upgrade with: `pip install -U "xgboost>=1.7"`' + ) + + # TODO(justinvyu): [Deprecated] Remove in 2.11 + if dmatrix_params != _DEPRECATED_VALUE: + raise DeprecationWarning( + "`dmatrix_params` is deprecated, since XGBoostTrainer no longer " + "depends on the `xgboost_ray.RayDMatrix` utility. 
" + "You can remove this argument and use `dataset_config` instead " + "to customize Ray Dataset ingestion." + ) + + # Initialize a default Ray Train metrics/checkpoint reporting callback if needed + callbacks = train_kwargs.get("callbacks", []) + user_supplied_callback = any( + isinstance(callback, RayTrainReportCallback) for callback in callbacks + ) + callback_kwargs = {} + if run_config: + checkpoint_frequency = run_config.checkpoint_config.checkpoint_frequency + checkpoint_at_end = run_config.checkpoint_config.checkpoint_at_end + + callback_kwargs["frequency"] = checkpoint_frequency + # Default `checkpoint_at_end=True` unless the user explicitly sets it. + callback_kwargs["checkpoint_at_end"] = ( + checkpoint_at_end if checkpoint_at_end is not None else True + ) + + if not user_supplied_callback: + callbacks.append(RayTrainReportCallback(**callback_kwargs)) + train_kwargs["callbacks"] = callbacks + + train_fn_per_worker = partial( + _xgboost_train_fn_per_worker, + label_column=label_column, + num_boost_round=num_boost_round, + dataset_keys=set(datasets), + xgboost_train_kwargs=train_kwargs, + ) + + super(XGBoostTrainer, self).__init__( + train_loop_per_worker=train_fn_per_worker, + train_loop_config=params, + scaling_config=scaling_config, + run_config=run_config, + datasets=datasets, + dataset_config=dataset_config, + resume_from_checkpoint=resume_from_checkpoint, + metadata=metadata, + ) + + @classmethod + def get_model( + cls, + checkpoint: Checkpoint, + ) -> xgboost.Booster: + """Retrieve the XGBoost model stored in this checkpoint.""" + return RayTrainReportCallback.get_model(checkpoint) + + def _validate_attributes(self): + super()._validate_attributes() + + if TRAIN_DATASET_KEY not in self.datasets: + raise KeyError( + f"'{TRAIN_DATASET_KEY}' key must be preset in `datasets`. 
" + f"Got {list(self.datasets.keys())}" + ) diff --git a/.venv/lib/python3.11/site-packages/ray/tune/search/__pycache__/__init__.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/tune/search/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..392a25b023f47ac1c4918bf5aaa04fa8be31db75 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/tune/search/__pycache__/__init__.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/tune/search/__pycache__/_mock.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/tune/search/__pycache__/_mock.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b0b320732a7f570914c7d7d7d8e6ed2a01189b4c Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/tune/search/__pycache__/_mock.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/tune/search/__pycache__/basic_variant.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/tune/search/__pycache__/basic_variant.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..841a3b966f67287e36825ac1b892cd308942aba2 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/tune/search/__pycache__/basic_variant.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/tune/search/__pycache__/repeater.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/tune/search/__pycache__/repeater.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b6350390ed40faa559c3ac738c33ed0060fc46d5 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/tune/search/__pycache__/repeater.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/tune/search/__pycache__/searcher.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/tune/search/__pycache__/searcher.cpython-311.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..6bee4602674fd28c51db5d0ef68aae9e4d45e837 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/tune/search/__pycache__/searcher.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/tune/search/__pycache__/util.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/tune/search/__pycache__/util.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7184cfbef4230f924cbe9c7b057673a87e3cc2a3 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/tune/search/__pycache__/util.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/tune/search/__pycache__/variant_generator.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/tune/search/__pycache__/variant_generator.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..94ecd0ccffb3f48b4d7d2b3cc5c092d4e1b4be02 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/tune/search/__pycache__/variant_generator.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/tune/search/ax/__init__.py b/.venv/lib/python3.11/site-packages/ray/tune/search/ax/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..d2cc1df85b97152386a332358657e13e2dd06ede --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/tune/search/ax/__init__.py @@ -0,0 +1,3 @@ +from ray.tune.search.ax.ax_search import AxSearch + +__all__ = ["AxSearch"] diff --git a/.venv/lib/python3.11/site-packages/ray/tune/search/ax/ax_search.py b/.venv/lib/python3.11/site-packages/ray/tune/search/ax/ax_search.py new file mode 100644 index 0000000000000000000000000000000000000000..6404fcd85e7a5c7fff9626fac6ed0c1d2e280fa0 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/tune/search/ax/ax_search.py @@ -0,0 +1,432 @@ +import copy +import logging +from typing import Dict, List, Optional, Union + +import numpy as np + +from ray import 
cloudpickle +from ray.tune.result import DEFAULT_METRIC +from ray.tune.search import ( + UNDEFINED_METRIC_MODE, + UNDEFINED_SEARCH_SPACE, + UNRESOLVED_SEARCH_SPACE, + Searcher, +) +from ray.tune.search.sample import ( + Categorical, + Float, + Integer, + LogUniform, + Quantized, + Uniform, +) +from ray.tune.search.variant_generator import parse_spec_vars +from ray.tune.utils.util import flatten_dict, unflatten_list_dict + +try: + import ax + from ax.service.ax_client import AxClient +except ImportError: + ax = AxClient = None + +# This exception only exists in newer Ax releases for python 3.7 +try: + from ax.exceptions.core import DataRequiredError + from ax.exceptions.generation_strategy import MaxParallelismReachedException +except ImportError: + MaxParallelismReachedException = DataRequiredError = Exception + + +logger = logging.getLogger(__name__) + + +class AxSearch(Searcher): + """Uses `Ax `_ to optimize hyperparameters. + + Ax is a platform for understanding, managing, deploying, and + automating adaptive experiments. Ax provides an easy to use + interface with BoTorch, a flexible, modern library for Bayesian + optimization in PyTorch. More information can be found in https://ax.dev/. + + To use this search algorithm, you must install Ax: + + .. code-block:: bash + + $ pip install ax-platform + + Parameters: + space: Parameters in the experiment search space. + Required elements in the dictionaries are: "name" (name of + this parameter, string), "type" (type of the parameter: "range", + "fixed", or "choice", string), "bounds" for range parameters + (list of two values, lower bound first), "values" for choice + parameters (list of values), and "value" for fixed parameters + (single value). + metric: Name of the metric used as objective in this + experiment. This metric must be present in `raw_data` argument + to `log_data`. This metric must also be present in the dict + reported/returned by the Trainable. 
If None but a mode was passed, + the `ray.tune.result.DEFAULT_METRIC` will be used per default. + mode: One of {min, max}. Determines whether objective is + minimizing or maximizing the metric attribute. Defaults to "max". + points_to_evaluate: Initial parameter suggestions to be run + first. This is for when you already have some good parameters + you want to run first to help the algorithm make better suggestions + for future parameters. Needs to be a list of dicts containing the + configurations. + parameter_constraints: Parameter constraints, such as + "x3 >= x4" or "x3 + x4 >= 2". + outcome_constraints: Outcome constraints of form + "metric_name >= bound", like "m1 <= 3." + ax_client: Optional AxClient instance. If this is set, do + not pass any values to these parameters: `space`, `metric`, + `parameter_constraints`, `outcome_constraints`. + **ax_kwargs: Passed to AxClient instance. Ignored if `AxClient` is not + None. + + Tune automatically converts search spaces to Ax's format: + + .. code-block:: python + + from ray import train, tune + from ray.tune.search.ax import AxSearch + + config = { + "x1": tune.uniform(0.0, 1.0), + "x2": tune.uniform(0.0, 1.0) + } + + def easy_objective(config): + for i in range(100): + intermediate_result = config["x1"] + config["x2"] * i + train.report({"score": intermediate_result}) + + ax_search = AxSearch() + tuner = tune.Tuner( + easy_objective, + tune_config=tune.TuneConfig( + search_alg=ax_search, + metric="score", + mode="max", + ), + param_space=config, + ) + tuner.fit() + + If you would like to pass the search space manually, the code would + look like this: + + .. 
code-block:: python + + from ray import train, tune + from ray.tune.search.ax import AxSearch + + parameters = [ + {"name": "x1", "type": "range", "bounds": [0.0, 1.0]}, + {"name": "x2", "type": "range", "bounds": [0.0, 1.0]}, + ] + + def easy_objective(config): + for i in range(100): + intermediate_result = config["x1"] + config["x2"] * i + train.report({"score": intermediate_result}) + + ax_search = AxSearch(space=parameters, metric="score", mode="max") + tuner = tune.Tuner( + easy_objective, + tune_config=tune.TuneConfig( + search_alg=ax_search, + ), + ) + tuner.fit() + + """ + + def __init__( + self, + space: Optional[Union[Dict, List[Dict]]] = None, + metric: Optional[str] = None, + mode: Optional[str] = None, + points_to_evaluate: Optional[List[Dict]] = None, + parameter_constraints: Optional[List] = None, + outcome_constraints: Optional[List] = None, + ax_client: Optional[AxClient] = None, + **ax_kwargs, + ): + assert ( + ax is not None + ), """Ax must be installed! + You can install AxSearch with the command: + `pip install ax-platform`.""" + + if mode: + assert mode in ["min", "max"], "`mode` must be 'min' or 'max'." 
+ + super(AxSearch, self).__init__( + metric=metric, + mode=mode, + ) + + self._ax = ax_client + self._ax_kwargs = ax_kwargs or {} + + if isinstance(space, dict) and space: + resolved_vars, domain_vars, grid_vars = parse_spec_vars(space) + if domain_vars or grid_vars: + logger.warning( + UNRESOLVED_SEARCH_SPACE.format(par="space", cls=type(self)) + ) + space = self.convert_search_space(space) + + self._space = space + self._parameter_constraints = parameter_constraints + self._outcome_constraints = outcome_constraints + + self._points_to_evaluate = copy.deepcopy(points_to_evaluate) + + self._parameters = [] + self._live_trial_mapping = {} + + if self._ax or self._space: + self._setup_experiment() + + def _setup_experiment(self): + if self._metric is None and self._mode: + # If only a mode was passed, use anonymous metric + self._metric = DEFAULT_METRIC + + if not self._ax: + self._ax = AxClient(**self._ax_kwargs) + + try: + exp = self._ax.experiment + has_experiment = True + except ValueError: + has_experiment = False + + if not has_experiment: + if not self._space: + raise ValueError( + "You have to create an Ax experiment by calling " + "`AxClient.create_experiment()`, or you should pass an " + "Ax search space as the `space` parameter to `AxSearch`, " + "or pass a `param_space` dict to `tune.Tuner()`." + ) + if self._mode not in ["min", "max"]: + raise ValueError( + "Please specify the `mode` argument when initializing " + "the `AxSearch` object or pass it to `tune.TuneConfig()`." 
+ ) + self._ax.create_experiment( + parameters=self._space, + objective_name=self._metric, + parameter_constraints=self._parameter_constraints, + outcome_constraints=self._outcome_constraints, + minimize=self._mode != "max", + ) + else: + if any( + [ + self._space, + self._parameter_constraints, + self._outcome_constraints, + self._mode, + self._metric, + ] + ): + raise ValueError( + "If you create the Ax experiment yourself, do not pass " + "values for these parameters to `AxSearch`: {}.".format( + [ + "space", + "parameter_constraints", + "outcome_constraints", + "mode", + "metric", + ] + ) + ) + + exp = self._ax.experiment + + # Update mode and metric from experiment if it has been passed + self._mode = "min" if exp.optimization_config.objective.minimize else "max" + self._metric = exp.optimization_config.objective.metric.name + + self._parameters = list(exp.parameters) + + if self._ax._enforce_sequential_optimization: + logger.warning( + "Detected sequential enforcement. Be sure to use " + "a ConcurrencyLimiter." 
+ ) + + def set_search_properties( + self, metric: Optional[str], mode: Optional[str], config: Dict, **spec + ): + if self._ax: + return False + space = self.convert_search_space(config) + self._space = space + if metric: + self._metric = metric + if mode: + self._mode = mode + + self._setup_experiment() + return True + + def suggest(self, trial_id: str) -> Optional[Dict]: + if not self._ax: + raise RuntimeError( + UNDEFINED_SEARCH_SPACE.format( + cls=self.__class__.__name__, space="space" + ) + ) + + if not self._metric or not self._mode: + raise RuntimeError( + UNDEFINED_METRIC_MODE.format( + cls=self.__class__.__name__, metric=self._metric, mode=self._mode + ) + ) + + if self._points_to_evaluate: + config = self._points_to_evaluate.pop(0) + parameters, trial_index = self._ax.attach_trial(config) + else: + try: + parameters, trial_index = self._ax.get_next_trial() + except (MaxParallelismReachedException, DataRequiredError): + return None + + self._live_trial_mapping[trial_id] = trial_index + try: + suggested_config = unflatten_list_dict(parameters) + except AssertionError: + # Fails to unflatten if keys are out of order, which only happens + # if search space includes a list with both constants and + # tunable hyperparameters: + # Ex: "a": [1, tune.uniform(2, 3), 4] + suggested_config = unflatten_list_dict( + {k: parameters[k] for k in sorted(parameters.keys())} + ) + return suggested_config + + def on_trial_complete(self, trial_id, result=None, error=False): + """Notification for the completion of trial. + + Data of form key value dictionary of metric names and values. 
+ """ + if result: + self._process_result(trial_id, result) + self._live_trial_mapping.pop(trial_id) + + def _process_result(self, trial_id, result): + ax_trial_index = self._live_trial_mapping[trial_id] + metrics_to_include = [self._metric] + [ + oc.metric.name + for oc in self._ax.experiment.optimization_config.outcome_constraints + ] + metric_dict = {} + for key in metrics_to_include: + val = result[key] + if np.isnan(val) or np.isinf(val): + # Don't report trials with NaN metrics to Ax + self._ax.abandon_trial( + trial_index=ax_trial_index, + reason=f"nan/inf metrics reported by {trial_id}", + ) + return + metric_dict[key] = (val, None) + self._ax.complete_trial(trial_index=ax_trial_index, raw_data=metric_dict) + + @staticmethod + def convert_search_space(spec: Dict): + resolved_vars, domain_vars, grid_vars = parse_spec_vars(spec) + + if grid_vars: + raise ValueError( + "Grid search parameters cannot be automatically converted " + "to an Ax search space." + ) + + # Flatten and resolve again after checking for grid search. + spec = flatten_dict(spec, prevent_delimiter=True) + resolved_vars, domain_vars, grid_vars = parse_spec_vars(spec) + + def resolve_value(par, domain): + sampler = domain.get_sampler() + if isinstance(sampler, Quantized): + logger.warning( + "AxSearch does not support quantization. Dropped quantization." 
+ ) + sampler = sampler.sampler + + if isinstance(domain, Float): + if isinstance(sampler, LogUniform): + return { + "name": par, + "type": "range", + "bounds": [domain.lower, domain.upper], + "value_type": "float", + "log_scale": True, + } + elif isinstance(sampler, Uniform): + return { + "name": par, + "type": "range", + "bounds": [domain.lower, domain.upper], + "value_type": "float", + "log_scale": False, + } + elif isinstance(domain, Integer): + if isinstance(sampler, LogUniform): + return { + "name": par, + "type": "range", + "bounds": [domain.lower, domain.upper - 1], + "value_type": "int", + "log_scale": True, + } + elif isinstance(sampler, Uniform): + return { + "name": par, + "type": "range", + "bounds": [domain.lower, domain.upper - 1], + "value_type": "int", + "log_scale": False, + } + elif isinstance(domain, Categorical): + if isinstance(sampler, Uniform): + return {"name": par, "type": "choice", "values": domain.categories} + + raise ValueError( + "AxSearch does not support parameters of type " + "`{}` with samplers of type `{}`".format( + type(domain).__name__, type(domain.sampler).__name__ + ) + ) + + # Parameter name is e.g. 
"a/b/c" for nested dicts, + # "a/d/0", "a/d/1" for nested lists (using the index in the list) + fixed_values = [ + {"name": "/".join(str(p) for p in path), "type": "fixed", "value": val} + for path, val in resolved_vars + ] + resolved_values = [ + resolve_value("/".join(str(p) for p in path), domain) + for path, domain in domain_vars + ] + + return fixed_values + resolved_values + + def save(self, checkpoint_path: str): + save_object = self.__dict__ + with open(checkpoint_path, "wb") as outputFile: + cloudpickle.dump(save_object, outputFile) + + def restore(self, checkpoint_path: str): + with open(checkpoint_path, "rb") as inputFile: + save_object = cloudpickle.load(inputFile) + self.__dict__.update(save_object) diff --git a/.venv/lib/python3.11/site-packages/ray/tune/search/bayesopt/__init__.py b/.venv/lib/python3.11/site-packages/ray/tune/search/bayesopt/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..2569dcd5849a69f9818e90ac6db9cc4891c2da9c --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/tune/search/bayesopt/__init__.py @@ -0,0 +1,3 @@ +from ray.tune.search.bayesopt.bayesopt_search import BayesOptSearch + +__all__ = ["BayesOptSearch"] diff --git a/.venv/lib/python3.11/site-packages/ray/tune/search/bayesopt/__pycache__/__init__.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/tune/search/bayesopt/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5dd3af962a330305686af804fd3807d9b681132f Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/tune/search/bayesopt/__pycache__/__init__.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/tune/search/bayesopt/__pycache__/bayesopt_search.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/tune/search/bayesopt/__pycache__/bayesopt_search.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a846c0fd24823a44b165cd3afcce7bf213ad44c7 Binary files 
/dev/null and b/.venv/lib/python3.11/site-packages/ray/tune/search/bayesopt/__pycache__/bayesopt_search.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/tune/search/bayesopt/bayesopt_search.py b/.venv/lib/python3.11/site-packages/ray/tune/search/bayesopt/bayesopt_search.py new file mode 100644 index 0000000000000000000000000000000000000000..97b67192bcc88aafe81a9dd5d361b03884d84688 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/tune/search/bayesopt/bayesopt_search.py @@ -0,0 +1,449 @@ +import json +import logging +import pickle +from collections import defaultdict +from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple + +from ray.tune.result import DEFAULT_METRIC +from ray.tune.search import ( + UNDEFINED_METRIC_MODE, + UNDEFINED_SEARCH_SPACE, + UNRESOLVED_SEARCH_SPACE, + Searcher, +) +from ray.tune.search.sample import Domain, Float, Quantized, Uniform +from ray.tune.search.variant_generator import parse_spec_vars +from ray.tune.utils import flatten_dict +from ray.tune.utils.util import is_nan_or_inf, unflatten_dict + +try: # Python 3 only -- needed for lint test. + import bayes_opt as byo +except ImportError: + byo = None + + +if TYPE_CHECKING: + from ray.tune import ExperimentAnalysis + +logger = logging.getLogger(__name__) + + +def _dict_hash(config, precision): + flatconfig = flatten_dict(config) + for param, value in flatconfig.items(): + if isinstance(value, float): + flatconfig[param] = "{:.{digits}f}".format(value, digits=precision) + + hashed = json.dumps(flatconfig, sort_keys=True, default=str) + return hashed + + +class BayesOptSearch(Searcher): + """Uses bayesian-optimization/BayesianOptimization to optimize hyperparameters. + + bayesian-optimization/BayesianOptimization is a library for Bayesian Optimization. More + info can be found here: https://github.com/bayesian-optimization/BayesianOptimization. + + This searcher will automatically filter out any NaN, inf or -inf + results. 
+ + You will need to install bayesian-optimization/BayesianOptimization via the following: + + .. code-block:: bash + + pip install bayesian-optimization==1.4.3 + + Initializing this search algorithm with a ``space`` requires that it's + in the ``BayesianOptimization`` search space format. Otherwise, you + should instead pass in a Tune search space into ``Tuner(param_space=...)``, + and the search space will be automatically converted for you. + + See this ``BayesianOptimization`` example notebook + `_ + for an example. + + Args: + space: Continuous search space. Parameters will be sampled from + this space which will be used to run trials. + metric: The training result objective value attribute. If None + but a mode was passed, the anonymous metric `_metric` will be used + per default. + mode: One of {min, max}. Determines whether objective is + minimizing or maximizing the metric attribute. + points_to_evaluate: Initial parameter suggestions to be run + first. This is for when you already have some good parameters + you want to run first to help the algorithm make better suggestions + for future parameters. Needs to be a list of dicts containing the + configurations. + utility_kwargs: Parameters to define the utility function. + The default value is a dictionary with three keys: + - kind: ucb (Upper Confidence Bound) + - kappa: 2.576 + - xi: 0.0 + random_state: Used to initialize BayesOpt. + random_search_steps: Number of initial random searches. + This is necessary to avoid initial local overfitting + of the Bayesian process. + verbose: Sets verbosity level for BayesOpt packages. + patience: If patience is set and we've repeated a trial numerous times, + we terminate the experiment. + skip_duplicate: skip duplicate config + analysis: Optionally, the previous analysis to integrate. + + Tune automatically converts search spaces to BayesOptSearch's format: + + .. 
code-block:: python + + from ray import tune + from ray.tune.search.bayesopt import BayesOptSearch + + config = { + "width": tune.uniform(0, 20), + "height": tune.uniform(-100, 100) + } + + bayesopt = BayesOptSearch(metric="mean_loss", mode="min") + tuner = tune.Tuner( + my_func, + tune_config=tune.TuneConfig( + search_alg=baysopt, + ), + param_space=config, + ) + tuner.fit() + + If you would like to pass the search space manually, the code would + look like this: + + .. code-block:: python + + from ray import tune + from ray.tune.search.bayesopt import BayesOptSearch + + space = { + 'width': (0, 20), + 'height': (-100, 100), + } + bayesopt = BayesOptSearch(space, metric="mean_loss", mode="min") + tuner = tune.Tuner( + my_func, + tune_config=tune.TuneConfig( + search_alg=bayesopt, + ), + ) + tuner.fit() + + """ + + # bayes_opt.BayesianOptimization: Optimization object + optimizer = None + + def __init__( + self, + space: Optional[Dict] = None, + metric: Optional[str] = None, + mode: Optional[str] = None, + points_to_evaluate: Optional[List[Dict]] = None, + utility_kwargs: Optional[Dict] = None, + random_state: int = 42, + random_search_steps: int = 10, + verbose: int = 0, + patience: int = 5, + skip_duplicate: bool = True, + analysis: Optional["ExperimentAnalysis"] = None, + ): + assert byo is not None, ( + "BayesOpt must be installed!. You can install BayesOpt with" + " the command: `pip install bayesian-optimization==1.4.3`." + ) + if mode: + assert mode in ["min", "max"], "`mode` must be 'min' or 'max'." + self._config_counter = defaultdict(int) + self._patience = patience + # int: Precision at which to hash values. 
+ self.repeat_float_precision = 5 + if self._patience <= 0: + raise ValueError("patience must be set to a value greater than 0!") + self._skip_duplicate = skip_duplicate + super(BayesOptSearch, self).__init__( + metric=metric, + mode=mode, + ) + + if utility_kwargs is None: + # The defaults arguments are the same + # as in the package BayesianOptimization + utility_kwargs = dict( + kind="ucb", + kappa=2.576, + xi=0.0, + ) + + if mode == "max": + self._metric_op = 1.0 + elif mode == "min": + self._metric_op = -1.0 + + self._points_to_evaluate = points_to_evaluate + + self._live_trial_mapping = {} + self._buffered_trial_results = [] + self.random_search_trials = random_search_steps + self._total_random_search_trials = 0 + + self.utility = byo.UtilityFunction(**utility_kwargs) + + self._analysis = analysis + + if isinstance(space, dict) and space: + resolved_vars, domain_vars, grid_vars = parse_spec_vars(space) + if domain_vars or grid_vars: + logger.warning( + UNRESOLVED_SEARCH_SPACE.format(par="space", cls=type(self)) + ) + space = self.convert_search_space(space, join=True) + + self._space = space + self._verbose = verbose + self._random_state = random_state + + self.optimizer = None + if space: + self._setup_optimizer() + + def _setup_optimizer(self): + if self._metric is None and self._mode: + # If only a mode was passed, use anonymous metric + self._metric = DEFAULT_METRIC + + self.optimizer = byo.BayesianOptimization( + f=None, + pbounds=self._space, + verbose=self._verbose, + random_state=self._random_state, + ) + + # Registering the provided analysis, if given + if self._analysis is not None: + self.register_analysis(self._analysis) + + def set_search_properties( + self, metric: Optional[str], mode: Optional[str], config: Dict, **spec + ) -> bool: + if self.optimizer: + return False + space = self.convert_search_space(config) + self._space = space + if metric: + self._metric = metric + if mode: + self._mode = mode + + if self._mode == "max": + self._metric_op 
= 1.0 + elif self._mode == "min": + self._metric_op = -1.0 + + self._setup_optimizer() + return True + + def suggest(self, trial_id: str) -> Optional[Dict]: + """Return new point to be explored by black box function. + + Args: + trial_id: Id of the trial. + This is a short alphanumerical string. + + Returns: + Either a dictionary describing the new point to explore or + None, when no new point is to be explored for the time being. + """ + if not self.optimizer: + raise RuntimeError( + UNDEFINED_SEARCH_SPACE.format( + cls=self.__class__.__name__, space="space" + ) + ) + + if not self._metric or not self._mode: + raise RuntimeError( + UNDEFINED_METRIC_MODE.format( + cls=self.__class__.__name__, metric=self._metric, mode=self._mode + ) + ) + + if self._points_to_evaluate: + config = self._points_to_evaluate.pop(0) + else: + # We compute the new point to explore + config = self.optimizer.suggest(self.utility) + + config_hash = _dict_hash(config, self.repeat_float_precision) + # Check if already computed + already_seen = config_hash in self._config_counter + self._config_counter[config_hash] += 1 + top_repeats = max(self._config_counter.values()) + + # If patience is set and we've repeated a trial numerous times, + # we terminate the experiment. + if self._patience is not None and top_repeats > self._patience: + return Searcher.FINISHED + # If we have seen a value before, we'll skip it. 
+ if already_seen and self._skip_duplicate: + logger.info("Skipping duplicated config: {}.".format(config)) + return None + + # If we are still in the random search part and we are waiting for + # trials to complete + if len(self._buffered_trial_results) < self.random_search_trials: + # We check if we have already maxed out the number of requested + # random search trials + if self._total_random_search_trials == self.random_search_trials: + # If so we stop the suggestion and return None + return None + # Otherwise we increase the total number of rndom search trials + if config: + self._total_random_search_trials += 1 + + # Save the new trial to the trial mapping + self._live_trial_mapping[trial_id] = config + + # Return a deep copy of the mapping + return unflatten_dict(config) + + def register_analysis(self, analysis: "ExperimentAnalysis"): + """Integrate the given analysis into the gaussian process. + + Args: + analysis: Optionally, the previous analysis + to integrate. + """ + for (_, report), params in zip( + analysis.dataframe(metric=self._metric, mode=self._mode).iterrows(), + analysis.get_all_configs().values(), + ): + # We add the obtained results to the + # gaussian process optimizer + self._register_result(params, report) + + def on_trial_complete( + self, trial_id: str, result: Optional[Dict] = None, error: bool = False + ): + """Notification for the completion of trial. + + Args: + trial_id: Id of the trial. + This is a short alphanumerical string. + result: Dictionary of result. + May be none when some error occurs. + error: Boolean representing a previous error state. + The result should be None when error is True. + """ + # We try to get the parameters used for this trial + params = self._live_trial_mapping.pop(trial_id, None) + + # The results may be None if some exception is raised during the trial. + # Also, if the parameters are None (were already processed) + # we interrupt the following procedure. 
+ # Additionally, if somehow the error is True but + # the remaining values are not we also block the method + if result is None or params is None or error: + return + + # If we don't have to execute some random search steps + if len(self._buffered_trial_results) >= self.random_search_trials: + # we simply register the obtained result + self._register_result(params, result) + return + + # We store the results into a temporary cache + self._buffered_trial_results.append((params, result)) + + # If the random search finished, + # we update the BO with all the computer points. + if len(self._buffered_trial_results) == self.random_search_trials: + for params, result in self._buffered_trial_results: + self._register_result(params, result) + + def _register_result(self, params: Tuple[str], result: Dict): + """Register given tuple of params and results.""" + if is_nan_or_inf(result[self.metric]): + return + self.optimizer.register(params, self._metric_op * result[self.metric]) + + def get_state(self) -> Dict[str, Any]: + state = self.__dict__.copy() + return state + + def set_state(self, state: Dict[str, Any]): + self.__dict__.update(state) + + def save(self, checkpoint_path: str): + """Storing current optimizer state.""" + save_object = self.get_state() + with open(checkpoint_path, "wb") as f: + pickle.dump(save_object, f) + + def restore(self, checkpoint_path: str): + """Restoring current optimizer state.""" + with open(checkpoint_path, "rb") as f: + save_object = pickle.load(f) + + if isinstance(save_object, dict): + self.set_state(save_object) + else: + # Backwards compatibility + ( + self.optimizer, + self._buffered_trial_results, + self._total_random_search_trials, + self._config_counter, + self._points_to_evaluate, + ) = save_object + + @staticmethod + def convert_search_space(spec: Dict, join: bool = False) -> Dict: + resolved_vars, domain_vars, grid_vars = parse_spec_vars(spec) + + if grid_vars: + raise ValueError( + "Grid search parameters cannot be automatically 
converted " + "to a BayesOpt search space." + ) + + # Flatten and resolve again after checking for grid search. + spec = flatten_dict(spec, prevent_delimiter=True) + resolved_vars, domain_vars, grid_vars = parse_spec_vars(spec) + + def resolve_value(domain: Domain) -> Tuple[float, float]: + sampler = domain.get_sampler() + if isinstance(sampler, Quantized): + logger.warning( + "BayesOpt search does not support quantization. " + "Dropped quantization." + ) + sampler = sampler.get_sampler() + + if isinstance(domain, Float): + if domain.sampler is not None and not isinstance( + domain.sampler, Uniform + ): + logger.warning( + "BayesOpt does not support specific sampling methods. " + "The {} sampler will be dropped.".format(sampler) + ) + return (domain.lower, domain.upper) + + raise ValueError( + "BayesOpt does not support parameters of type " + "`{}`".format(type(domain).__name__) + ) + + # Parameter name is e.g. "a/b/c" for nested dicts + bounds = {"/".join(path): resolve_value(domain) for path, domain in domain_vars} + + if join: + spec.update(bounds) + bounds = spec + + return bounds diff --git a/.venv/lib/python3.11/site-packages/ray/tune/search/bohb/__init__.py b/.venv/lib/python3.11/site-packages/ray/tune/search/bohb/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..40068b1562adcd327627c8b8f641079c95364ccc --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/tune/search/bohb/__init__.py @@ -0,0 +1,3 @@ +from ray.tune.search.bohb.bohb_search import BOHB, TuneBOHB + +__all__ = ["BOHB", "TuneBOHB"] diff --git a/.venv/lib/python3.11/site-packages/ray/tune/search/bohb/__pycache__/__init__.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/tune/search/bohb/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..45c50a1779d2ec6f582c4c05a31302221ea15ca9 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/tune/search/bohb/__pycache__/__init__.cpython-311.pyc 
differ diff --git a/.venv/lib/python3.11/site-packages/ray/tune/search/bohb/__pycache__/bohb_search.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/tune/search/bohb/__pycache__/bohb_search.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4636694f7e40cdd7fa0502a7a9bf4c33a647a401 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/tune/search/bohb/__pycache__/bohb_search.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/tune/search/bohb/bohb_search.py b/.venv/lib/python3.11/site-packages/ray/tune/search/bohb/bohb_search.py new file mode 100644 index 0000000000000000000000000000000000000000..5fa701b700f7f5adf1eed308364725c779c5466d --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/tune/search/bohb/bohb_search.py @@ -0,0 +1,383 @@ +"""BOHB (Bayesian Optimization with HyperBand)""" + +import copy +import logging +import math +from typing import Dict, List, Optional, Union + +# use cloudpickle instead of pickle to make BOHB obj +# pickleable +from ray import cloudpickle +from ray.tune.result import DEFAULT_METRIC +from ray.tune.search import ( + UNDEFINED_METRIC_MODE, + UNDEFINED_SEARCH_SPACE, + UNRESOLVED_SEARCH_SPACE, + Searcher, +) +from ray.tune.search.sample import ( + Categorical, + Domain, + Float, + Integer, + LogUniform, + Normal, + Quantized, + Uniform, +) +from ray.tune.search.variant_generator import parse_spec_vars +from ray.tune.utils.util import flatten_dict, unflatten_list_dict + +try: + import ConfigSpace + from hpbandster.optimizers.config_generators.bohb import BOHB +except ImportError: + BOHB = ConfigSpace = None + +logger = logging.getLogger(__name__) + + +class _BOHBJobWrapper: + """Mock object for HpBandSter to process.""" + + def __init__(self, loss: float, budget: float, config: Dict): + self.result = {"loss": loss} + self.kwargs = {"budget": budget, "config": config.copy()} + self.exception = None + + +class TuneBOHB(Searcher): + """BOHB suggestion 
component. + + + Requires HpBandSter and ConfigSpace to be installed. You can install + HpBandSter and ConfigSpace with: ``pip install hpbandster ConfigSpace``. + + This should be used in conjunction with HyperBandForBOHB. + + Args: + space: Continuous ConfigSpace search space. + Parameters will be sampled from this space which will be used + to run trials. + bohb_config: configuration for HpBandSter BOHB algorithm + metric: The training result objective value attribute. If None + but a mode was passed, the anonymous metric `_metric` will be used + per default. + mode: One of {min, max}. Determines whether objective is + minimizing or maximizing the metric attribute. + points_to_evaluate: Initial parameter suggestions to be run + first. This is for when you already have some good parameters + you want to run first to help the algorithm make better suggestions + for future parameters. Needs to be a list of dicts containing the + configurations. + seed: Optional random seed to initialize the random number + generator. Setting this should lead to identical initial + configurations at each run. + max_concurrent: Number of maximum concurrent trials. + If this Searcher is used in a ``ConcurrencyLimiter``, the + ``max_concurrent`` value passed to it will override the + value passed here. Set to <= 0 for no limit on concurrency. + + Tune automatically converts search spaces to TuneBOHB's format: + + .. code-block:: python + + config = { + "width": tune.uniform(0, 20), + "height": tune.uniform(-100, 100), + "activation": tune.choice(["relu", "tanh"]) + } + + algo = TuneBOHB(metric="mean_loss", mode="min") + bohb = HyperBandForBOHB( + time_attr="training_iteration", + metric="mean_loss", + mode="min", + max_t=100) + run(my_trainable, config=config, scheduler=bohb, search_alg=algo) + + If you would like to pass the search space manually, the code would + look like this: + + .. 
code-block:: python + + import ConfigSpace as CS + + config_space = CS.ConfigurationSpace() + config_space.add_hyperparameter( + CS.UniformFloatHyperparameter("width", lower=0, upper=20)) + config_space.add_hyperparameter( + CS.UniformFloatHyperparameter("height", lower=-100, upper=100)) + config_space.add_hyperparameter( + CS.CategoricalHyperparameter( + name="activation", choices=["relu", "tanh"])) + + algo = TuneBOHB( + config_space, metric="mean_loss", mode="min") + bohb = HyperBandForBOHB( + time_attr="training_iteration", + metric="mean_loss", + mode="min", + max_t=100) + run(my_trainable, scheduler=bohb, search_alg=algo) + + """ + + def __init__( + self, + space: Optional[Union[Dict, "ConfigSpace.ConfigurationSpace"]] = None, + bohb_config: Optional[Dict] = None, + metric: Optional[str] = None, + mode: Optional[str] = None, + points_to_evaluate: Optional[List[Dict]] = None, + seed: Optional[int] = None, + max_concurrent: int = 0, + ): + assert ( + BOHB is not None + ), """HpBandSter must be installed! + You can install HpBandSter with the command: + `pip install hpbandster ConfigSpace`.""" + if mode: + assert mode in ["min", "max"], "`mode` must be 'min' or 'max'." 
+ self.trial_to_params = {} + self._metric = metric + + self._bohb_config = bohb_config + + if isinstance(space, dict) and space: + resolved_vars, domain_vars, grid_vars = parse_spec_vars(space) + if domain_vars or grid_vars: + logger.warning( + UNRESOLVED_SEARCH_SPACE.format(par="space", cls=type(self)) + ) + space = self.convert_search_space(space) + + self._space = space + self._seed = seed + + self.running = set() + self.paused = set() + + self._max_concurrent = max_concurrent + self._points_to_evaluate = points_to_evaluate + + super(TuneBOHB, self).__init__( + metric=self._metric, + mode=mode, + ) + + if self._space: + self._setup_bohb() + + def set_max_concurrency(self, max_concurrent: int) -> bool: + self._max_concurrent = max_concurrent + return True + + def _setup_bohb(self): + from hpbandster.optimizers.config_generators.bohb import BOHB + + if self._metric is None and self._mode: + # If only a mode was passed, use anonymous metric + self._metric = DEFAULT_METRIC + + if self._mode == "max": + self._metric_op = -1.0 + elif self._mode == "min": + self._metric_op = 1.0 + + if self._seed is not None: + self._space.seed(self._seed) + + self.running = set() + self.paused = set() + + bohb_config = self._bohb_config or {} + self.bohber = BOHB(self._space, **bohb_config) + + def set_search_properties( + self, metric: Optional[str], mode: Optional[str], config: Dict, **spec + ) -> bool: + if self._space: + return False + space = self.convert_search_space(config) + self._space = space + + if metric: + self._metric = metric + if mode: + self._mode = mode + + self._setup_bohb() + return True + + def suggest(self, trial_id: str) -> Optional[Dict]: + if not self._space: + raise RuntimeError( + UNDEFINED_SEARCH_SPACE.format( + cls=self.__class__.__name__, space="space" + ) + ) + + if not self._metric or not self._mode: + raise RuntimeError( + UNDEFINED_METRIC_MODE.format( + cls=self.__class__.__name__, metric=self._metric, mode=self._mode + ) + ) + + max_concurrent = ( + 
self._max_concurrent if self._max_concurrent > 0 else float("inf") + ) + if len(self.running) >= max_concurrent: + return None + + if self._points_to_evaluate: + config = self._points_to_evaluate.pop(0) + else: + # This parameter is not used in hpbandster implementation. + config, _ = self.bohber.get_config(None) + self.trial_to_params[trial_id] = copy.deepcopy(config) + self.running.add(trial_id) + return unflatten_list_dict(config) + + def on_trial_result(self, trial_id: str, result: Dict): + if trial_id not in self.paused: + self.running.add(trial_id) + if "hyperband_info" not in result: + logger.warning( + "BOHB Info not detected in result. Are you using " + "HyperBandForBOHB as a scheduler?" + ) + elif "budget" in result.get("hyperband_info", {}): + hbs_wrapper = self.to_wrapper(trial_id, result) + self.bohber.new_result(hbs_wrapper) + + def on_trial_complete( + self, trial_id: str, result: Optional[Dict] = None, error: bool = False + ): + del self.trial_to_params[trial_id] + self.paused.discard(trial_id) + self.running.discard(trial_id) + + def to_wrapper(self, trial_id: str, result: Dict) -> _BOHBJobWrapper: + return _BOHBJobWrapper( + self._metric_op * result[self.metric], + result["hyperband_info"]["budget"], + self.trial_to_params[trial_id], + ) + + # BOHB Specific. + # TODO(team-ml): Refactor alongside HyperBandForBOHB + def on_pause(self, trial_id: str): + self.paused.add(trial_id) + self.running.discard(trial_id) + + def on_unpause(self, trial_id: str): + self.paused.discard(trial_id) + self.running.add(trial_id) + + @staticmethod + def convert_search_space(spec: Dict) -> "ConfigSpace.ConfigurationSpace": + resolved_vars, domain_vars, grid_vars = parse_spec_vars(spec) + + if grid_vars: + raise ValueError( + "Grid search parameters cannot be automatically converted " + "to a TuneBOHB search space." + ) + + # Flatten and resolve again after checking for grid search. 
+ spec = flatten_dict(spec, prevent_delimiter=True) + resolved_vars, domain_vars, grid_vars = parse_spec_vars(spec) + + def resolve_value( + par: str, domain: Domain + ) -> ConfigSpace.hyperparameters.Hyperparameter: + quantize = None + + sampler = domain.get_sampler() + if isinstance(sampler, Quantized): + quantize = sampler.q + sampler = sampler.sampler + + if isinstance(domain, Float): + if isinstance(sampler, LogUniform): + lower = domain.lower + upper = domain.upper + if quantize: + lower = math.ceil(domain.lower / quantize) * quantize + upper = math.floor(domain.upper / quantize) * quantize + return ConfigSpace.UniformFloatHyperparameter( + par, lower=lower, upper=upper, q=quantize, log=True + ) + elif isinstance(sampler, Uniform): + lower = domain.lower + upper = domain.upper + if quantize: + lower = math.ceil(domain.lower / quantize) * quantize + upper = math.floor(domain.upper / quantize) * quantize + return ConfigSpace.UniformFloatHyperparameter( + par, lower=lower, upper=upper, q=quantize, log=False + ) + elif isinstance(sampler, Normal): + return ConfigSpace.hyperparameters.NormalFloatHyperparameter( + par, mu=sampler.mean, sigma=sampler.sd, q=quantize, log=False + ) + + elif isinstance(domain, Integer): + if isinstance(sampler, LogUniform): + lower = domain.lower + upper = domain.upper + if quantize: + lower = math.ceil(domain.lower / quantize) * quantize + upper = math.floor(domain.upper / quantize) * quantize + else: + # Tune search space integers are exclusive + upper -= 1 + return ConfigSpace.UniformIntegerHyperparameter( + par, lower=lower, upper=upper, q=quantize, log=True + ) + elif isinstance(sampler, Uniform): + lower = domain.lower + upper = domain.upper + if quantize: + lower = math.ceil(domain.lower / quantize) * quantize + upper = math.floor(domain.upper / quantize) * quantize + else: + # Tune search space integers are exclusive + upper -= 1 + return ConfigSpace.UniformIntegerHyperparameter( + par, lower=lower, upper=upper, q=quantize, 
log=False + ) + + elif isinstance(domain, Categorical): + if isinstance(sampler, Uniform): + return ConfigSpace.CategoricalHyperparameter( + par, choices=domain.categories + ) + + raise ValueError( + "TuneBOHB does not support parameters of type " + "`{}` with samplers of type `{}`".format( + type(domain).__name__, type(domain.sampler).__name__ + ) + ) + + cs = ConfigSpace.ConfigurationSpace() + for path, domain in domain_vars: + par = "/".join(str(p) for p in path) + value = resolve_value(par, domain) + cs.add_hyperparameter(value) + + return cs + + def save(self, checkpoint_path: str): + save_object = self.__dict__ + with open(checkpoint_path, "wb") as outputFile: + cloudpickle.dump(save_object, outputFile) + + def restore(self, checkpoint_path: str): + with open(checkpoint_path, "rb") as inputFile: + save_object = cloudpickle.load(inputFile) + self.__dict__.update(save_object) diff --git a/.venv/lib/python3.11/site-packages/ray/tune/search/hebo/hebo_search.py b/.venv/lib/python3.11/site-packages/ray/tune/search/hebo/hebo_search.py new file mode 100644 index 0000000000000000000000000000000000000000..7145960d301c6befddc415f650e947b1180faa32 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/tune/search/hebo/hebo_search.py @@ -0,0 +1,468 @@ +import logging +import pickle +from typing import Dict, List, Optional, Union + +import numpy as np +import pandas as pd + +from ray.tune.result import DEFAULT_METRIC +from ray.tune.search import ( + UNDEFINED_METRIC_MODE, + UNDEFINED_SEARCH_SPACE, + UNRESOLVED_SEARCH_SPACE, + Searcher, +) +from ray.tune.search.sample import ( + Categorical, + Domain, + Float, + Integer, + LogUniform, + Quantized, + Uniform, +) +from ray.tune.search.variant_generator import parse_spec_vars +from ray.tune.utils.util import is_nan_or_inf, unflatten_dict, validate_warmstart + +try: # Python 3 only -- needed for lint test. 
+ import hebo + import torch # hebo has torch as a dependency +except ImportError: + hebo = None + +logger = logging.getLogger(__name__) + +SPACE_ERROR_MESSAGE = ( + "Space must be either a HEBO DesignSpace object" + "or a dictionary with ONLY tune search spaces." +) + + +class HEBOSearch(Searcher): + """Uses HEBO (Heteroscedastic Evolutionary Bayesian Optimization) + to optimize hyperparameters. + + HEBO is a cutting edge black-box optimization framework created + by Huawei's Noah Ark. More info can be found here: + https://github.com/huawei-noah/HEBO/tree/master/HEBO. + + `space` can either be a HEBO's `DesignSpace` object or a dict of Tune + search spaces. + + Please note that the first few trials will be random and used + to kickstart the search process. In order to achieve good results, + we recommend setting the number of trials to at least 16. + + Maximum number of concurrent trials is determined by ``max_concurrent`` + argument. Trials will be done in batches of ``max_concurrent`` trials. + If this Searcher is used in a ``ConcurrencyLimiter``, the + ``max_concurrent`` value passed to it will override the value passed + here. + + Args: + space: A dict mapping parameter names to Tune search spaces or a + HEBO DesignSpace object. + metric: The training result objective value attribute. If None + but a mode was passed, the anonymous metric `_metric` will be used + per default. + mode: One of {min, max}. Determines whether objective is + minimizing or maximizing the metric attribute. + points_to_evaluate: Initial parameter suggestions to be run + first. This is for when you already have some good parameters + you want to run first to help the algorithm make better suggestions + for future parameters. Needs to be a list of dicts containing the + configurations. 
+ evaluated_rewards: If you have previously evaluated the + parameters passed in as points_to_evaluate you can avoid + re-running those trials by passing in the reward attributes + as a list so the optimiser can be told the results without + needing to re-compute the trial. Must be the same length as + points_to_evaluate. + random_state_seed: Seed for reproducible + results. Defaults to None. Please note that setting this to a value + will change global random states for `numpy` and `torch` + on initalization and loading from checkpoint. + max_concurrent: Number of maximum concurrent trials. + If this Searcher is used in a ``ConcurrencyLimiter``, the + ``max_concurrent`` value passed to it will override the + value passed here. + **kwargs: The keyword arguments will be passed to `HEBO()``. + + Tune automatically converts search spaces to HEBO's format: + + .. code-block:: python + + from ray import tune + from ray.tune.search.hebo import HEBOSearch + + config = { + "width": tune.uniform(0, 20), + "height": tune.uniform(-100, 100) + } + + hebo = HEBOSearch(metric="mean_loss", mode="min") + tuner = tune.Tuner( + trainable_function, + tune_config=tune.TuneConfig( + search_alg=hebo + ), + param_space=config + ) + tuner.fit() + + Alternatively, you can pass a HEBO `DesignSpace` object manually to the + Searcher: + + .. 
code-block:: python + + from ray import tune + from ray.tune.search.hebo import HEBOSearch + from hebo.design_space.design_space import DesignSpace + + space_config = [ + {'name' : 'width', 'type' : 'num', 'lb' : 0, 'ub' : 20}, + {'name' : 'height', 'type' : 'num', 'lb' : -100, 'ub' : 100}, + ] + space = DesignSpace().parse(space_config) + + hebo = HEBOSearch(space, metric="mean_loss", mode="min") + tuner = tune.Tuner( + trainable_function, + tune_config=tune.TuneConfig( + search_alg=hebo + ) + ) + tuner.fit() + + """ + + def __init__( + self, + space: Optional[ + Union[Dict, "hebo.design_space.design_space.DesignSpace"] + ] = None, + metric: Optional[str] = None, + mode: Optional[str] = None, + points_to_evaluate: Optional[List[Dict]] = None, + evaluated_rewards: Optional[List] = None, + random_state_seed: Optional[int] = None, + max_concurrent: int = 8, + **kwargs, + ): + assert hebo is not None, ( + "HEBO must be installed! You can install HEBO with" + " the command: `pip install 'HEBO>=0.2.0'`." + "This error may also be caused if HEBO" + " dependencies have bad versions. Try updating HEBO" + " first." + ) + if mode: + assert mode in ["min", "max"], "`mode` must be 'min' or 'max'." + assert ( + isinstance(max_concurrent, int) and max_concurrent >= 1 + ), "`max_concurrent` must be an integer and at least 1." 
+ if random_state_seed is not None: + assert isinstance( + random_state_seed, int + ), "random_state_seed must be None or int, got '{}'.".format( + type(random_state_seed) + ) + super(HEBOSearch, self).__init__(metric=metric, mode=mode) + + if isinstance(space, dict) and space: + resolved_vars, domain_vars, grid_vars = parse_spec_vars(space) + if resolved_vars: + raise TypeError(SPACE_ERROR_MESSAGE) + if domain_vars or grid_vars: + logger.warning( + UNRESOLVED_SEARCH_SPACE.format(par="space", cls=type(self)) + ) + space = self.convert_search_space(space) + elif space is not None and not isinstance( + space, hebo.design_space.design_space.DesignSpace + ): + raise TypeError(SPACE_ERROR_MESSAGE + " Got {}.".format(type(space))) + + self._hebo_config = kwargs + self._random_state_seed = random_state_seed + self._space = space + self._points_to_evaluate = points_to_evaluate + self._evaluated_rewards = evaluated_rewards + self._initial_points = [] + self._live_trial_mapping = {} + + self._max_concurrent = max_concurrent + self._suggestions_cache = [] + self._batch_filled = False + + self._opt = None + if space: + self._setup_optimizer() + + def set_max_concurrency(self, max_concurrent: int) -> bool: + self._max_concurrent = max_concurrent + return True + + def _setup_optimizer(self): + # HEBO internally minimizes, so "max" => -1 + if self._mode == "max": + self._metric_op = -1.0 + elif self._mode == "min": + self._metric_op = 1.0 + + if self._metric is None and self._mode: + # If only a mode was passed, use anonymous metric + self._metric = DEFAULT_METRIC + + if not isinstance(self._space, hebo.design_space.design_space.DesignSpace): + raise ValueError( + f"Invalid search space: {type(self._space)}. Either pass a " + f"valid search space to the `HEBOSearch` class or pass " + f"a `param_space` parameter to `tune.Tuner()`" + ) + + if self._space.num_paras <= 0: + raise ValueError( + "Got empty search space. 
Please make sure to pass " + "a valid search space with at least one parameter to " + "`HEBOSearch`" + ) + + if self._random_state_seed is not None: + np.random.seed(self._random_state_seed) + torch.random.manual_seed(self._random_state_seed) + + self._opt = hebo.optimizers.hebo.HEBO(space=self._space, **self._hebo_config) + + if self._points_to_evaluate: + validate_warmstart( + self._space.para_names, + self._points_to_evaluate, + self._evaluated_rewards, + ) + if self._evaluated_rewards: + self._opt.observe( + pd.DataFrame(self._points_to_evaluate), + np.array(self._evaluated_rewards) * self._metric_op, + ) + else: + self._initial_points = self._points_to_evaluate + + def set_search_properties( + self, metric: Optional[str], mode: Optional[str], config: Dict, **spec + ) -> bool: + if self._opt: + return False + space = self.convert_search_space(config) + self._space = space + + if metric: + self._metric = metric + if mode: + self._mode = mode + + self._setup_optimizer() + return True + + def suggest(self, trial_id: str) -> Optional[Dict]: + if not self._opt: + raise RuntimeError( + UNDEFINED_SEARCH_SPACE.format( + cls=self.__class__.__name__, space="space" + ) + ) + + if not self._metric or not self._mode: + raise RuntimeError( + UNDEFINED_METRIC_MODE.format( + cls=self.__class__.__name__, metric=self._metric, mode=self._mode + ) + ) + + if not self._live_trial_mapping: + self._batch_filled = False + + if self._initial_points: + params = self._initial_points.pop(0) + suggestion = pd.DataFrame([params], index=[0]) + else: + if ( + self._batch_filled + or len(self._live_trial_mapping) >= self._max_concurrent + ): + return None + if not self._suggestions_cache: + suggestion = self._opt.suggest(n_suggestions=self._max_concurrent) + self._suggestions_cache = suggestion.to_dict("records") + params = self._suggestions_cache.pop(0) + suggestion = pd.DataFrame([params], index=[0]) + self._live_trial_mapping[trial_id] = suggestion + if len(self._live_trial_mapping) >= 
self._max_concurrent: + self._batch_filled = True + return unflatten_dict(params) + + def on_trial_complete( + self, trial_id: str, result: Optional[Dict] = None, error: bool = False + ): + """Notification for the completion of trial. + + HEBO always minimizes.""" + + if result: + self._process_result(trial_id, result) + self._live_trial_mapping.pop(trial_id) + + def _process_result(self, trial_id: str, result: Dict): + trial_info = self._live_trial_mapping[trial_id] + if result and not is_nan_or_inf(result[self._metric]): + self._opt.observe( + trial_info, np.array([self._metric_op * result[self._metric]]) + ) + + def add_evaluated_point( + self, + parameters: Dict, + value: float, + error: bool = False, + pruned: bool = False, + intermediate_values: Optional[List[float]] = None, + ): + if intermediate_values: + logger.warning("HEBO doesn't use intermediate_values. Ignoring.") + if not error and not pruned: + self._opt.observe( + pd.DataFrame( + [ + { + k: v + for k, v in parameters.items() + if k in self._opt.space.para_names + } + ] + ), + np.array([value]) * self._metric_op, + ) + else: + logger.warning( + "Only non errored and non pruned points can be added to HEBO." 
+ ) + + def save(self, checkpoint_path: str): + """Storing current optimizer state.""" + if self._random_state_seed is not None: + numpy_random_state = np.random.get_state() + torch_random_state = torch.get_rng_state() + else: + numpy_random_state = None + torch_random_state = None + save_object = self.__dict__.copy() + save_object["__numpy_random_state"] = numpy_random_state + save_object["__torch_random_state"] = torch_random_state + with open(checkpoint_path, "wb") as f: + pickle.dump(save_object, f) + + def restore(self, checkpoint_path: str): + """Restoring current optimizer state.""" + with open(checkpoint_path, "rb") as f: + save_object = pickle.load(f) + + if isinstance(save_object, dict): + numpy_random_state = save_object.pop("__numpy_random_state", None) + torch_random_state = save_object.pop("__torch_random_state", None) + self.__dict__.update(save_object) + else: + # Backwards compatibility + ( + self._opt, + self._initial_points, + numpy_random_state, + torch_random_state, + self._live_trial_mapping, + self._max_concurrent, + self._suggestions_cache, + self._space, + self._hebo_config, + self._batch_filled, + ) = save_object + if numpy_random_state is not None: + np.random.set_state(numpy_random_state) + if torch_random_state is not None: + torch.random.set_rng_state(torch_random_state) + + @staticmethod + def convert_search_space(spec: Dict, prefix: str = "") -> Dict: + resolved_vars, domain_vars, grid_vars = parse_spec_vars(spec) + + params = [] + + if not domain_vars and not grid_vars: + return {} + + if grid_vars: + raise ValueError( + "Grid search parameters cannot be automatically converted " + "to a HEBO search space." + ) + + def resolve_value(par: str, domain: Domain): + sampler = domain.get_sampler() + if isinstance(sampler, Quantized): + logger.warning( + "HEBO search does not support quantization. " + "Dropped quantization." 
+ ) + sampler = sampler.get_sampler() + + if isinstance(domain, Float): + if isinstance(sampler, LogUniform): + return { + "name": par, + "type": "pow", + "lb": domain.lower, + "ub": domain.upper, + "base": sampler.base, + } + elif isinstance(sampler, Uniform): + return { + "name": par, + "type": "num", + "lb": domain.lower, + "ub": domain.upper, + } + + elif isinstance(domain, Integer): + if isinstance(sampler, LogUniform): + return { + "name": par, + "type": "pow_int", + "lb": domain.lower, + "ub": domain.upper - 1, # Upper bound exclusive + "base": sampler.base, + } + elif isinstance(sampler, Uniform): + return { + "name": par, + "type": "int", + "lb": domain.lower, + "ub": domain.upper - 1, # Upper bound exclusive + } + elif isinstance(domain, Categorical): + return { + "name": par, + "type": "cat", + "categories": list(domain.categories), + } + + raise ValueError( + "HEBO does not support parameters of type " + "`{}` with samplers of type `{}`".format( + type(domain).__name__, type(domain.sampler).__name__ + ) + ) + + for path, domain in domain_vars: + par = "/".join([str(p) for p in ((prefix,) + path if prefix else path)]) + value = resolve_value(par, domain) + params.append(value) + + return hebo.design_space.design_space.DesignSpace().parse(params) diff --git a/.venv/lib/python3.11/site-packages/ray/tune/search/hyperopt/__init__.py b/.venv/lib/python3.11/site-packages/ray/tune/search/hyperopt/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..3b142015f04bf2eec6b1be7e54d545621dfe2953 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/tune/search/hyperopt/__init__.py @@ -0,0 +1,3 @@ +from ray.tune.search.hyperopt.hyperopt_search import HyperOptSearch + +__all__ = ["HyperOptSearch"] diff --git a/.venv/lib/python3.11/site-packages/ray/tune/search/hyperopt/__pycache__/__init__.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/tune/search/hyperopt/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..5862d1b541e0f639ad4b322b0f3d93456a4dc681 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/tune/search/hyperopt/__pycache__/__init__.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/tune/search/hyperopt/__pycache__/hyperopt_search.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/tune/search/hyperopt/__pycache__/hyperopt_search.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..43a179631561b91b25931805a35e21c23af16e92 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/tune/search/hyperopt/__pycache__/hyperopt_search.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/tune/search/hyperopt/hyperopt_search.py b/.venv/lib/python3.11/site-packages/ray/tune/search/hyperopt/hyperopt_search.py new file mode 100644 index 0000000000000000000000000000000000000000..4988325dde2d13fe04b32d97b5e71b2b320a0fb2 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/tune/search/hyperopt/hyperopt_search.py @@ -0,0 +1,559 @@ +import copy +import logging +from functools import partial +from typing import Any, Dict, List, Optional + +import numpy as np + +# Use cloudpickle instead of pickle to make lambda funcs in HyperOpt pickleable +from ray import cloudpickle +from ray.tune.error import TuneError +from ray.tune.result import DEFAULT_METRIC +from ray.tune.search import ( + UNDEFINED_METRIC_MODE, + UNDEFINED_SEARCH_SPACE, + UNRESOLVED_SEARCH_SPACE, + Searcher, +) +from ray.tune.search.sample import ( + Categorical, + Domain, + Float, + Integer, + LogUniform, + Normal, + Quantized, + Uniform, +) +from ray.tune.search.variant_generator import assign_value, parse_spec_vars +from ray.tune.utils import flatten_dict + +try: + hyperopt_logger = logging.getLogger("hyperopt") + hyperopt_logger.setLevel(logging.WARNING) + import hyperopt as hpo + from hyperopt.pyll import Apply +except ImportError: + hpo = None + Apply = None + 
+ +logger = logging.getLogger(__name__) + + +HYPEROPT_UNDEFINED_DETAILS = ( + " This issue can also come up with HyperOpt if your search space only " + "contains constant variables, which is not supported by HyperOpt. In that case, " + "don't pass any searcher or add sample variables to the search space." +) + + +class HyperOptSearch(Searcher): + """A wrapper around HyperOpt to provide trial suggestions. + + HyperOpt a Python library for serial and parallel optimization + over awkward search spaces, which may include real-valued, discrete, + and conditional dimensions. More info can be found at + http://hyperopt.github.io/hyperopt. + + HyperOptSearch uses the Tree-structured Parzen Estimators algorithm, + though it can be trivially extended to support any algorithm HyperOpt + supports. + + To use this search algorithm, you will need to install HyperOpt: + + .. code-block:: bash + + pip install -U hyperopt + + + Parameters: + space: HyperOpt configuration. Parameters will be sampled + from this configuration and will be used to override + parameters generated in the variant generation process. + metric: The training result objective value attribute. If None + but a mode was passed, the anonymous metric `_metric` will be used + per default. + mode: One of {min, max}. Determines whether objective is + minimizing or maximizing the metric attribute. + points_to_evaluate: Initial parameter suggestions to be run + first. This is for when you already have some good parameters + you want to run first to help the algorithm make better suggestions + for future parameters. Needs to be a list of dicts containing the + configurations. + n_initial_points: number of random evaluations of the + objective function before starting to aproximate it with + tree parzen estimators. Defaults to 20. + random_state_seed: seed for reproducible + results. Defaults to None. + gamma: parameter governing the tree parzen + estimators suggestion algorithm. Defaults to 0.25. 
+ + Tune automatically converts search spaces to HyperOpt's format: + + .. code-block:: python + + config = { + 'width': tune.uniform(0, 20), + 'height': tune.uniform(-100, 100), + 'activation': tune.choice(["relu", "tanh"]) + } + + current_best_params = [{ + 'width': 10, + 'height': 0, + 'activation': "relu", + }] + + hyperopt_search = HyperOptSearch( + metric="mean_loss", mode="min", + points_to_evaluate=current_best_params) + + tuner = tune.Tuner( + trainable, + tune_config=tune.TuneConfig( + search_alg=hyperopt_search + ), + param_space=config + ) + tuner.fit() + + If you would like to pass the search space manually, the code would + look like this: + + .. code-block:: python + + space = { + 'width': hp.uniform('width', 0, 20), + 'height': hp.uniform('height', -100, 100), + 'activation': hp.choice("activation", ["relu", "tanh"]) + } + + current_best_params = [{ + 'width': 10, + 'height': 0, + 'activation': "relu", + }] + + hyperopt_search = HyperOptSearch( + space, metric="mean_loss", mode="min", + points_to_evaluate=current_best_params) + + tuner = tune.Tuner( + trainable, + tune_config=tune.TuneConfig( + search_alg=hyperopt_search + ), + ) + tuner.fit() + + """ + + def __init__( + self, + space: Optional[Dict] = None, + metric: Optional[str] = None, + mode: Optional[str] = None, + points_to_evaluate: Optional[List[Dict]] = None, + n_initial_points: int = 20, + random_state_seed: Optional[int] = None, + gamma: float = 0.25, + ): + assert ( + hpo is not None + ), "HyperOpt must be installed! Run `pip install hyperopt`." + if mode: + assert mode in ["min", "max"], "`mode` must be 'min' or 'max'." 
+ super(HyperOptSearch, self).__init__( + metric=metric, + mode=mode, + ) + # hyperopt internally minimizes, so "max" => -1 + if mode == "max": + self.metric_op = -1.0 + elif mode == "min": + self.metric_op = 1.0 + + if n_initial_points is None: + self.algo = hpo.tpe.suggest + else: + self.algo = partial(hpo.tpe.suggest, n_startup_jobs=n_initial_points) + if gamma is not None: + self.algo = partial(self.algo, gamma=gamma) + + self._points_to_evaluate = copy.deepcopy(points_to_evaluate) + + self._live_trial_mapping = {} + self.rstate = np.random.RandomState(random_state_seed) + + self.domain = None + if isinstance(space, dict) and space: + resolved_vars, domain_vars, grid_vars = parse_spec_vars(space) + if domain_vars or grid_vars: + logger.warning( + UNRESOLVED_SEARCH_SPACE.format(par="space", cls=type(self)) + ) + space = self.convert_search_space(space) + self._space = space + self._setup_hyperopt() + + def _setup_hyperopt(self) -> None: + from hyperopt.fmin import generate_trials_to_calculate + + if not self._space: + raise RuntimeError( + UNDEFINED_SEARCH_SPACE.format( + cls=self.__class__.__name__, space="space" + ) + + HYPEROPT_UNDEFINED_DETAILS + ) + + if self._metric is None and self._mode: + # If only a mode was passed, use anonymous metric + self._metric = DEFAULT_METRIC + + if self._points_to_evaluate is None: + self._hpopt_trials = hpo.Trials() + self._points_to_evaluate = 0 + else: + assert isinstance(self._points_to_evaluate, (list, tuple)) + + for i in range(len(self._points_to_evaluate)): + config = self._points_to_evaluate[i] + self._convert_categories_to_indices(config) + # HyperOpt treats initial points as LIFO, reverse to get FIFO + self._points_to_evaluate = list(reversed(self._points_to_evaluate)) + self._hpopt_trials = generate_trials_to_calculate(self._points_to_evaluate) + self._hpopt_trials.refresh() + self._points_to_evaluate = len(self._points_to_evaluate) + + self.domain = hpo.Domain(lambda spc: spc, self._space) + + def 
_convert_categories_to_indices(self, config) -> None: + """Convert config parameters for categories into hyperopt-compatible + representations where instead the index of the category is expected.""" + + def _lookup(config_dict, space_dict, key): + if isinstance(config_dict[key], dict): + for k in config_dict[key]: + _lookup(config_dict[key], space_dict[key], k) + else: + if ( + key in space_dict + and isinstance(space_dict[key], hpo.base.pyll.Apply) + and space_dict[key].name == "switch" + ): + if len(space_dict[key].pos_args) > 0: + categories = [ + a.obj + for a in space_dict[key].pos_args[1:] + if a.name == "literal" + ] + try: + idx = categories.index(config_dict[key]) + except ValueError as exc: + msg = ( + f"Did not find category with value " + f"`{config_dict[key]}` in " + f"hyperopt parameter `{key}`. " + ) + + if isinstance(config_dict[key], int): + msg += ( + "In previous versions, a numerical " + "index was expected for categorical " + "values of `points_to_evaluate`, " + "but in ray>=1.2.0, the categorical " + "value is expected to be directly " + "provided. " + ) + + msg += "Please make sure the specified category is valid." 
+ raise ValueError(msg) from exc + config_dict[key] = idx + + for k in config: + _lookup(config, self._space, k) + + def set_search_properties( + self, metric: Optional[str], mode: Optional[str], config: Dict, **spec + ) -> bool: + if self.domain: + return False + space = self.convert_search_space(config) + self._space = space + + if metric: + self._metric = metric + if mode: + self._mode = mode + + self.metric_op = -1.0 if self._mode == "max" else 1.0 + + self._setup_hyperopt() + return True + + def suggest(self, trial_id: str) -> Optional[Dict]: + if not self.domain: + raise RuntimeError( + UNDEFINED_SEARCH_SPACE.format( + cls=self.__class__.__name__, space="space" + ) + + HYPEROPT_UNDEFINED_DETAILS + ) + if not self._metric or not self._mode: + raise RuntimeError( + UNDEFINED_METRIC_MODE.format( + cls=self.__class__.__name__, metric=self._metric, mode=self._mode + ) + ) + + if self._points_to_evaluate > 0: + using_point_to_evaluate = True + new_trial = self._hpopt_trials.trials[self._points_to_evaluate - 1] + self._points_to_evaluate -= 1 + else: + using_point_to_evaluate = False + new_ids = self._hpopt_trials.new_trial_ids(1) + self._hpopt_trials.refresh() + + # Get new suggestion from Hyperopt + new_trials = self.algo( + new_ids, + self.domain, + self._hpopt_trials, + self.rstate.randint(2**31 - 1), + ) + self._hpopt_trials.insert_trial_docs(new_trials) + self._hpopt_trials.refresh() + new_trial = new_trials[0] + self._live_trial_mapping[trial_id] = (new_trial["tid"], new_trial) + + # Taken from HyperOpt.base.evaluate + config = hpo.base.spec_from_misc(new_trial["misc"]) + + # We have to flatten nested spaces here so parameter names match + config = flatten_dict(config, flatten_list=True) + + ctrl = hpo.base.Ctrl(self._hpopt_trials, current_trial=new_trial) + memo = self.domain.memo_from_config(config) + hpo.utils.use_obj_for_literal_in_memo( + self.domain.expr, ctrl, hpo.base.Ctrl, memo + ) + + try: + suggested_config = hpo.pyll.rec_eval( + self.domain.expr, 
+ memo=memo, + print_node_on_error=self.domain.rec_eval_print_node_on_error, + ) + except (AssertionError, TypeError) as e: + if using_point_to_evaluate and ( + isinstance(e, AssertionError) or "GarbageCollected" in str(e) + ): + raise ValueError( + "HyperOpt encountered a GarbageCollected switch argument. " + "Usually this is caused by a config in " + "`points_to_evaluate` " + "missing a key present in `space`. Ensure that " + "`points_to_evaluate` contains " + "all non-constant keys from `space`.\n" + "Config from `points_to_evaluate`: " + f"{config}\n" + "HyperOpt search space: " + f"{self._space}" + ) from e + raise e + return copy.deepcopy(suggested_config) + + def on_trial_result(self, trial_id: str, result: Dict) -> None: + ho_trial = self._get_hyperopt_trial(trial_id) + if ho_trial is None: + return + now = hpo.utils.coarse_utcnow() + ho_trial["book_time"] = now + ho_trial["refresh_time"] = now + + def on_trial_complete( + self, trial_id: str, result: Optional[Dict] = None, error: bool = False + ) -> None: + """Notification for the completion of trial. + + The result is internally negated when interacting with HyperOpt + so that HyperOpt can "maximize" this value, as it minimizes on default. 
+ """ + ho_trial = self._get_hyperopt_trial(trial_id) + if ho_trial is None: + return + ho_trial["refresh_time"] = hpo.utils.coarse_utcnow() + if error: + ho_trial["state"] = hpo.base.JOB_STATE_ERROR + ho_trial["misc"]["error"] = (str(TuneError), "Tune Error") + self._hpopt_trials.refresh() + elif result: + self._process_result(trial_id, result) + del self._live_trial_mapping[trial_id] + + def _process_result(self, trial_id: str, result: Dict) -> None: + ho_trial = self._get_hyperopt_trial(trial_id) + if not ho_trial: + return + ho_trial["refresh_time"] = hpo.utils.coarse_utcnow() + + ho_trial["state"] = hpo.base.JOB_STATE_DONE + hp_result = self._to_hyperopt_result(result) + ho_trial["result"] = hp_result + self._hpopt_trials.refresh() + + def _to_hyperopt_result(self, result: Dict) -> Dict: + try: + return {"loss": self.metric_op * result[self.metric], "status": "ok"} + except KeyError as e: + raise RuntimeError( + f"Hyperopt expected to see the metric `{self.metric}` in the " + f"last result, but it was not found. To fix this, make " + f"sure your call to `tune.report` or your return value of " + f"your trainable class `step()` contains the above metric " + f"as a key." 
+ ) from e + + def _get_hyperopt_trial(self, trial_id: str) -> Optional[Dict]: + if trial_id not in self._live_trial_mapping: + return + hyperopt_tid = self._live_trial_mapping[trial_id][0] + return [t for t in self._hpopt_trials.trials if t["tid"] == hyperopt_tid][0] + + def get_state(self) -> Dict: + return { + "hyperopt_trials": self._hpopt_trials, + "rstate": self.rstate.get_state(), + } + + def set_state(self, state: Dict) -> None: + self._hpopt_trials = state["hyperopt_trials"] + self.rstate.set_state(state["rstate"]) + + def save(self, checkpoint_path: str) -> None: + save_object = self.__dict__.copy() + save_object["__rstate"] = self.rstate.get_state() + with open(checkpoint_path, "wb") as f: + cloudpickle.dump(save_object, f) + + def restore(self, checkpoint_path: str) -> None: + with open(checkpoint_path, "rb") as f: + save_object = cloudpickle.load(f) + + if "__rstate" not in save_object: + # Backwards compatibility + self.set_state(save_object) + else: + self.rstate.set_state(save_object.pop("__rstate")) + self.__dict__.update(save_object) + + @staticmethod + def convert_search_space(spec: Dict, prefix: str = "") -> Dict: + spec = copy.deepcopy(spec) + resolved_vars, domain_vars, grid_vars = parse_spec_vars(spec) + + if not domain_vars and not grid_vars: + return {} + + if grid_vars: + raise ValueError( + "Grid search parameters cannot be automatically converted " + "to a HyperOpt search space." 
+ ) + + def resolve_value(par: str, domain: Domain) -> Any: + quantize = None + + sampler = domain.get_sampler() + if isinstance(sampler, Quantized): + quantize = sampler.q + sampler = sampler.sampler + + if isinstance(domain, Float): + if isinstance(sampler, LogUniform): + if quantize: + return hpo.hp.qloguniform( + par, np.log(domain.lower), np.log(domain.upper), quantize + ) + return hpo.hp.loguniform( + par, np.log(domain.lower), np.log(domain.upper) + ) + elif isinstance(sampler, Uniform): + if quantize: + return hpo.hp.quniform( + par, domain.lower, domain.upper, quantize + ) + return hpo.hp.uniform(par, domain.lower, domain.upper) + elif isinstance(sampler, Normal): + if quantize: + return hpo.hp.qnormal(par, sampler.mean, sampler.sd, quantize) + return hpo.hp.normal(par, sampler.mean, sampler.sd) + + elif isinstance(domain, Integer): + if isinstance(sampler, LogUniform): + if quantize: + return hpo.base.pyll.scope.int( + hpo.hp.qloguniform( + par, + np.log(domain.lower), + np.log(domain.upper), + quantize, + ) + ) + return hpo.base.pyll.scope.int( + hpo.hp.qloguniform( + par, np.log(domain.lower), np.log(domain.upper - 1), 1.0 + ) + ) + elif isinstance(sampler, Uniform): + if quantize: + return hpo.base.pyll.scope.int( + hpo.hp.quniform( + par, domain.lower, domain.upper - 1, quantize + ) + ) + return hpo.hp.uniformint(par, domain.lower, high=domain.upper - 1) + elif isinstance(domain, Categorical): + if isinstance(sampler, Uniform): + return hpo.hp.choice( + par, + [ + ( + HyperOptSearch.convert_search_space( + category, prefix=par + ) + if isinstance(category, dict) + else ( + HyperOptSearch.convert_search_space( + dict(enumerate(category)), prefix=f"{par}/{i}" + ) + if isinstance(category, list) + and len(category) > 0 + and isinstance(category[0], Domain) + else ( + resolve_value(f"{par}/{i}", category) + if isinstance(category, Domain) + else category + ) + ) + ) + for i, category in enumerate(domain.categories) + ], + ) + + raise ValueError( + 
"HyperOpt does not support parameters of type " + "`{}` with samplers of type `{}`".format( + type(domain).__name__, type(domain.sampler).__name__ + ) + ) + + for path, domain in domain_vars: + par = "/".join([str(p) for p in ((prefix,) + path if prefix else path)]) + value = resolve_value(par, domain) + assign_value(spec, path, value) + + return spec diff --git a/.venv/lib/python3.11/site-packages/ray/tune/search/zoopt/__init__.py b/.venv/lib/python3.11/site-packages/ray/tune/search/zoopt/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..44ab8e345754da48205fbe712e4bb787a5ddfc2a --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/tune/search/zoopt/__init__.py @@ -0,0 +1,3 @@ +from ray.tune.search.zoopt.zoopt_search import ZOOptSearch + +__all__ = ["ZOOptSearch"] diff --git a/.venv/lib/python3.11/site-packages/ray/tune/search/zoopt/__pycache__/__init__.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/tune/search/zoopt/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3eff046e1da97aa32dc3926fb1a4b720eed788ea Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/tune/search/zoopt/__pycache__/__init__.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/tune/search/zoopt/__pycache__/zoopt_search.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/tune/search/zoopt/__pycache__/zoopt_search.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..49fd0a08851f184730cbaf95c2ee08003f3b32bf Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/tune/search/zoopt/__pycache__/zoopt_search.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/tune/search/zoopt/zoopt_search.py b/.venv/lib/python3.11/site-packages/ray/tune/search/zoopt/zoopt_search.py new file mode 100644 index 0000000000000000000000000000000000000000..d5ec4e423f97e6b89decd0f55fbcbe3b7ec15a87 --- /dev/null +++ 
b/.venv/lib/python3.11/site-packages/ray/tune/search/zoopt/zoopt_search.py @@ -0,0 +1,379 @@ +import copy +import logging +from typing import Dict, List, Optional, Tuple + +import ray +import ray.cloudpickle as pickle +from ray.tune.result import DEFAULT_METRIC +from ray.tune.search import ( + UNDEFINED_METRIC_MODE, + UNDEFINED_SEARCH_SPACE, + UNRESOLVED_SEARCH_SPACE, + Searcher, +) +from ray.tune.search.sample import ( + Categorical, + Domain, + Float, + Integer, + Quantized, + Uniform, +) +from ray.tune.search.variant_generator import parse_spec_vars +from ray.tune.utils.util import unflatten_dict + +try: + import zoopt + from zoopt import Solution, ValueType +except ImportError: + zoopt = None + Solution = ValueType = None + +logger = logging.getLogger(__name__) + + +class ZOOptSearch(Searcher): + """A wrapper around ZOOpt to provide trial suggestions. + + ZOOptSearch is a library for derivative-free optimization. It is backed by + the `ZOOpt `__ package. Currently, + Asynchronous Sequential RAndomized COordinate Shrinking (ASRacos) + is implemented in Tune. + + To use ZOOptSearch, install zoopt (>=0.4.1): ``pip install -U zoopt``. + + Tune automatically converts search spaces to ZOOpt"s format: + + .. code-block:: python + + from ray import train, tune + from ray.tune.search.zoopt import ZOOptSearch + + "config": { + "iterations": 10, # evaluation times + "width": tune.uniform(-10, 10), + "height": tune.uniform(-10, 10) + } + + zoopt_search_config = { + "parallel_num": 8, # how many workers to parallel + } + + zoopt_search = ZOOptSearch( + algo="Asracos", # only support Asracos currently + budget=20, # must match `num_samples` in `tune.TuneConfig()`. 
+ dim_dict=dim_dict, + metric="mean_loss", + mode="min", + **zoopt_search_config + ) + + tuner = tune.Tuner( + my_objective, + tune_config=tune.TuneConfig( + search_alg=zoopt_search, + num_samples=20 + ), + run_config=train.RunConfig( + name="zoopt_search", + stop={"timesteps_total": 10} + ), + param_space=config + ) + tuner.fit() + + If you would like to pass the search space manually, the code would + look like this: + + .. code-block:: python + + from ray import train, tune + from ray.tune.search.zoopt import ZOOptSearch + from zoopt import ValueType + + dim_dict = { + "height": (ValueType.CONTINUOUS, [-10, 10], 1e-2), + "width": (ValueType.DISCRETE, [-10, 10], False), + "layers": (ValueType.GRID, [4, 8, 16]) + } + + "config": { + "iterations": 10, # evaluation times + } + + zoopt_search_config = { + "parallel_num": 8, # how many workers to parallel + } + + zoopt_search = ZOOptSearch( + algo="Asracos", # only support Asracos currently + budget=20, # must match `num_samples` in `tune.TuneConfig()`. + dim_dict=dim_dict, + metric="mean_loss", + mode="min", + **zoopt_search_config + ) + + tuner = tune.Tuner( + my_objective, + tune_config=tune.TuneConfig( + search_alg=zoopt_search, + num_samples=20 + ), + run_config=train.RunConfig( + name="zoopt_search", + stop={"timesteps_total": 10} + ), + ) + tuner.fit() + + Parameters: + algo: To specify an algorithm in zoopt you want to use. + Only support ASRacos currently. + budget: Number of samples. + dim_dict: Dimension dictionary. + For continuous dimensions: (continuous, search_range, precision); + For discrete dimensions: (discrete, search_range, has_order); + For grid dimensions: (grid, grid_list). + More details can be found in zoopt package. + metric: The training result objective value attribute. If None + but a mode was passed, the anonymous metric `_metric` will be used + per default. + mode: One of {min, max}. Determines whether objective is + minimizing or maximizing the metric attribute. 
+ points_to_evaluate: Initial parameter suggestions to be run + first. This is for when you already have some good parameters + you want to run first to help the algorithm make better suggestions + for future parameters. Needs to be a list of dicts containing the + configurations. + parallel_num: How many workers to parallel. Note that initial + phase may start less workers than this number. More details can + be found in zoopt package. + """ + + optimizer = None + + def __init__( + self, + algo: str = "asracos", + budget: Optional[int] = None, + dim_dict: Optional[Dict] = None, + metric: Optional[str] = None, + mode: Optional[str] = None, + points_to_evaluate: Optional[List[Dict]] = None, + parallel_num: int = 1, + **kwargs + ): + assert ( + zoopt is not None + ), "ZOOpt not found - please install zoopt by `pip install -U zoopt`." + assert budget is not None, "`budget` should not be None!" + if mode: + assert mode in ["min", "max"], "`mode` must be 'min' or 'max'." + _algo = algo.lower() + assert _algo in [ + "asracos", + "sracos", + ], "`algo` must be in ['asracos', 'sracos'] currently" + + self._algo = _algo + + if isinstance(dim_dict, dict) and dim_dict: + resolved_vars, domain_vars, grid_vars = parse_spec_vars(dim_dict) + if domain_vars or grid_vars: + logger.warning( + UNRESOLVED_SEARCH_SPACE.format(par="dim_dict", cls=type(self)) + ) + dim_dict = self.convert_search_space(dim_dict, join=True) + + self._dim_dict = dim_dict + self._budget = budget + + self._metric = metric + if mode == "max": + self._metric_op = -1.0 + elif mode == "min": + self._metric_op = 1.0 + + self._points_to_evaluate = copy.deepcopy(points_to_evaluate) + + self._live_trial_mapping = {} + + self._dim_keys = [] + self.solution_dict = {} + self.best_solution_list = [] + self.optimizer = None + + self.kwargs = kwargs + + self.parallel_num = parallel_num + + super(ZOOptSearch, self).__init__(metric=self._metric, mode=mode) + + if self._dim_dict: + self._setup_zoopt() + + def 
_setup_zoopt(self): + if self._metric is None and self._mode: + # If only a mode was passed, use anonymous metric + self._metric = DEFAULT_METRIC + + _dim_list = [] + for k in self._dim_dict: + self._dim_keys.append(k) + _dim_list.append(self._dim_dict[k]) + + init_samples = None + if self._points_to_evaluate: + logger.warning( + "`points_to_evaluate` is ignored by ZOOpt in versions <= 0.4.1." + ) + init_samples = [ + Solution(x=tuple(point[dim] for dim in self._dim_keys)) + for point in self._points_to_evaluate + ] + dim = zoopt.Dimension2(_dim_list) + par = zoopt.Parameter(budget=self._budget, init_samples=init_samples) + if self._algo == "sracos" or self._algo == "asracos": + from zoopt.algos.opt_algorithms.racos.sracos import SRacosTune + + self.optimizer = SRacosTune( + dimension=dim, + parameter=par, + parallel_num=self.parallel_num, + **self.kwargs + ) + + def set_search_properties( + self, metric: Optional[str], mode: Optional[str], config: Dict, **spec + ) -> bool: + if self._dim_dict: + return False + space = self.convert_search_space(config) + self._dim_dict = space + + if metric: + self._metric = metric + if mode: + self._mode = mode + + if self._mode == "max": + self._metric_op = -1.0 + elif self._mode == "min": + self._metric_op = 1.0 + + self._setup_zoopt() + return True + + def suggest(self, trial_id: str) -> Optional[Dict]: + if not self._dim_dict or not self.optimizer: + raise RuntimeError( + UNDEFINED_SEARCH_SPACE.format( + cls=self.__class__.__name__, space="dim_dict" + ) + ) + if not self._metric or not self._mode: + raise RuntimeError( + UNDEFINED_METRIC_MODE.format( + cls=self.__class__.__name__, metric=self._metric, mode=self._mode + ) + ) + + _solution = self.optimizer.suggest() + + if _solution == "FINISHED": + if ray.__version__ >= "0.8.7": + return Searcher.FINISHED + else: + return None + + if _solution: + self.solution_dict[str(trial_id)] = _solution + _x = _solution.get_x() + new_trial = dict(zip(self._dim_keys, _x)) + 
self._live_trial_mapping[trial_id] = new_trial + return unflatten_dict(new_trial) + + def on_trial_complete( + self, trial_id: str, result: Optional[Dict] = None, error: bool = False + ): + """Notification for the completion of trial.""" + if result: + _solution = self.solution_dict[str(trial_id)] + _best_solution_so_far = self.optimizer.complete( + _solution, self._metric_op * result[self._metric] + ) + if _best_solution_so_far: + self.best_solution_list.append(_best_solution_so_far) + + del self._live_trial_mapping[trial_id] + + def save(self, checkpoint_path: str): + save_object = self.__dict__ + with open(checkpoint_path, "wb") as outputFile: + pickle.dump(save_object, outputFile) + + def restore(self, checkpoint_path: str): + with open(checkpoint_path, "rb") as inputFile: + save_object = pickle.load(inputFile) + self.__dict__.update(save_object) + + @staticmethod + def convert_search_space(spec: Dict, join: bool = False) -> Dict[str, Tuple]: + spec = copy.deepcopy(spec) + resolved_vars, domain_vars, grid_vars = parse_spec_vars(spec) + + if not domain_vars and not grid_vars: + return {} + + if grid_vars: + raise ValueError( + "Grid search parameters cannot be automatically converted " + "to a ZOOpt search space." 
+ ) + + def resolve_value(domain: Domain) -> Tuple: + quantize = None + + sampler = domain.get_sampler() + if isinstance(sampler, Quantized): + quantize = sampler.q + sampler = sampler.sampler + + if isinstance(domain, Float): + precision = quantize or 1e-12 + if isinstance(sampler, Uniform): + return ( + ValueType.CONTINUOUS, + [domain.lower, domain.upper], + precision, + ) + + elif isinstance(domain, Integer): + if isinstance(sampler, Uniform): + return (ValueType.DISCRETE, [domain.lower, domain.upper - 1], True) + + elif isinstance(domain, Categorical): + # Categorical variables would use ValueType.DISCRETE with + # has_partial_order=False, however, currently we do not + # keep track of category values and cannot automatically + # translate back and forth between them. + if isinstance(sampler, Uniform): + return (ValueType.GRID, domain.categories) + + raise ValueError( + "ZOOpt does not support parameters of type " + "`{}` with samplers of type `{}`".format( + type(domain).__name__, type(domain.sampler).__name__ + ) + ) + + conv_spec = { + "/".join(path): resolve_value(domain) for path, domain in domain_vars + } + + if join: + spec.update(conv_spec) + conv_spec = spec + + return conv_spec