diff --git a/.gitattributes b/.gitattributes index c9bca540b7213f83530a2a192c42adbeff2ffc39..22803b2f7573fc4eaca05dbeb43623704f04f900 100644 --- a/.gitattributes +++ b/.gitattributes @@ -179,3 +179,4 @@ tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/_inductor/_ .venv/lib/python3.11/site-packages/ray/rllib/algorithms/__pycache__/algorithm.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text .venv/lib/python3.11/site-packages/ray/rllib/algorithms/__pycache__/algorithm_config.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text .venv/lib/python3.11/site-packages/ray/rllib/env/__pycache__/multi_agent_episode.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text +.venv/lib/python3.11/site-packages/ray/tune/execution/__pycache__/tune_controller.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text diff --git a/.venv/lib/python3.11/site-packages/ray/train/__pycache__/_checkpoint.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/train/__pycache__/_checkpoint.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2dce00862e332ab2a2cb553f61e08480461d75af Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/train/__pycache__/_checkpoint.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/train/__pycache__/trainer.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/train/__pycache__/trainer.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..083310977436fe5aef1556f2a364fcec03ccc1f3 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/train/__pycache__/trainer.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/train/examples/__pycache__/__init__.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/train/examples/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..bf4483616962b69a53fb2db9d060764369989181 Binary files /dev/null and 
b/.venv/lib/python3.11/site-packages/ray/train/examples/__pycache__/__init__.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/train/examples/__pycache__/mlflow_simple_example.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/train/examples/__pycache__/mlflow_simple_example.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..edb0d02ab975a4c8f82a70fe37ac35c981b5c7f7 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/train/examples/__pycache__/mlflow_simple_example.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/train/examples/horovod/__init__.py b/.venv/lib/python3.11/site-packages/ray/train/examples/horovod/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/.venv/lib/python3.11/site-packages/ray/train/examples/horovod/__pycache__/__init__.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/train/examples/horovod/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..646ffbdae23ac54aa80e2c68d14a0cfeb9c41dda Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/train/examples/horovod/__pycache__/__init__.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/train/examples/horovod/__pycache__/horovod_cifar_pbt_example.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/train/examples/horovod/__pycache__/horovod_cifar_pbt_example.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..19b09809bfc53f1c8138bdd2244243d71924a9bb Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/train/examples/horovod/__pycache__/horovod_cifar_pbt_example.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/train/examples/horovod/__pycache__/horovod_example.cpython-311.pyc 
b/.venv/lib/python3.11/site-packages/ray/train/examples/horovod/__pycache__/horovod_example.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e1b9637c8da3c1a4cb89e1ddeeea10b30c62cbfc Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/train/examples/horovod/__pycache__/horovod_example.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/train/examples/horovod/__pycache__/horovod_pytorch_example.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/train/examples/horovod/__pycache__/horovod_pytorch_example.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8e50ece0277ae04d39ab88e4ccec672cddf01591 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/train/examples/horovod/__pycache__/horovod_pytorch_example.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/train/examples/horovod/__pycache__/horovod_tune_example.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/train/examples/horovod/__pycache__/horovod_tune_example.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..03807fc21420a8ca745005fc6e794eaeb653417a Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/train/examples/horovod/__pycache__/horovod_tune_example.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/train/examples/horovod/horovod_cifar_pbt_example.py b/.venv/lib/python3.11/site-packages/ray/train/examples/horovod/horovod_cifar_pbt_example.py new file mode 100644 index 0000000000000000000000000000000000000000..d7be644d5de03bc7cda298ddaeba1f9417c6fea5 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/train/examples/horovod/horovod_cifar_pbt_example.py @@ -0,0 +1,210 @@ +import os +import tempfile + +import numpy as np +import torch +import torch.nn as nn +import torchvision +import torchvision.transforms as transforms +from torch.utils.data import DataLoader +from 
torchvision.models import resnet18 + +import ray +import ray.cloudpickle as cpickle +import ray.train.torch +from ray import train, tune +from ray.train import ( + Checkpoint, + CheckpointConfig, + FailureConfig, + RunConfig, + ScalingConfig, +) +from ray.train.horovod import HorovodTrainer +from ray.tune.schedulers import create_scheduler +from ray.tune.tune_config import TuneConfig +from ray.tune.tuner import Tuner +from ray.tune.utils.release_test_util import ProgressCallback + +# The long running version starts 4 trials while only 2 can be run at a time. +# Thus trials are paused and restored at all times so that every trial can make +# progress. The PBT scheduler also applies perturbation and mutation, +# which also involves pausing and restoring. +# The intention is to stress test the pausing and restoring of trials, +# especially that there should be no GPU memory leak. + +# TODO(ml-team): This test is very low signal at the moment. +# We should further trim it down. + +CIFAR10_STATS = { + "mean": (0.4914, 0.4822, 0.4465), + "std": (0.2023, 0.1994, 0.2010), +} + + +def train_loop_per_worker(config): + import horovod.torch as hvd + + hvd.init() + device = ray.train.torch.get_device() + net = resnet18().to(device) + optimizer = torch.optim.SGD( + net.parameters(), + lr=config["lr"], + ) + epoch = 0 + + checkpoint = train.get_checkpoint() + if checkpoint: + with checkpoint.as_directory() as checkpoint_dir: + with open(os.path.join(checkpoint_dir, "data.ckpt"), "rb") as fp: + checkpoint_dict = cpickle.load(fp) + + model_state = checkpoint_dict["model_state"] + optimizer_state = checkpoint_dict["optimizer_state"] + epoch = checkpoint_dict["epoch"] + 1 + + net.load_state_dict(model_state) + optimizer.load_state_dict(optimizer_state) + + criterion = nn.CrossEntropyLoss() + optimizer = hvd.DistributedOptimizer(optimizer) + np.random.seed(1 + hvd.rank()) + torch.manual_seed(1234) + # To ensure consistent initialization across workers, + 
hvd.broadcast_parameters(net.state_dict(), root_rank=0) + hvd.broadcast_optimizer_state(optimizer, root_rank=0) + + trainset = ray.get(config["data"]) + + train_sampler = torch.utils.data.distributed.DistributedSampler( + trainset, num_replicas=hvd.size(), rank=hvd.rank() + ) + + # Note, don't set `num_workers` in DataLoader (not even 1), + # as that will separately start multiple processes (each corresponding to 1 worker) + # to load the data. This is known to cause issues with Ray. + trainloader = DataLoader( + trainset, batch_size=int(config["batch_size"]), sampler=train_sampler + ) + + for current_epoch in range(epoch, 40): # loop over the dataset multiple times + running_loss = 0.0 + epoch_steps = 0 + for i, data in enumerate(trainloader): + # get the inputs; data is a list of [inputs, labels] + inputs, labels = data + inputs, labels = inputs.to(device), labels.to(device) + + # zero the parameter gradients + optimizer.zero_grad() + + # forward + backward + optimize + outputs = net(inputs) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + # print statistics + running_loss += loss.item() + epoch_steps += 1 + + if i % 2000 == 1999: # print every 2000 mini-batches + print( + "[%d, %5d] loss: %.3f" + % (current_epoch + 1, i + 1, running_loss / epoch_steps) + ) + + if config["smoke_test"]: + break + + with tempfile.TemporaryDirectory() as checkpoint_dir: + with open(os.path.join(checkpoint_dir, "data.ckpt"), "wb") as fp: + cpickle.dump( + dict( + model_state=net.state_dict(), + optimizer_state=optimizer.state_dict(), + epoch=current_epoch, + ), + fp, + ) + checkpoint = Checkpoint.from_directory(checkpoint_dir) + train.report(dict(loss=running_loss / epoch_steps), checkpoint=checkpoint) + + +if __name__ == "__main__": + import argparse + + parser = argparse.ArgumentParser() + parser.add_argument( + "--smoke-test", action="store_true", help=("Finish quickly for testing.") + ) + args = parser.parse_args() + + if args.smoke_test: + ray.init() 
+ else: + ray.init(address="auto") # assumes ray is started with ray up + + transform_train = transforms.Compose( + [ + transforms.RandomCrop(32, padding=4), + transforms.RandomHorizontalFlip(), + transforms.ToTensor(), + transforms.Normalize(CIFAR10_STATS["mean"], CIFAR10_STATS["std"]), + ] + ) # meanstd transformation + + dataset = torchvision.datasets.CIFAR10( + root="/tmp/data_cifar", train=True, download=True, transform=transform_train + ) + + horovod_trainer = HorovodTrainer( + train_loop_per_worker=train_loop_per_worker, + scaling_config=ScalingConfig( + use_gpu=False if args.smoke_test else True, + num_workers=2, + ), + train_loop_config={"batch_size": 64, "data": ray.put(dataset)}, + ) + + # ensure that checkpointing works. + pbt = create_scheduler( + "pbt", + perturbation_interval=1, # To make perturb more often. + hyperparam_mutations={ + "train_loop_config": {"lr": tune.uniform(0.001, 0.1)}, + }, + ) + + tuner = Tuner( + horovod_trainer, + param_space={ + "train_loop_config": { + "lr": 0.1 + if args.smoke_test + else tune.grid_search([0.1 * i for i in range(1, 5)]), # 4 trials + "smoke_test": args.smoke_test, + } + }, + tune_config=TuneConfig( + num_samples=2 if args.smoke_test else 1, + metric="loss", + mode="min", + scheduler=pbt, + ), + run_config=RunConfig( + stop={"training_iteration": 1} if args.smoke_test else None, + failure_config=FailureConfig(fail_fast=False), + checkpoint_config=CheckpointConfig(num_to_keep=1), + callbacks=[ProgressCallback()], + ), + ) + + result_grid = tuner.fit() + + # Make sure trials do not fail. 
+ for result in result_grid: + assert not result.error + + print("Best hyperparameters found were: ", result_grid.get_best_result().config) diff --git a/.venv/lib/python3.11/site-packages/ray/train/examples/horovod/horovod_example.py b/.venv/lib/python3.11/site-packages/ray/train/examples/horovod/horovod_example.py new file mode 100644 index 0000000000000000000000000000000000000000..236814aa8afccd775c3b67ab96edf8d3da93a0c8 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/train/examples/horovod/horovod_example.py @@ -0,0 +1,286 @@ +import argparse +import os + +import horovod.torch as hvd +import torch.nn as nn +import torch.nn.functional as F +import torch.optim as optim +import torch.utils.data.distributed +from filelock import FileLock +from torchvision import datasets, transforms + +import ray +from ray import train +from ray.train import ScalingConfig +from ray.train.horovod import HorovodTrainer + + +def metric_average(val, name): + tensor = torch.tensor(val) + avg_tensor = hvd.allreduce(tensor, name=name) + return avg_tensor.item() + + +class Net(nn.Module): + def __init__(self): + super(Net, self).__init__() + self.conv1 = nn.Conv2d(1, 10, kernel_size=5) + self.conv2 = nn.Conv2d(10, 20, kernel_size=5) + self.conv2_drop = nn.Dropout2d() + self.fc1 = nn.Linear(320, 50) + self.fc2 = nn.Linear(50, 10) + + def forward(self, x): + x = F.relu(F.max_pool2d(self.conv1(x), 2)) + x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2)) + x = x.view(-1, 320) + x = F.relu(self.fc1(x)) + x = F.dropout(x, training=self.training) + x = self.fc2(x) + return F.log_softmax(x) + + +def setup(config): + data_dir = config.get("data_dir", None) + seed = config.get("seed", 42) + batch_size = config.get("batch_size", 64) + use_adasum = config.get("use_adasum", False) + lr = config.get("lr", 0.01) + momentum = config.get("momentum", 0.5) + use_cuda = config.get("use_cuda", False) + + # Horovod: initialize library. 
+ hvd.init() + torch.manual_seed(seed) + + if use_cuda: + # Horovod: pin GPU to local rank. + torch.cuda.set_device(hvd.local_rank()) + torch.cuda.manual_seed(seed) + + # Horovod: limit # of CPU threads to be used per worker. + torch.set_num_threads(1) + + kwargs = {"num_workers": 1, "pin_memory": True} if use_cuda else {} + data_dir = data_dir or "~/data" + with FileLock(os.path.expanduser("~/.horovod_lock")): + train_dataset = datasets.MNIST( + data_dir, + train=True, + download=True, + transform=transforms.Compose( + [transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))] + ), + ) + # Horovod: use DistributedSampler to partition the training data. + train_sampler = torch.utils.data.distributed.DistributedSampler( + train_dataset, num_replicas=hvd.size(), rank=hvd.rank() + ) + train_loader = torch.utils.data.DataLoader( + train_dataset, batch_size=batch_size, sampler=train_sampler, **kwargs + ) + + model = Net() + + # By default, Adasum doesn't need scaling up learning rate. + lr_scaler = hvd.size() if not use_adasum else 1 + + if use_cuda: + # Move model to GPU. + model.cuda() + # If using GPU Adasum allreduce, scale learning rate by local_size. + if use_adasum and hvd.nccl_built(): + lr_scaler = hvd.local_size() + + # Horovod: scale learning rate by lr_scaler. + optimizer = optim.SGD(model.parameters(), lr=lr * lr_scaler, momentum=momentum) + + # Horovod: wrap optimizer with DistributedOptimizer. + optimizer = hvd.DistributedOptimizer( + optimizer, + named_parameters=model.named_parameters(), + op=hvd.Adasum if use_adasum else hvd.Average, + ) + + return model, optimizer, train_loader, train_sampler + + +def train_epoch( + model, optimizer, train_sampler, train_loader, epoch, log_interval, use_cuda +): + loss = None + model.train() + # Horovod: set epoch to sampler for shuffling. 
+ train_sampler.set_epoch(epoch) + for batch_idx, (data, target) in enumerate(train_loader): + if use_cuda: + data, target = data.cuda(), target.cuda() + optimizer.zero_grad() + output = model(data) + loss = F.nll_loss(output, target) + loss.backward() + optimizer.step() + if batch_idx % log_interval == 0: + # Horovod: use train_sampler to determine the number of + # examples in this worker's partition. + print( + "Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}".format( + epoch, + batch_idx * len(data), + len(train_sampler), + 100.0 * batch_idx / len(train_loader), + loss.item(), + ) + ) + return loss.item() if loss else None + + +# Horovod function API. + + +def train_func(config): + num_epochs = config.get("num_epochs", 10) + log_interval = config.get("log_interval", 10) + use_cuda = config.get("use_cuda", False) + + model, optimizer, train_loader, train_sampler = setup(config) + + for epoch in range(num_epochs): + loss = train_epoch( + model, optimizer, train_sampler, train_loader, epoch, log_interval, use_cuda + ) + train.report(dict(loss=loss)) + + +def main(num_workers, use_gpu, kwargs): + trainer = HorovodTrainer( + train_func, + train_loop_config=kwargs, + scaling_config=ScalingConfig(use_gpu=use_gpu, num_workers=num_workers), + ) + results = trainer.fit() + print(results.metrics) + + +# Horovod Class API. 
+ + +class HorovodTrainClass: + def __init__(self, config): + self.log_interval = config.get("log_interval", 10) + self.use_cuda = config.get("use_cuda", False) + + if self.use_cuda: + torch.cuda.set_device(hvd.local_rank()) + + self.model, self.optimizer, self.train_loader, self.train_sampler = setup( + config + ) + + def train(self, epoch): + loss = train_epoch( + self.model, + self.optimizer, + self.train_sampler, + self.train_loader, + epoch, + self.log_interval, + self.use_cuda, + ) + return loss + + +if __name__ == "__main__": + # Training settings + parser = argparse.ArgumentParser( + description="PyTorch MNIST Example", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + parser.add_argument( + "--batch-size", + type=int, + default=64, + metavar="N", + help="input batch size for training (default: 64)", + ) + parser.add_argument( + "--num-epochs", + type=int, + default=5, + metavar="N", + help="number of epochs to train (default: 10)", + ) + parser.add_argument( + "--lr", + type=float, + default=0.01, + metavar="LR", + help="learning rate (default: 0.01)", + ) + parser.add_argument( + "--momentum", + type=float, + default=0.5, + metavar="M", + help="SGD momentum (default: 0.5)", + ) + parser.add_argument( + "--use-gpu", action="store_true", default=False, help="enables CUDA training" + ) + parser.add_argument( + "--seed", type=int, default=42, metavar="S", help="random seed (default: 42)" + ) + parser.add_argument( + "--log-interval", + type=int, + default=10, + metavar="N", + help="how many batches to wait before logging training status", + ) + parser.add_argument( + "--use-adasum", + action="store_true", + default=False, + help="use adasum algorithm to do reduction", + ) + parser.add_argument( + "--num-workers", + type=int, + default=2, + help="Number of Ray workers to use for training.", + ) + parser.add_argument( + "--data-dir", + help="location of the training dataset in the local filesystem (" + "will be downloaded if needed)", + ) + 
parser.add_argument( + "--address", + required=False, + type=str, + default=None, + help="Address of Ray cluster.", + ) + + args = parser.parse_args() + + if args.address: + ray.init(args.address) + else: + ray.init() + + use_cuda = args.use_gpu if args.use_gpu is not None else False + + kwargs = { + "data_dir": args.data_dir, + "seed": args.seed, + "use_cuda": use_cuda, + "batch_size": args.batch_size, + "use_adasum": args.use_adasum if args.use_adasum else False, + "lr": args.lr, + "momentum": args.momentum, + "num_epochs": args.num_epochs, + "log_interval": args.log_interval, + } + + main(num_workers=args.num_workers, use_gpu=use_cuda, kwargs=kwargs) diff --git a/.venv/lib/python3.11/site-packages/ray/train/examples/horovod/horovod_pytorch_example.py b/.venv/lib/python3.11/site-packages/ray/train/examples/horovod/horovod_pytorch_example.py new file mode 100644 index 0000000000000000000000000000000000000000..d20ed51e9ef7e39c467948954816e91e509cff9c --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/train/examples/horovod/horovod_pytorch_example.py @@ -0,0 +1,270 @@ +import argparse +import os +import tempfile + +import horovod.torch as hvd +import torch.nn as nn +import torch.nn.functional as F +import torch.optim as optim +import torch.utils.data.distributed +from filelock import FileLock +from torchvision import datasets, transforms + +import ray.train.torch +from ray import train +from ray.train import Checkpoint, ScalingConfig +from ray.train.horovod import HorovodTrainer + + +def metric_average(val, name): + tensor = torch.tensor(val) + avg_tensor = hvd.allreduce(tensor, name=name) + return avg_tensor.item() + + +class Net(nn.Module): + def __init__(self): + super(Net, self).__init__() + self.conv1 = nn.Conv2d(1, 10, kernel_size=5) + self.conv2 = nn.Conv2d(10, 20, kernel_size=5) + self.conv2_drop = nn.Dropout2d() + self.fc1 = nn.Linear(320, 50) + self.fc2 = nn.Linear(50, 10) + + def forward(self, x): + x = F.relu(F.max_pool2d(self.conv1(x), 2)) + x = 
F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2)) + x = x.view(-1, 320) + x = F.relu(self.fc1(x)) + x = F.dropout(x, training=self.training) + x = self.fc2(x) + return F.log_softmax(x) + + +def setup(config): + data_dir = config.get("data_dir", None) + seed = config.get("seed", 42) + batch_size = config.get("batch_size", 64) + use_adasum = config.get("use_adasum", False) + lr = config.get("lr", 0.01) + momentum = config.get("momentum", 0.5) + use_cuda = config.get("use_cuda", False) + + # Horovod: initialize library. + hvd.init() + torch.manual_seed(seed) + + if use_cuda: + # Horovod: pin GPU to local rank. + torch.cuda.set_device(hvd.local_rank()) + torch.cuda.manual_seed(seed) + + # Horovod: limit # of CPU threads to be used per worker. + torch.set_num_threads(1) + + kwargs = {"pin_memory": True} if use_cuda else {} + data_dir = data_dir or "~/data" + with FileLock(os.path.expanduser("~/.horovod_lock")): + train_dataset = datasets.MNIST( + data_dir, + train=True, + download=True, + transform=transforms.Compose( + [transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))] + ), + ) + # Horovod: use DistributedSampler to partition the training data. + train_sampler = torch.utils.data.distributed.DistributedSampler( + train_dataset, num_replicas=hvd.size(), rank=hvd.rank() + ) + # Note, don't set `num_workers` in DataLoader (not even 1), + # as that will separately start multiple processes (each corresponding to 1 worker) + # to load the data. This is known to cause issues with Ray. + train_loader = torch.utils.data.DataLoader( + train_dataset, batch_size=batch_size, sampler=train_sampler, **kwargs + ) + + model = Net() + + # By default, Adasum doesn't need scaling up learning rate. + lr_scaler = hvd.size() if not use_adasum else 1 + + if use_cuda: + # Move model to GPU. + model.cuda() + # If using GPU Adasum allreduce, scale learning rate by local_size. 
+ if use_adasum and hvd.nccl_built(): + lr_scaler = hvd.local_size() + + # Horovod: scale learning rate by lr_scaler. + optimizer = optim.SGD(model.parameters(), lr=lr * lr_scaler, momentum=momentum) + + # Horovod: wrap optimizer with DistributedOptimizer. + optimizer = hvd.DistributedOptimizer( + optimizer, + named_parameters=model.named_parameters(), + op=hvd.Adasum if use_adasum else hvd.Average, + ) + + return model, optimizer, train_loader, train_sampler + + +def train_epoch( + model, optimizer, train_sampler, train_loader, epoch, log_interval, use_cuda +): + loss = None + model.train() + # Horovod: set epoch to sampler for shuffling. + train_sampler.set_epoch(epoch) + for batch_idx, (data, target) in enumerate(train_loader): + if use_cuda: + data, target = data.cuda(), target.cuda() + optimizer.zero_grad() + output = model(data) + loss = F.nll_loss(output, target) + loss.backward() + optimizer.step() + if batch_idx % log_interval == 0: + # Horovod: use train_sampler to determine the number of + # examples in this worker's partition. + print( + "Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}".format( + epoch, + batch_idx * len(data), + len(train_sampler), + 100.0 * batch_idx / len(train_loader), + loss.item(), + ) + ) + return loss.item() if loss else None + + +def train_func(config): + num_epochs = config.get("num_epochs", 10) + log_interval = config.get("log_interval", 10) + use_cuda = config.get("use_cuda", False) + + model, optimizer, train_loader, train_sampler = setup(config) + + results = [] + for epoch in range(num_epochs): + loss = train_epoch( + model, optimizer, train_sampler, train_loader, epoch, log_interval, use_cuda + ) + results.append(loss) + with tempfile.TemporaryDirectory() as tmpdir: + torch.save(model.state_dict(), os.path.join(tmpdir, "model.pt")) + train.report({"loss": loss}, checkpoint=Checkpoint.from_directory(tmpdir)) + + # Only used for testing. 
+ return results + + +def main(num_workers, use_gpu, kwargs): + trainer = HorovodTrainer( + train_loop_per_worker=train_func, + train_loop_config={ + "num_epochs": kwargs["num_epochs"], + "log_interval": kwargs["log_interval"], + "use_cuda": kwargs["use_cuda"], + }, + scaling_config=ScalingConfig(num_workers=num_workers, use_gpu=use_gpu), + ) + result = trainer.fit() + print(result) + + +if __name__ == "__main__": + # Training settings + parser = argparse.ArgumentParser( + description="PyTorch MNIST Example", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + parser.add_argument( + "--batch-size", + type=int, + default=64, + metavar="N", + help="input batch size for training (default: 64)", + ) + parser.add_argument( + "--num-epochs", + type=int, + default=5, + metavar="N", + help="number of epochs to train (default: 10)", + ) + parser.add_argument( + "--lr", + type=float, + default=0.01, + metavar="LR", + help="learning rate (default: 0.01)", + ) + parser.add_argument( + "--momentum", + type=float, + default=0.5, + metavar="M", + help="SGD momentum (default: 0.5)", + ) + parser.add_argument( + "--use-gpu", action="store_true", default=False, help="enables CUDA training" + ) + parser.add_argument( + "--seed", type=int, default=42, metavar="S", help="random seed (default: 42)" + ) + parser.add_argument( + "--log-interval", + type=int, + default=10, + metavar="N", + help="how many batches to wait before logging training status", + ) + parser.add_argument( + "--use-adasum", + action="store_true", + default=False, + help="use adasum algorithm to do reduction", + ) + parser.add_argument( + "--num-workers", + type=int, + default=2, + help="Number of Ray workers to use for training.", + ) + parser.add_argument( + "--data-dir", + help="location of the training dataset in the local filesystem (" + "will be downloaded if needed)", + ) + parser.add_argument( + "--address", + required=False, + type=str, + default=None, + help="Address of Ray cluster.", + ) + + 
args = parser.parse_args() + + if args.address: + ray.init(args.address) + else: + ray.init() + + use_cuda = args.use_gpu if args.use_gpu is not None else False + + kwargs = { + "data_dir": args.data_dir, + "seed": args.seed, + "use_cuda": use_cuda, + "batch_size": args.batch_size, + "use_adasum": args.use_adasum if args.use_adasum else False, + "lr": args.lr, + "momentum": args.momentum, + "num_epochs": args.num_epochs, + "log_interval": args.log_interval, + } + + main(num_workers=args.num_workers, use_gpu=use_cuda, kwargs=kwargs) diff --git a/.venv/lib/python3.11/site-packages/ray/train/examples/horovod/horovod_tune_example.py b/.venv/lib/python3.11/site-packages/ray/train/examples/horovod/horovod_tune_example.py new file mode 100644 index 0000000000000000000000000000000000000000..9433d50635ad0a8db585aefc454dd6c9649685a3 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/train/examples/horovod/horovod_tune_example.py @@ -0,0 +1,139 @@ +import time + +import numpy as np +import torch + +import ray +import ray.train.torch +from ray import train, tune +from ray.train import ScalingConfig +from ray.train.horovod import HorovodTrainer +from ray.tune.tune_config import TuneConfig +from ray.tune.tuner import Tuner + + +def sq(x): + m2 = 1.0 + m1 = -20.0 + m0 = 50.0 + return m2 * x * x + m1 * x + m0 + + +def qu(x): + m3 = 10.0 + m2 = 5.0 + m1 = -20.0 + m0 = -5.0 + return m3 * x * x * x + m2 * x * x + m1 * x + m0 + + +class Net(torch.nn.Module): + def __init__(self, mode="sq"): + super(Net, self).__init__() + + if mode == "square": + self.mode = 0 + self.param = torch.nn.Parameter(torch.FloatTensor([1.0, -1.0])) + else: + self.mode = 1 + self.param = torch.nn.Parameter(torch.FloatTensor([1.0, -1.0, 1.0])) + + def forward(self, x): + if ~self.mode: + return x * x + self.param[0] * x + self.param[1] + else: + return_val = 10 * x * x * x + return_val += self.param[0] * x * x + return_val += self.param[1] * x + self.param[2] + return return_val + + +def 
train_loop_per_worker(config): + import horovod.torch as hvd + import torch + + hvd.init() + device = ray.train.torch.get_device() + mode = config["mode"] + net = Net(mode).to(device) + optimizer = torch.optim.SGD( + net.parameters(), + lr=config["lr"], + ) + optimizer = hvd.DistributedOptimizer(optimizer) + + num_steps = 5 + print(hvd.size()) + np.random.seed(1 + hvd.rank()) + torch.manual_seed(1234) + # To ensure consistent initialization across workers, + hvd.broadcast_parameters(net.state_dict(), root_rank=0) + hvd.broadcast_optimizer_state(optimizer, root_rank=0) + + start = time.time() + x_max = config["x_max"] + for step in range(1, num_steps + 1): + features = torch.Tensor(np.random.rand(1) * 2 * x_max - x_max).to(device) + if mode == "square": + labels = sq(features) + else: + labels = qu(features) + optimizer.zero_grad() + outputs = net(features) + loss = torch.nn.MSELoss()(outputs, labels) + loss.backward() + + optimizer.step() + time.sleep(0.1) + train.report(dict(loss=loss.item())) + total = time.time() - start + print(f"Took {total:0.3f} s. 
Avg: {total / num_steps:0.3f} s.") + + +def tune_horovod(num_workers, num_samples, use_gpu, mode="square", x_max=1.0): + horovod_trainer = HorovodTrainer( + train_loop_per_worker=train_loop_per_worker, + scaling_config=ScalingConfig(num_workers=num_workers, use_gpu=use_gpu), + train_loop_config={"mode": mode, "x_max": x_max}, + ) + + tuner = Tuner( + horovod_trainer, + param_space={"train_loop_config": {"lr": tune.uniform(0.1, 1)}}, + tune_config=TuneConfig(mode="min", metric="loss", num_samples=num_samples), + _tuner_kwargs={"fail_fast": True}, + ) + + result_grid = tuner.fit() + + print("Best hyperparameters found were: ", result_grid.get_best_result().config) + + +if __name__ == "__main__": + import argparse + + parser = argparse.ArgumentParser() + parser.add_argument( + "--mode", type=str, default="square", choices=["square", "cubic"] + ) + parser.add_argument( + "--learning_rate", type=float, default=0.1, dest="learning_rate" + ) + parser.add_argument("--x_max", type=float, default=1.0, dest="x_max") + parser.add_argument("--gpu", action="store_true") + parser.add_argument( + "--smoke-test", action="store_true", help=("Finish quickly for testing.") + ) + parser.add_argument("--num-workers", type=int, default=2) + args, _ = parser.parse_known_args() + + if args.smoke_test: + ray.init(num_cpus=3) + + tune_horovod( + num_workers=args.num_workers, + num_samples=2 if args.smoke_test else 10, + use_gpu=args.gpu, + mode=args.mode, + x_max=args.x_max, + ) diff --git a/.venv/lib/python3.11/site-packages/ray/train/examples/pytorch/__init__.py b/.venv/lib/python3.11/site-packages/ray/train/examples/pytorch/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/.venv/lib/python3.11/site-packages/ray/train/examples/pytorch/__pycache__/__init__.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/train/examples/pytorch/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..024675238ac9d234b2de1f9eeec567adb2528f80 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/train/examples/pytorch/__pycache__/__init__.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/train/examples/pytorch/__pycache__/torch_fashion_mnist_example.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/train/examples/pytorch/__pycache__/torch_fashion_mnist_example.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d4c856b4f2afcd729ea172dd1571b2a98872b71c Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/train/examples/pytorch/__pycache__/torch_fashion_mnist_example.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/train/examples/pytorch/__pycache__/torch_linear_example.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/train/examples/pytorch/__pycache__/torch_linear_example.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a86932e6b2de5c807942bf45c3ef54404e408f93 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/train/examples/pytorch/__pycache__/torch_linear_example.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/train/examples/pytorch/__pycache__/torch_quick_start.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/train/examples/pytorch/__pycache__/torch_quick_start.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..81176fc02476405d728b5078668ac056855d69f6 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/train/examples/pytorch/__pycache__/torch_quick_start.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/train/examples/pytorch/__pycache__/torch_regression_example.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/train/examples/pytorch/__pycache__/torch_regression_example.cpython-311.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..5e06dc3bfc0a969d86eaf9ec9e62332b4555e58e Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/train/examples/pytorch/__pycache__/torch_regression_example.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/train/examples/pytorch/__pycache__/tune_cifar_torch_pbt_example.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/train/examples/pytorch/__pycache__/tune_cifar_torch_pbt_example.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7323ae6502b3bc4d6403614a26354c259ebbefcc Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/train/examples/pytorch/__pycache__/tune_cifar_torch_pbt_example.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/train/examples/pytorch/__pycache__/tune_torch_regression_example.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/train/examples/pytorch/__pycache__/tune_torch_regression_example.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..91015b20aefbcd7689cb88252fc0a7dbc45b875b Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/train/examples/pytorch/__pycache__/tune_torch_regression_example.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/train/examples/pytorch/torch_data_prefetch_benchmark/__init__.py b/.venv/lib/python3.11/site-packages/ray/train/examples/pytorch/torch_data_prefetch_benchmark/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/.venv/lib/python3.11/site-packages/ray/train/examples/pytorch/torch_data_prefetch_benchmark/__pycache__/__init__.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/train/examples/pytorch/torch_data_prefetch_benchmark/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5adeeb93fca714b51c1460c951a56ecd6946127c Binary 
files /dev/null and b/.venv/lib/python3.11/site-packages/ray/train/examples/pytorch/torch_data_prefetch_benchmark/__pycache__/__init__.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/train/examples/pytorch/torch_data_prefetch_benchmark/__pycache__/auto_pipeline_for_host_to_device_data_transfer.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/train/examples/pytorch/torch_data_prefetch_benchmark/__pycache__/auto_pipeline_for_host_to_device_data_transfer.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e692020c26550e696de81c4212815d10a6543414 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/train/examples/pytorch/torch_data_prefetch_benchmark/__pycache__/auto_pipeline_for_host_to_device_data_transfer.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/train/examples/pytorch/torch_data_prefetch_benchmark/auto_pipeline_for_host_to_device_data_transfer.py b/.venv/lib/python3.11/site-packages/ray/train/examples/pytorch/torch_data_prefetch_benchmark/auto_pipeline_for_host_to_device_data_transfer.py new file mode 100644 index 0000000000000000000000000000000000000000..28fe7461bc3c30dd1631bc30454b6a5dc64a9696 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/train/examples/pytorch/torch_data_prefetch_benchmark/auto_pipeline_for_host_to_device_data_transfer.py @@ -0,0 +1,161 @@ +# The PyTorch data transfer benchmark script. 
+import argparse +import warnings + +import numpy as np +import torch +import torch.nn as nn + +import ray.train as train +from ray.train import ScalingConfig +from ray.train.torch import TorchTrainer + + +class Net(nn.Module): + def __init__(self, in_d, hidden): + # output dim = 1 + super(Net, self).__init__() + dims = [in_d] + hidden + [1] + self.layers = nn.ModuleList( + [nn.Linear(dims[i - 1], dims[i]) for i in range(len(dims))] + ) + + def forward(self, x): + for layer in self.layers: + x = layer(x) + return x + + +class BenchmarkDataset(torch.utils.data.Dataset): + """Create a naive dataset for the benchmark""" + + def __init__(self, dim, size=1000): + self.x = torch.from_numpy(np.random.normal(size=(size, dim))).float() + self.y = torch.from_numpy(np.random.normal(size=(size, 1))).float() + self.size = size + + def __getitem__(self, index): + return self.x[index, None], self.y[index, None] + + def __len__(self): + return self.size + + +def train_epoch(epoch, dataloader, model, loss_fn, optimizer): + if train.get_context().get_world_size() > 1: + dataloader.sampler.set_epoch(epoch) + + for X, y in dataloader: + # Compute prediction error + pred = model(X) + loss = loss_fn(pred, y) + + # Backpropagation + optimizer.zero_grad() + loss.backward() + optimizer.step() + + +def train_func(config): + data_size = config.get("data_size", 4096 * 50) + batch_size = config.get("batch_size", 4096) + hidden_size = config.get("hidden_size", 1) + use_auto_transfer = config.get("use_auto_transfer", False) + lr = config.get("lr", 1e-2) + epochs = config.get("epochs", 10) + + train_dataset = BenchmarkDataset(4096, size=data_size) + train_loader = torch.utils.data.DataLoader( + train_dataset, batch_size=batch_size, shuffle=True + ) + + train_loader = train.torch.prepare_data_loader( + data_loader=train_loader, move_to_device=True, auto_transfer=use_auto_transfer + ) + + model = Net(in_d=4096, hidden=[4096] * hidden_size) + model = train.torch.prepare_model(model) + + loss_fn = 
nn.MSELoss() + optimizer = torch.optim.SGD(model.parameters(), lr=lr) + + start = torch.cuda.Event(enable_timing=True) + end = torch.cuda.Event(enable_timing=True) + + choice = "with" if use_auto_transfer else "without" + print(f"Starting the torch data prefetch benchmark {choice} auto pipeline...") + + torch.cuda.synchronize() + start.record() + for epoch in range(epochs): + train_epoch(epoch, train_loader, model, loss_fn, optimizer) + end.record() + torch.cuda.synchronize() + + print( + f"Finished the torch data prefetch benchmark {choice} " + f"auto pipeline: {start.elapsed_time(end)} ms." + ) + + return "Experiment done." + + +def train_linear(num_workers=1, num_hidden_layers=1, use_auto_transfer=True, epochs=3): + config = { + "lr": 1e-2, + "hidden_size": num_hidden_layers, + "batch_size": 4096, + "epochs": epochs, + "use_auto_transfer": use_auto_transfer, + } + trainer = TorchTrainer( + train_func, + train_loop_config=config, + scaling_config=ScalingConfig(use_gpu=True, num_workers=num_workers), + ) + results = trainer.fit() + + print(results.metrics) + return results + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument( + "--address", required=False, type=str, help="the address to use for Ray" + ) + parser.add_argument( + "--epochs", type=int, default=1, help="Number of epochs to train for." + ) + parser.add_argument( + "--num_hidden_layers", + type=int, + default=1, + help="Number of epochs to train for.", + ) + + args, _ = parser.parse_known_args() + + import ray + + ray.init(address=args.address) + + if not torch.cuda.is_available(): + warnings.warn("GPU is not available. 
Skip the test using auto pipeline.") + else: + train_linear( + num_workers=1, + num_hidden_layers=args.num_hidden_layers, + use_auto_transfer=True, + epochs=args.epochs, + ) + + torch.cuda.empty_cache() + train_linear( + num_workers=1, + num_hidden_layers=args.num_hidden_layers, + use_auto_transfer=False, + epochs=args.epochs, + ) + + ray.shutdown() diff --git a/.venv/lib/python3.11/site-packages/ray/train/examples/pytorch/torch_fashion_mnist_example.py b/.venv/lib/python3.11/site-packages/ray/train/examples/pytorch/torch_fashion_mnist_example.py new file mode 100644 index 0000000000000000000000000000000000000000..e26ed51ad6f4d572305f83892af39a7074311bd4 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/train/examples/pytorch/torch_fashion_mnist_example.py @@ -0,0 +1,152 @@ +import os +from typing import Dict + +import torch +from filelock import FileLock +from torch import nn +from torch.utils.data import DataLoader +from torchvision import datasets, transforms +from torchvision.transforms import Normalize, ToTensor +from tqdm import tqdm + +import ray.train +from ray.train import ScalingConfig +from ray.train.torch import TorchTrainer + + +def get_dataloaders(batch_size): + # Transform to normalize the input images + transform = transforms.Compose([ToTensor(), Normalize((0.5,), (0.5,))]) + + with FileLock(os.path.expanduser("~/data.lock")): + # Download training data from open datasets + training_data = datasets.FashionMNIST( + root="~/data", + train=True, + download=True, + transform=transform, + ) + + # Download test data from open datasets + test_data = datasets.FashionMNIST( + root="~/data", + train=False, + download=True, + transform=transform, + ) + + # Create data loaders + train_dataloader = DataLoader(training_data, batch_size=batch_size, shuffle=True) + test_dataloader = DataLoader(test_data, batch_size=batch_size) + + return train_dataloader, test_dataloader + + +# Model Definition +class NeuralNetwork(nn.Module): + def __init__(self): + 
super(NeuralNetwork, self).__init__() + self.flatten = nn.Flatten() + self.linear_relu_stack = nn.Sequential( + nn.Linear(28 * 28, 512), + nn.ReLU(), + nn.Dropout(0.25), + nn.Linear(512, 512), + nn.ReLU(), + nn.Dropout(0.25), + nn.Linear(512, 10), + nn.ReLU(), + ) + + def forward(self, x): + x = self.flatten(x) + logits = self.linear_relu_stack(x) + return logits + + +def train_func_per_worker(config: Dict): + lr = config["lr"] + epochs = config["epochs"] + batch_size = config["batch_size_per_worker"] + + # Get dataloaders inside the worker training function + train_dataloader, test_dataloader = get_dataloaders(batch_size=batch_size) + + # [1] Prepare Dataloader for distributed training + # Shard the datasets among workers and move batches to the correct device + # ======================================================================= + train_dataloader = ray.train.torch.prepare_data_loader(train_dataloader) + test_dataloader = ray.train.torch.prepare_data_loader(test_dataloader) + + model = NeuralNetwork() + + # [2] Prepare and wrap your model with DistributedDataParallel + # Move the model to the correct GPU/CPU device + # ============================================================ + model = ray.train.torch.prepare_model(model) + + loss_fn = nn.CrossEntropyLoss() + optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=0.9) + + # Model training loop + for epoch in range(epochs): + if ray.train.get_context().get_world_size() > 1: + # Required for the distributed sampler to shuffle properly across epochs. 
+ train_dataloader.sampler.set_epoch(epoch) + + model.train() + for X, y in tqdm(train_dataloader, desc=f"Train Epoch {epoch}"): + pred = model(X) + loss = loss_fn(pred, y) + + optimizer.zero_grad() + loss.backward() + optimizer.step() + + model.eval() + test_loss, num_correct, num_total = 0, 0, 0 + with torch.no_grad(): + for X, y in tqdm(test_dataloader, desc=f"Test Epoch {epoch}"): + pred = model(X) + loss = loss_fn(pred, y) + + test_loss += loss.item() + num_total += y.shape[0] + num_correct += (pred.argmax(1) == y).sum().item() + + test_loss /= len(test_dataloader) + accuracy = num_correct / num_total + + # [3] Report metrics to Ray Train + # =============================== + ray.train.report(metrics={"loss": test_loss, "accuracy": accuracy}) + + +def train_fashion_mnist(num_workers=2, use_gpu=False): + global_batch_size = 32 + + train_config = { + "lr": 1e-3, + "epochs": 10, + "batch_size_per_worker": global_batch_size // num_workers, + } + + # Configure computation resources + scaling_config = ScalingConfig(num_workers=num_workers, use_gpu=use_gpu) + + # Initialize a Ray TorchTrainer + trainer = TorchTrainer( + train_loop_per_worker=train_func_per_worker, + train_loop_config=train_config, + scaling_config=scaling_config, + ) + + # [4] Start distributed training + # Run `train_func_per_worker` on all workers + # ============================================= + result = trainer.fit() + print(f"Training result: {result}") + + +if __name__ == "__main__": + train_fashion_mnist(num_workers=4, use_gpu=True) diff --git a/.venv/lib/python3.11/site-packages/ray/train/examples/pytorch/torch_linear_example.py b/.venv/lib/python3.11/site-packages/ray/train/examples/pytorch/torch_linear_example.py new file mode 100644 index 0000000000000000000000000000000000000000..19d5848473b91719f488d19a3364ddc55da6d899 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/train/examples/pytorch/torch_linear_example.py @@ -0,0 +1,147 @@ +import argparse +import os +import tempfile + 
+import numpy as np +import torch +import torch.nn as nn + +import ray.train as train +from ray.train import Checkpoint, RunConfig, ScalingConfig +from ray.train.torch import TorchTrainer + + +class LinearDataset(torch.utils.data.Dataset): + """y = a * x + b""" + + def __init__(self, a, b, size=1000): + x = np.arange(0, 10, 10 / size, dtype=np.float32) + self.x = torch.from_numpy(x) + self.y = torch.from_numpy(a * x + b) + + def __getitem__(self, index): + return self.x[index, None], self.y[index, None] + + def __len__(self): + return len(self.x) + + +def train_epoch(epoch, dataloader, model, loss_fn, optimizer): + if train.get_context().get_world_size() > 1: + dataloader.sampler.set_epoch(epoch) + + for X, y in dataloader: + # Compute prediction error + pred = model(X) + loss = loss_fn(pred, y) + + # Backpropagation + optimizer.zero_grad() + loss.backward() + optimizer.step() + + +def validate_epoch(dataloader, model, loss_fn): + num_batches = len(dataloader) + model.eval() + loss = 0 + with torch.no_grad(): + for X, y in dataloader: + pred = model(X) + loss += loss_fn(pred, y).item() + loss /= num_batches + import copy + + model_copy = copy.deepcopy(model) + return model_copy.cpu().state_dict(), loss + + +def train_func(config): + data_size = config.get("data_size", 1000) + val_size = config.get("val_size", 400) + batch_size = config.get("batch_size", 32) + hidden_size = config.get("hidden_size", 1) + lr = config.get("lr", 1e-2) + epochs = config.get("epochs", 3) + + train_dataset = LinearDataset(2, 5, size=data_size) + val_dataset = LinearDataset(2, 5, size=val_size) + train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size) + validation_loader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size) + + train_loader = train.torch.prepare_data_loader(train_loader) + validation_loader = train.torch.prepare_data_loader(validation_loader) + + model = nn.Linear(1, hidden_size) + model = train.torch.prepare_model(model) + + loss_fn = 
nn.MSELoss() + + optimizer = torch.optim.SGD(model.parameters(), lr=lr) + + results = [] + for epoch in range(epochs): + train_epoch(epoch, train_loader, model, loss_fn, optimizer) + state_dict, loss = validate_epoch(validation_loader, model, loss_fn) + result = dict(loss=loss) + results.append(result) + + with tempfile.TemporaryDirectory() as tmpdir: + torch.save(state_dict, os.path.join(tmpdir, "model.pt")) + train.report(result, checkpoint=Checkpoint.from_directory(tmpdir)) + + return results + + +def train_linear(num_workers=2, use_gpu=False, epochs=3, storage_path=None): + config = {"lr": 1e-2, "hidden_size": 1, "batch_size": 4, "epochs": epochs} + trainer = TorchTrainer( + train_loop_per_worker=train_func, + train_loop_config=config, + scaling_config=ScalingConfig(num_workers=num_workers, use_gpu=use_gpu), + run_config=RunConfig(storage_path=storage_path), + ) + result = trainer.fit() + + print(result.metrics) + return result.metrics + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument( + "--address", required=False, type=str, help="the address to use for Ray" + ) + parser.add_argument( + "--num-workers", + "-n", + type=int, + default=2, + help="Sets number of workers for training.", + ) + parser.add_argument( + "--use-gpu", action="store_true", help="Whether to use GPU for training." + ) + parser.add_argument( + "--epochs", type=int, default=3, help="Number of epochs to train for." + ) + parser.add_argument( + "--smoke-test", + action="store_true", + default=False, + help="Finish quickly for testing.", + ) + + args, _ = parser.parse_known_args() + + import ray + + if args.smoke_test: + # 2 workers + 1 for trainer. 
+ ray.init(num_cpus=3) + train_linear() + else: + ray.init(address=args.address) + train_linear( + num_workers=args.num_workers, use_gpu=args.use_gpu, epochs=args.epochs + ) diff --git a/.venv/lib/python3.11/site-packages/ray/train/examples/pytorch/torch_quick_start.py b/.venv/lib/python3.11/site-packages/ray/train/examples/pytorch/torch_quick_start.py new file mode 100644 index 0000000000000000000000000000000000000000..df1ae3461bd7aaac9d5c5d98c604115df36d3760 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/train/examples/pytorch/torch_quick_start.py @@ -0,0 +1,110 @@ +# ruff: noqa +# fmt: off +# isort: skip_file + +# __torch_setup_begin__ +import torch +import torch.nn as nn +from torch.utils.data import DataLoader +from torchvision import datasets +from torchvision.transforms import ToTensor + +def get_dataset(): + return datasets.FashionMNIST( + root="/tmp/data", + train=True, + download=True, + transform=ToTensor(), + ) + +class NeuralNetwork(nn.Module): + def __init__(self): + super().__init__() + self.flatten = nn.Flatten() + self.linear_relu_stack = nn.Sequential( + nn.Linear(28 * 28, 512), + nn.ReLU(), + nn.Linear(512, 512), + nn.ReLU(), + nn.Linear(512, 10), + ) + + def forward(self, inputs): + inputs = self.flatten(inputs) + logits = self.linear_relu_stack(inputs) + return logits +# __torch_setup_end__ + +# __torch_single_begin__ +def train_func(): + num_epochs = 3 + batch_size = 64 + + dataset = get_dataset() + dataloader = DataLoader(dataset, batch_size=batch_size) + + model = NeuralNetwork() + + criterion = nn.CrossEntropyLoss() + optimizer = torch.optim.SGD(model.parameters(), lr=0.01) + + for epoch in range(num_epochs): + for inputs, labels in dataloader: + optimizer.zero_grad() + pred = model(inputs) + loss = criterion(pred, labels) + loss.backward() + optimizer.step() + print(f"epoch: {epoch}, loss: {loss.item()}") +# __torch_single_end__ + +# __torch_distributed_begin__ +import ray.train.torch + +def train_func_distributed(): + 
num_epochs = 3 + batch_size = 64 + + dataset = get_dataset() + dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True) + dataloader = ray.train.torch.prepare_data_loader(dataloader) + + model = NeuralNetwork() + model = ray.train.torch.prepare_model(model) + + criterion = nn.CrossEntropyLoss() + optimizer = torch.optim.SGD(model.parameters(), lr=0.01) + + for epoch in range(num_epochs): + if ray.train.get_context().get_world_size() > 1: + dataloader.sampler.set_epoch(epoch) + + for inputs, labels in dataloader: + optimizer.zero_grad() + pred = model(inputs) + loss = criterion(pred, labels) + loss.backward() + optimizer.step() + print(f"epoch: {epoch}, loss: {loss.item()}") +# __torch_distributed_end__ + + +if __name__ == "__main__": + # __torch_single_run_begin__ + train_func() + # __torch_single_run_end__ + + # __torch_trainer_begin__ + from ray.train.torch import TorchTrainer + from ray.train import ScalingConfig + + # For GPU Training, set `use_gpu` to True. + use_gpu = False + + trainer = TorchTrainer( + train_func_distributed, + scaling_config=ScalingConfig(num_workers=4, use_gpu=use_gpu) + ) + + results = trainer.fit() + # __torch_trainer_end__ diff --git a/.venv/lib/python3.11/site-packages/ray/train/examples/pytorch/torch_regression_example.py b/.venv/lib/python3.11/site-packages/ray/train/examples/pytorch/torch_regression_example.py new file mode 100644 index 0000000000000000000000000000000000000000..8bd54fbcb7ab2e8c778b220c39187e0ad0430ca5 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/train/examples/pytorch/torch_regression_example.py @@ -0,0 +1,160 @@ +import argparse +import os +import tempfile +from typing import Tuple + +import pandas as pd +import torch +import torch.nn as nn + +import ray +import ray.train as train +from ray.data import Dataset +from ray.train import Checkpoint, DataConfig, ScalingConfig +from ray.train.torch import TorchTrainer + + +def get_datasets(split: float = 0.7) -> Tuple[Dataset]: + dataset = 
ray.data.read_csv("s3://anonymous@air-example-data/regression.csv") + + def combine_x(batch): + return pd.DataFrame( + { + "x": batch[[f"x{i:03d}" for i in range(100)]].values.tolist(), + "y": batch["y"], + } + ) + + dataset = dataset.map_batches(combine_x, batch_format="pandas") + train_dataset, validation_dataset = dataset.repartition( + num_blocks=4 + ).train_test_split(split, shuffle=True) + return train_dataset, validation_dataset + + +def train_epoch(iterable_dataset, model, loss_fn, optimizer, device): + model.train() + for X, y in iterable_dataset: + X = X.to(device) + y = y.to(device) + + # Compute prediction error + pred = model(X) + loss = loss_fn(pred, y) + + # Backpropagation + optimizer.zero_grad() + loss.backward() + optimizer.step() + + +def validate_epoch(iterable_dataset, model, loss_fn, device): + num_batches = 0 + model.eval() + loss = 0 + with torch.no_grad(): + for X, y in iterable_dataset: + X = X.to(device) + y = y.to(device) + num_batches += 1 + pred = model(X) + loss += loss_fn(pred, y).item() + loss /= num_batches + result = {"loss": loss} + return result + + +def train_func(config): + batch_size = config.get("batch_size", 32) + hidden_size = config.get("hidden_size", 10) + lr = config.get("lr", 1e-2) + epochs = config.get("epochs", 3) + + train_dataset_shard = train.get_dataset_shard("train") + validation_dataset = train.get_dataset_shard("validation") + + model = nn.Sequential( + nn.Linear(100, hidden_size), nn.ReLU(), nn.Linear(hidden_size, 1) + ) + model = train.torch.prepare_model(model) + + loss_fn = nn.L1Loss() + + optimizer = torch.optim.SGD(model.parameters(), lr=lr) + + results = [] + + def create_torch_iterator(shard): + iterator = shard.iter_torch_batches(batch_size=batch_size) + for batch in iterator: + yield batch["x"].float(), batch["y"].float() + + for _ in range(epochs): + train_torch_dataset = create_torch_iterator(train_dataset_shard) + validation_torch_dataset = create_torch_iterator(validation_dataset) + + device = 
train.torch.get_device() + + train_epoch(train_torch_dataset, model, loss_fn, optimizer, device) + if train.get_context().get_world_rank() == 0: + result = validate_epoch(validation_torch_dataset, model, loss_fn, device) + else: + result = {} + results.append(result) + + with tempfile.TemporaryDirectory() as tmpdir: + torch.save(model.module.state_dict(), os.path.join(tmpdir, "model.pt")) + train.report(result, checkpoint=Checkpoint.from_directory(tmpdir)) + + return results + + +def train_regression(num_workers=2, use_gpu=False): + train_dataset, val_dataset = get_datasets() + config = {"lr": 1e-2, "hidden_size": 20, "batch_size": 4, "epochs": 3} + + trainer = TorchTrainer( + train_loop_per_worker=train_func, + train_loop_config=config, + scaling_config=ScalingConfig(num_workers=num_workers, use_gpu=use_gpu), + datasets={"train": train_dataset, "validation": val_dataset}, + dataset_config=DataConfig(datasets_to_split=["train"]), + ) + + result = trainer.fit() + print(result.metrics) + return result + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument( + "--address", required=False, type=str, help="the address to use for Ray" + ) + parser.add_argument( + "--num-workers", + "-n", + type=int, + default=2, + help="Sets number of workers for training.", + ) + parser.add_argument( + "--smoke-test", + action="store_true", + default=False, + help="Finish quickly for testing.", + ) + parser.add_argument( + "--use-gpu", action="store_true", default=False, help="Use GPU for training." 
+ ) + + args, _ = parser.parse_known_args() + + if args.smoke_test: + # 2 workers, 1 for trainer, 1 for datasets + ray.init(num_cpus=4) + result = train_regression() + else: + ray.init(address=args.address) + result = train_regression(num_workers=args.num_workers, use_gpu=args.use_gpu) + print(result) diff --git a/.venv/lib/python3.11/site-packages/ray/train/examples/pytorch/tune_cifar_torch_pbt_example.py b/.venv/lib/python3.11/site-packages/ray/train/examples/pytorch/tune_cifar_torch_pbt_example.py new file mode 100644 index 0000000000000000000000000000000000000000..00b5694884bd01b752b72a14a8a606fc12ad3052 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/train/examples/pytorch/tune_cifar_torch_pbt_example.py @@ -0,0 +1,253 @@ +import argparse +import os +import tempfile + +import torch +import torch.nn as nn +import torchvision.transforms as transforms +from filelock import FileLock +from torch.utils.data import DataLoader, Subset +from torchvision.datasets import CIFAR10 +from torchvision.models import resnet18 + +import ray +import ray.cloudpickle as cpickle +from ray import train, tune +from ray.train import Checkpoint, FailureConfig, RunConfig, ScalingConfig +from ray.train.torch import TorchTrainer +from ray.tune.schedulers import PopulationBasedTraining +from ray.tune.tune_config import TuneConfig +from ray.tune.tuner import Tuner + + +def train_epoch(epoch, dataloader, model, loss_fn, optimizer): + if ray.train.get_context().get_world_size() > 1: + dataloader.sampler.set_epoch(epoch) + + size = len(dataloader.dataset) // train.get_context().get_world_size() + model.train() + for batch, (X, y) in enumerate(dataloader): + # Compute prediction error + pred = model(X) + loss = loss_fn(pred, y) + + # Backpropagation + optimizer.zero_grad() + loss.backward() + optimizer.step() + + if batch % 100 == 0: + loss, current = loss.item(), batch * len(X) + print(f"loss: {loss:>7f} [{current:>5d}/{size:>5d}]") + + +def validate_epoch(dataloader, model, 
loss_fn): + size = len(dataloader.dataset) // train.get_context().get_world_size() + num_batches = len(dataloader) + model.eval() + test_loss, correct = 0, 0 + with torch.no_grad(): + for X, y in dataloader: + pred = model(X) + test_loss += loss_fn(pred, y).item() + correct += (pred.argmax(1) == y).type(torch.float).sum().item() + test_loss /= num_batches + correct /= size + print( + f"Test Error: \n " + f"Accuracy: {(100 * correct):>0.1f}%, " + f"Avg loss: {test_loss:>8f} \n" + ) + return {"loss": test_loss} + + +def update_optimizer_config(optimizer, config): + for param_group in optimizer.param_groups: + for param, val in config.items(): + param_group[param] = val + + +def train_func(config): + epochs = config.get("epochs", 3) + + model = resnet18() + + # Note that `prepare_model` needs to be called before setting optimizer. + if not train.get_checkpoint(): # fresh start + model = train.torch.prepare_model(model) + + # Create optimizer. + optimizer_config = { + "lr": config.get("lr"), + "momentum": config.get("momentum"), + } + optimizer = torch.optim.SGD(model.parameters(), **optimizer_config) + + starting_epoch = 0 + if train.get_checkpoint(): + with train.get_checkpoint().as_directory() as checkpoint_dir: + with open(os.path.join(checkpoint_dir, "data.ckpt"), "rb") as fp: + checkpoint_dict = cpickle.load(fp) + + # Load in model + model_state = checkpoint_dict["model"] + model.load_state_dict(model_state) + model = train.torch.prepare_model(model) + + # Load in optimizer + optimizer_state = checkpoint_dict["optimizer_state_dict"] + optimizer.load_state_dict(optimizer_state) + + # Optimizer configs (`lr`, `momentum`) are being mutated by PBT and passed in + # through config, so we need to update the optimizer loaded from the checkpoint + update_optimizer_config(optimizer, optimizer_config) + + # The current epoch increments the loaded epoch by 1 + checkpoint_epoch = checkpoint_dict["epoch"] + starting_epoch = checkpoint_epoch + 1 + + # Load in training and 
validation data. + transform_train = transforms.Compose( + [ + transforms.RandomCrop(32, padding=4), + transforms.RandomHorizontalFlip(), + transforms.ToTensor(), + transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)), + ] + ) # meanstd transformation + + transform_test = transforms.Compose( + [ + transforms.ToTensor(), + transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)), + ] + ) + + data_dir = config.get("data_dir", os.path.expanduser("~/data")) + os.makedirs(data_dir, exist_ok=True) + with FileLock(os.path.join(data_dir, ".ray.lock")): + train_dataset = CIFAR10( + root=data_dir, train=True, download=True, transform=transform_train + ) + validation_dataset = CIFAR10( + root=data_dir, train=False, download=False, transform=transform_test + ) + + if config.get("test_mode"): + train_dataset = Subset(train_dataset, list(range(64))) + validation_dataset = Subset(validation_dataset, list(range(64))) + + worker_batch_size = config["batch_size"] // train.get_context().get_world_size() + + train_loader = DataLoader(train_dataset, batch_size=worker_batch_size, shuffle=True) + validation_loader = DataLoader(validation_dataset, batch_size=worker_batch_size) + + train_loader = train.torch.prepare_data_loader(train_loader) + validation_loader = train.torch.prepare_data_loader(validation_loader) + + # Create loss. 
+ criterion = nn.CrossEntropyLoss() + + for epoch in range(starting_epoch, epochs): + train_epoch(epoch, train_loader, model, criterion, optimizer) + result = validate_epoch(validation_loader, model, criterion) + + with tempfile.TemporaryDirectory() as checkpoint_dir: + with open(os.path.join(checkpoint_dir, "data.ckpt"), "wb") as fp: + cpickle.dump( + { + "epoch": epoch, + "model": model.state_dict(), + "optimizer_state_dict": optimizer.state_dict(), + }, + fp, + ) + checkpoint = Checkpoint.from_directory(checkpoint_dir) + train.report(result, checkpoint=checkpoint) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument( + "--address", required=False, type=str, help="The address to use for Redis." + ) + parser.add_argument( + "--num-workers", + "-n", + type=int, + default=2, + help="Sets number of workers for training.", + ) + parser.add_argument( + "--num-epochs", type=int, default=5, help="Number of epochs to train." + ) + parser.add_argument( + "--smoke-test", + action="store_true", + default=False, + help="Finish quickly for testing.", + ) + parser.add_argument( + "--use-gpu", action="store_true", default=False, help="Enables GPU training." + ) + parser.add_argument( + "--data-dir", + required=False, + type=str, + default="~/data", + help="Root directory for storing downloaded dataset.", + ) + parser.add_argument( + "--synch", action="store_true", default=False, help="Use synchronous PBT." 
+ ) + + args, _ = parser.parse_known_args() + if args.smoke_test: + ray.init(num_cpus=4) + else: + ray.init(address=args.address) + + trainer = TorchTrainer( + train_func, + scaling_config=ScalingConfig( + num_workers=args.num_workers, use_gpu=args.use_gpu + ), + ) + pbt_scheduler = PopulationBasedTraining( + time_attr="training_iteration", + perturbation_interval=1, + hyperparam_mutations={ + "train_loop_config": { + # distribution for resampling + "lr": tune.loguniform(0.001, 0.1), + # allow perturbations within this set of categorical values + "momentum": [0.8, 0.9, 0.99], + } + }, + synch=args.synch, + ) + + tuner = Tuner( + trainer, + param_space={ + "train_loop_config": { + "lr": tune.grid_search([0.001, 0.01, 0.05, 0.1]), + "momentum": 0.8, + "batch_size": 128 * args.num_workers, + "test_mode": args.smoke_test, # whether to to subset the data + "data_dir": args.data_dir, + "epochs": args.num_epochs, + } + }, + tune_config=TuneConfig( + num_samples=1, metric="loss", mode="min", scheduler=pbt_scheduler + ), + run_config=RunConfig( + stop={"training_iteration": 3 if args.smoke_test else args.num_epochs}, + failure_config=FailureConfig(max_failures=3), # used for fault tolerance + ), + ) + + results = tuner.fit() + + print(results.get_best_result(metric="loss", mode="min")) diff --git a/.venv/lib/python3.11/site-packages/ray/train/examples/pytorch/tune_torch_regression_example.py b/.venv/lib/python3.11/site-packages/ray/train/examples/pytorch/tune_torch_regression_example.py new file mode 100644 index 0000000000000000000000000000000000000000..e8221c995110f5f8c5b6d48b25d87868a3f78c0c --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/train/examples/pytorch/tune_torch_regression_example.py @@ -0,0 +1,82 @@ +import argparse + +import ray +from ray import tune +from ray.train import DataConfig, ScalingConfig +from ray.train.examples.pytorch.torch_regression_example import get_datasets, train_func +from ray.train.torch import TorchTrainer +from 
ray.tune.tune_config import TuneConfig +from ray.tune.tuner import Tuner + + +def tune_linear(num_workers, num_samples, use_gpu): + train_dataset, val_dataset = get_datasets() + + config = {"lr": 1e-2, "hidden_size": 1, "batch_size": 4, "epochs": 3} + + trainer = TorchTrainer( + train_loop_per_worker=train_func, + train_loop_config=config, + scaling_config=ScalingConfig(num_workers=num_workers, use_gpu=use_gpu), + datasets={"train": train_dataset, "validation": val_dataset}, + dataset_config=DataConfig(datasets_to_split=["train"]), + ) + + tuner = Tuner( + trainer, + param_space={ + "train_loop_config": { + "lr": tune.loguniform(1e-4, 1e-1), + "batch_size": tune.choice([4, 16, 32]), + "epochs": 3, + } + }, + tune_config=TuneConfig(num_samples=num_samples, metric="loss", mode="min"), + ) + result_grid = tuner.fit() + best_result = result_grid.get_best_result() + print(best_result) + return best_result + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument( + "--smoke-test", + action="store_true", + default=False, + help="Finish quickly for testing.", + ) + parser.add_argument( + "--address", required=False, type=str, help="the address to use for Ray" + ) + parser.add_argument( + "--num-workers", + "-n", + type=int, + default=2, + help="Sets number of workers for training.", + ) + parser.add_argument( + "--num-samples", + type=int, + default=2, + help="Sets number of samples for training.", + ) + parser.add_argument( + "--use-gpu", action="store_true", default=False, help="Use GPU for training." 
+ ) + + args = parser.parse_args() + + if args.smoke_test: + # 2 workers, 1 for trainer, 1 for datasets + ray.init(num_cpus=4) + tune_linear(num_workers=2, num_samples=1, use_gpu=False) + else: + ray.init(address=args.address) + tune_linear( + num_workers=args.num_workers, + use_gpu=args.use_gpu, + num_samples=args.num_samples, + ) diff --git a/.venv/lib/python3.11/site-packages/ray/train/examples/pytorch_geometric/__init__.py b/.venv/lib/python3.11/site-packages/ray/train/examples/pytorch_geometric/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/.venv/lib/python3.11/site-packages/ray/train/examples/pytorch_geometric/__pycache__/__init__.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/train/examples/pytorch_geometric/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5d895d6faad274c9164c67a873a0dd2f53a5d976 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/train/examples/pytorch_geometric/__pycache__/__init__.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/train/examples/pytorch_geometric/__pycache__/distributed_sage_example.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/train/examples/pytorch_geometric/__pycache__/distributed_sage_example.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0efa4343f44e658b3effcc4faa36a9871e18ff1b Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/train/examples/pytorch_geometric/__pycache__/distributed_sage_example.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/train/examples/pytorch_geometric/distributed_sage_example.py b/.venv/lib/python3.11/site-packages/ray/train/examples/pytorch_geometric/distributed_sage_example.py new file mode 100644 index 0000000000000000000000000000000000000000..0f3b1e126d05a266b43496fa9ec3c097c3aaba19 --- /dev/null +++ 
b/.venv/lib/python3.11/site-packages/ray/train/examples/pytorch_geometric/distributed_sage_example.py @@ -0,0 +1,228 @@ +# Adapted from https://github.com/pyg-team/pytorch_geometric/blob/2.1.0 +# /examples/multi_gpu/distributed_sampling.py + +import argparse +import os + +import torch +import torch.nn.functional as F +from filelock import FileLock +from torch_geometric.datasets import FakeDataset, Reddit +from torch_geometric.loader import NeighborSampler +from torch_geometric.nn import SAGEConv +from torch_geometric.transforms import RandomNodeSplit + +from ray import train +from ray.train import ScalingConfig +from ray.train.torch import TorchTrainer + + +class SAGE(torch.nn.Module): + def __init__(self, in_channels, hidden_channels, out_channels, num_layers=2): + super().__init__() + self.num_layers = num_layers + + self.convs = torch.nn.ModuleList() + self.convs.append(SAGEConv(in_channels, hidden_channels)) + for _ in range(self.num_layers - 2): + self.convs.append(SAGEConv(hidden_channels, hidden_channels)) + self.convs.append(SAGEConv(hidden_channels, out_channels)) + + def forward(self, x, adjs): + for i, (edge_index, _, size) in enumerate(adjs): + x_target = x[: size[1]] # Target nodes are always placed first. 
+ x = self.convs[i]((x, x_target), edge_index) + if i != self.num_layers - 1: + x = F.relu(x) + x = F.dropout(x, p=0.5, training=self.training) + return x.log_softmax(dim=-1) + + @torch.no_grad() + def test(self, x_all, subgraph_loader): + for i in range(self.num_layers): + xs = [] + for batch_size, n_id, adj in subgraph_loader: + edge_index, _, size = adj + x = x_all[n_id.to(x_all.device)].to(train.torch.get_device()) + x_target = x[: size[1]] + x = self.convs[i]((x, x_target), edge_index) + if i != self.num_layers - 1: + x = F.relu(x) + xs.append(x.cpu()) + + x_all = torch.cat(xs, dim=0) + + return x_all + + +def train_loop_per_worker(train_loop_config): + dataset = train_loop_config["dataset_fn"]() + batch_size = train_loop_config["batch_size"] + num_epochs = train_loop_config["num_epochs"] + + data = dataset[0] + train_idx = data.train_mask.nonzero(as_tuple=False).view(-1) + train_idx = train_idx.split( + train_idx.size(0) // train.get_context().get_world_size() + )[train.get_context().get_world_rank()] + + train_loader = NeighborSampler( + data.edge_index, + node_idx=train_idx, + sizes=[25, 10], + batch_size=batch_size, + shuffle=True, + ) + + # Disable distributed sampler since the train_loader has already been split above. + train_loader = train.torch.prepare_data_loader(train_loader, add_dist_sampler=False) + + # Do validation on rank 0 worker only. 
+ if train.get_context().get_world_rank() == 0: + subgraph_loader = NeighborSampler( + data.edge_index, node_idx=None, sizes=[-1], batch_size=2048, shuffle=False + ) + subgraph_loader = train.torch.prepare_data_loader( + subgraph_loader, add_dist_sampler=False + ) + + model = SAGE(dataset.num_features, 256, dataset.num_classes) + model = train.torch.prepare_model(model) + optimizer = torch.optim.Adam(model.parameters(), lr=0.01) + + x, y = data.x.to(train.torch.get_device()), data.y.to(train.torch.get_device()) + + for epoch in range(num_epochs): + model.train() + + # ``batch_size`` is the number of samples in the current batch. + # ``n_id`` are the ids of all the nodes used in the computation. This is + # needed to pull in the necessary features just for the current batch that is + # being trained on. + # ``adjs`` is a list of 3 element tuple consisting of ``(edge_index, e_id, + # size)`` for each sample in the batch, where ``edge_index``represent the + # edges of the sampled subgraph, ``e_id`` are the ids of the edges in the + # sample, and ``size`` holds the shape of the subgraph. + # See ``torch_geometric.loader.neighbor_sampler.NeighborSampler`` for more info. + for batch_size, n_id, adjs in train_loader: + optimizer.zero_grad() + out = model(x[n_id], adjs) + loss = F.nll_loss(out, y[n_id[:batch_size]]) + loss.backward() + optimizer.step() + + if train.get_context().get_world_rank() == 0: + print(f"Epoch: {epoch:03d}, Loss: {loss:.4f}") + + train_accuracy = validation_accuracy = test_accuracy = None + + # Do validation on rank 0 worker only. 
+ if train.get_context().get_world_rank() == 0: + model.eval() + with torch.no_grad(): + out = model.module.test(x, subgraph_loader) + res = out.argmax(dim=-1) == data.y + train_accuracy = int(res[data.train_mask].sum()) / int( + data.train_mask.sum() + ) + validation_accuracy = int(res[data.val_mask].sum()) / int( + data.val_mask.sum() + ) + test_accuracy = int(res[data.test_mask].sum()) / int(data.test_mask.sum()) + + train.report( + dict( + train_accuracy=train_accuracy, + validation_accuracy=validation_accuracy, + test_accuracy=test_accuracy, + ) + ) + + +def gen_fake_dataset(): + """Returns a function to be called on each worker that returns a Fake Dataset.""" + + # For fake dataset, since the dataset is randomized, we create it once on the + # driver, and then send the same dataset to all the training workers. + # Use 10% of nodes for validation and 10% for testing. + fake_dataset = FakeDataset(transform=RandomNodeSplit(num_val=0.1, num_test=0.1)) + + def gen_dataset(): + return fake_dataset + + return gen_dataset + + +def gen_reddit_dataset(): + """Returns a function to be called on each worker that returns Reddit Dataset.""" + + # For Reddit dataset, we have to download the data on each node, so we create the + # dataset on each training worker. 
+ with FileLock(os.path.expanduser("~/.reddit_dataset_lock")): + dataset = Reddit("./data/Reddit") + return dataset + + +def train_gnn( + num_workers=2, use_gpu=False, epochs=3, global_batch_size=32, dataset="reddit" +): + per_worker_batch_size = global_batch_size // num_workers + + trainer = TorchTrainer( + train_loop_per_worker=train_loop_per_worker, + train_loop_config={ + "num_epochs": epochs, + "batch_size": per_worker_batch_size, + "dataset_fn": gen_reddit_dataset + if dataset == "reddit" + else gen_fake_dataset(), + }, + scaling_config=ScalingConfig(num_workers=num_workers, use_gpu=use_gpu), + ) + result = trainer.fit() + print(result.metrics) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument( + "--address", required=False, type=str, help="the address to use for Ray" + ) + parser.add_argument( + "--num-workers", + "-n", + type=int, + default=2, + help="Sets number of workers for training.", + ) + parser.add_argument( + "--use-gpu", action="store_true", help="Whether to use GPU for training." + ) + parser.add_argument( + "--epochs", type=int, default=3, help="Number of epochs to train for." + ) + parser.add_argument( + "--global-batch-size", + "-b", + type=int, + default=32, + help="Global batch size to use for training.", + ) + parser.add_argument( + "--dataset", + "-d", + type=str, + choices=["reddit", "fake"], + default="reddit", + help="The dataset to use. 
Either 'reddit' or 'fake' Defaults to 'reddit'.", + ) + + args, _ = parser.parse_known_args() + + train_gnn( + num_workers=args.num_workers, + use_gpu=args.use_gpu, + epochs=args.epochs, + global_batch_size=args.global_batch_size, + dataset=args.dataset, + ) diff --git a/.venv/lib/python3.11/site-packages/ray/train/examples/tf/__init__.py b/.venv/lib/python3.11/site-packages/ray/train/examples/tf/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/.venv/lib/python3.11/site-packages/ray/train/examples/tf/__pycache__/__init__.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/train/examples/tf/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9581a7511f678a6a148c8495204718f5fd58f1ae Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/train/examples/tf/__pycache__/__init__.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/train/examples/tf/__pycache__/tensorflow_autoencoder_example.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/train/examples/tf/__pycache__/tensorflow_autoencoder_example.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..db37eb16bca1226989077c05d286885e37c46c03 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/train/examples/tf/__pycache__/tensorflow_autoencoder_example.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/train/examples/tf/__pycache__/tensorflow_mnist_example.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/train/examples/tf/__pycache__/tensorflow_mnist_example.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8d42c7dbf0c8fa4581a45921aee139fc2ea74e09 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/train/examples/tf/__pycache__/tensorflow_mnist_example.cpython-311.pyc differ diff --git 
a/.venv/lib/python3.11/site-packages/ray/train/examples/tf/__pycache__/tensorflow_quick_start.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/train/examples/tf/__pycache__/tensorflow_quick_start.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6dc7bd8c71a0060507936c2e5d6da461690d9bd9 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/train/examples/tf/__pycache__/tensorflow_quick_start.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/train/examples/tf/__pycache__/tensorflow_regression_example.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/train/examples/tf/__pycache__/tensorflow_regression_example.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1cf560b3f2bd2b96d6d80768556ae1d253233075 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/train/examples/tf/__pycache__/tensorflow_regression_example.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/train/examples/tf/__pycache__/tune_tensorflow_autoencoder_example.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/train/examples/tf/__pycache__/tune_tensorflow_autoencoder_example.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..976c7072987738a0e451eb0e81195f69f539d12d Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/train/examples/tf/__pycache__/tune_tensorflow_autoencoder_example.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/train/examples/tf/__pycache__/tune_tensorflow_mnist_example.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/train/examples/tf/__pycache__/tune_tensorflow_mnist_example.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6545a360c5cf132bbad1b615dcc18a8538017326 Binary files /dev/null and 
b/.venv/lib/python3.11/site-packages/ray/train/examples/tf/__pycache__/tune_tensorflow_mnist_example.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/train/examples/tf/tensorflow_autoencoder_example.py b/.venv/lib/python3.11/site-packages/ray/train/examples/tf/tensorflow_autoencoder_example.py new file mode 100644 index 0000000000000000000000000000000000000000..6d976203efdc9fee5bbfbb27a8aa39d080bea994 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/train/examples/tf/tensorflow_autoencoder_example.py @@ -0,0 +1,174 @@ +# This example showcases how to use Tensorflow with Ray Train. +# Original code: +# https://www.tensorflow.org/tutorials/distribute/multi_worker_with_keras +# https://blog.keras.io/building-autoencoders-in-keras.html +import argparse + +import numpy as np +import pandas as pd +import tensorflow as tf +import tensorflow_datasets as tfds + +import ray +from ray import train +from ray.air.integrations.keras import ReportCheckpointCallback +from ray.data.datasource import SimpleTensorFlowDatasource +from ray.data.extensions import TensorArray +from ray.train import Result, ScalingConfig +from ray.train.tensorflow import TensorflowTrainer, prepare_dataset_shard + + +def get_dataset(split_type="train"): + def dataset_factory(): + return tfds.load("mnist", split=[split_type], as_supervised=True)[0].take(128) + + dataset = ray.data.read_datasource( + SimpleTensorFlowDatasource(), dataset_factory=dataset_factory + ) + + def normalize_images(x): + x = np.float32(x.numpy()) / 255.0 + x = np.reshape(x, (-1,)) + return x + + def preprocess_dataset(batch): + return [ + (normalize_images(image), normalize_images(image)) for image, _ in batch + ] + + dataset = dataset.map_batches(preprocess_dataset) + + def convert_batch_to_pandas(batch): + + images = [TensorArray(image) for image, _ in batch] + # because we did autoencoder here + df = pd.DataFrame({"image": images, "label": images}) + return df + + dataset = 
dataset.map_batches(convert_batch_to_pandas) + return dataset + + +def build_autoencoder_model() -> tf.keras.Model: + model = tf.keras.Sequential( + [ + tf.keras.Input(shape=(784,)), + # encoder + tf.keras.layers.Dense(128, activation="relu"), + tf.keras.layers.Dense(64, activation="relu"), + tf.keras.layers.Dense(32, activation="relu"), + # decoder + tf.keras.layers.Dense(64, activation="relu"), + tf.keras.layers.Dense(128, activation="relu"), + tf.keras.layers.Dense(784, activation="sigmoid"), + ] + ) + return model + + +def train_func(config: dict): + + per_worker_batch_size = config.get("batch_size", 64) + epochs = config.get("epochs", 3) + + dataset_shard = train.get_dataset_shard("train") + + strategy = tf.distribute.MultiWorkerMirroredStrategy() + + with strategy.scope(): + # Model building/compiling need to be within `strategy.scope()`. + multi_worker_model = build_autoencoder_model() + learning_rate = config.get("lr", 0.001) + multi_worker_model.compile( + loss=tf.keras.losses.BinaryCrossentropy(), + optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate), + metrics=[ + "binary_crossentropy", + ], + ) + + def to_tf_dataset(dataset, batch_size): + def to_tensor_iterator(): + for batch in dataset.iter_tf_batches( + batch_size=batch_size, dtypes=tf.float32 + ): + yield batch["image"], batch["label"] + + output_signature = ( + tf.TensorSpec(shape=(None, 784), dtype=tf.float32), + tf.TensorSpec(shape=(None, 784), dtype=tf.float32), + ) + tf_dataset = tf.data.Dataset.from_generator( + to_tensor_iterator, output_signature=output_signature + ) + return prepare_dataset_shard(tf_dataset) + + results = [] + for epoch in range(epochs): + tf_dataset = to_tf_dataset( + dataset=dataset_shard, + batch_size=per_worker_batch_size, + ) + history = multi_worker_model.fit( + tf_dataset, callbacks=[ReportCheckpointCallback()] + ) + results.append(history.history) + return results + + +def train_tensorflow_mnist( + num_workers: int = 2, use_gpu: bool = False, epochs: int 
= 4 +) -> Result: + train_dataset = get_dataset(split_type="train") + config = {"lr": 1e-3, "batch_size": 64, "epochs": epochs} + scaling_config = ScalingConfig(num_workers=num_workers, use_gpu=use_gpu) + trainer = TensorflowTrainer( + train_loop_per_worker=train_func, + train_loop_config=config, + datasets={"train": train_dataset}, + scaling_config=scaling_config, + ) + + results = trainer.fit() + print(results.metrics) + return results + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument( + "--address", required=False, type=str, help="the address to use for Ray" + ) + parser.add_argument( + "--num-workers", + "-n", + type=int, + default=2, + help="Sets number of workers for training.", + ) + parser.add_argument( + "--use-gpu", action="store_true", default=False, help="Enables GPU training" + ) + parser.add_argument( + "--epochs", type=int, default=3, help="Number of epochs to train for." + ) + parser.add_argument( + "--smoke-test", + action="store_true", + default=False, + help="Finish quickly for testing.", + ) + + args, _ = parser.parse_known_args() + + if args.smoke_test: + # 2 workers, 1 for trainer, 1 for datasets + num_gpus = args.num_workers if args.use_gpu else 0 + ray.init(num_cpus=4, num_gpus=num_gpus) + result = train_tensorflow_mnist(num_workers=2, use_gpu=args.use_gpu) + else: + ray.init(address=args.address) + result = train_tensorflow_mnist( + num_workers=args.num_workers, use_gpu=args.use_gpu, epochs=args.epochs + ) + print(result) diff --git a/.venv/lib/python3.11/site-packages/ray/train/examples/tf/tensorflow_mnist_example.py b/.venv/lib/python3.11/site-packages/ray/train/examples/tf/tensorflow_mnist_example.py new file mode 100644 index 0000000000000000000000000000000000000000..3fd5d7c759df6723a7a16a622d70c743519ed638 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/train/examples/tf/tensorflow_mnist_example.py @@ -0,0 +1,135 @@ +# This example showcases how to use Tensorflow with Ray Train. 
+# Original code: +# https://www.tensorflow.org/tutorials/distribute/multi_worker_with_keras +import argparse +import json +import os + +import numpy as np +import tensorflow as tf +from filelock import FileLock + +from ray.air.integrations.keras import ReportCheckpointCallback +from ray.train import Result, RunConfig, ScalingConfig +from ray.train.tensorflow import TensorflowTrainer + + +def mnist_dataset(batch_size: int) -> tf.data.Dataset: + with FileLock(os.path.expanduser("~/.mnist_lock")): + (x_train, y_train), _ = tf.keras.datasets.mnist.load_data() + # The `x` arrays are in uint8 and have values in the [0, 255] range. + # You need to convert them to float32 with values in the [0, 1] range. + x_train = x_train / np.float32(255) + y_train = y_train.astype(np.int64) + train_dataset = ( + tf.data.Dataset.from_tensor_slices((x_train, y_train)) + .shuffle(60000) + .repeat() + .batch(batch_size) + ) + return train_dataset + + +def build_cnn_model() -> tf.keras.Model: + model = tf.keras.Sequential( + [ + tf.keras.Input(shape=(28, 28)), + tf.keras.layers.Reshape(target_shape=(28, 28, 1)), + tf.keras.layers.Conv2D(32, 3, activation="relu"), + tf.keras.layers.Flatten(), + tf.keras.layers.Dense(128, activation="relu"), + tf.keras.layers.Dense(10), + ] + ) + return model + + +def train_func(config: dict): + per_worker_batch_size = config.get("batch_size", 64) + epochs = config.get("epochs", 3) + steps_per_epoch = config.get("steps_per_epoch", 70) + + tf_config = json.loads(os.environ["TF_CONFIG"]) + num_workers = len(tf_config["cluster"]["worker"]) + + strategy = tf.distribute.MultiWorkerMirroredStrategy() + + global_batch_size = per_worker_batch_size * num_workers + multi_worker_dataset = mnist_dataset(global_batch_size) + + with strategy.scope(): + # Model building/compiling need to be within `strategy.scope()`. 
+ multi_worker_model = build_cnn_model() + learning_rate = config.get("lr", 0.001) + multi_worker_model.compile( + loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), + optimizer=tf.keras.optimizers.SGD(learning_rate=learning_rate), + metrics=["accuracy"], + ) + + history = multi_worker_model.fit( + multi_worker_dataset, + epochs=epochs, + steps_per_epoch=steps_per_epoch, + callbacks=[ReportCheckpointCallback()], + ) + results = history.history + return results + + +def train_tensorflow_mnist( + num_workers: int = 2, + use_gpu: bool = False, + epochs: int = 4, + storage_path: str = None, +) -> Result: + config = {"lr": 1e-3, "batch_size": 64, "epochs": epochs} + trainer = TensorflowTrainer( + train_loop_per_worker=train_func, + train_loop_config=config, + scaling_config=ScalingConfig(num_workers=num_workers, use_gpu=use_gpu), + run_config=RunConfig(storage_path=storage_path), + ) + results = trainer.fit() + return results + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument( + "--address", required=False, type=str, help="the address to use for Ray" + ) + parser.add_argument( + "--num-workers", + "-n", + type=int, + default=2, + help="Sets number of workers for training.", + ) + parser.add_argument( + "--use-gpu", action="store_true", default=False, help="Enables GPU training" + ) + parser.add_argument( + "--epochs", type=int, default=3, help="Number of epochs to train for." 
+ ) + parser.add_argument( + "--smoke-test", + action="store_true", + default=False, + help="Finish quickly for testing.", + ) + + args, _ = parser.parse_known_args() + + import ray + + if args.smoke_test: + # 2 workers, 1 for trainer, 1 for datasets + num_gpus = args.num_workers if args.use_gpu else 0 + ray.init(num_cpus=4, num_gpus=num_gpus) + train_tensorflow_mnist(num_workers=2, use_gpu=args.use_gpu) + else: + ray.init(address=args.address) + train_tensorflow_mnist( + num_workers=args.num_workers, use_gpu=args.use_gpu, epochs=args.epochs + ) diff --git a/.venv/lib/python3.11/site-packages/ray/train/examples/tf/tensorflow_quick_start.py b/.venv/lib/python3.11/site-packages/ray/train/examples/tf/tensorflow_quick_start.py new file mode 100644 index 0000000000000000000000000000000000000000..4b078675960230461f3c6d493d10a3f56ec0ddea --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/train/examples/tf/tensorflow_quick_start.py @@ -0,0 +1,87 @@ +# ruff: noqa +# fmt: off +# isort: skip_file + +# __tf_setup_begin__ +import sys +import numpy as np + +if sys.version_info >= (3, 12): + # Tensorflow is not installed for Python 3.12 because of keras compatibility. + sys.exit(0) +else: + import tensorflow as tf + +def mnist_dataset(batch_size): + (x_train, y_train), _ = tf.keras.datasets.mnist.load_data() + # The `x` arrays are in uint8 and have values in the [0, 255] range. + # You need to convert them to float32 with values in the [0, 1] range. 
+ x_train = x_train / np.float32(255) + y_train = y_train.astype(np.int64) + train_dataset = tf.data.Dataset.from_tensor_slices( + (x_train, y_train)).shuffle(60000).repeat().batch(batch_size) + return train_dataset + + +def build_and_compile_cnn_model(): + model = tf.keras.Sequential([ + tf.keras.layers.InputLayer(input_shape=(28, 28)), + tf.keras.layers.Reshape(target_shape=(28, 28, 1)), + tf.keras.layers.Conv2D(32, 3, activation='relu'), + tf.keras.layers.Flatten(), + tf.keras.layers.Dense(128, activation='relu'), + tf.keras.layers.Dense(10) + ]) + model.compile( + loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), + optimizer=tf.keras.optimizers.SGD(learning_rate=0.001), + metrics=['accuracy']) + return model +# __tf_setup_end__ + +# __tf_single_begin__ +def train_func(): + batch_size = 64 + single_worker_dataset = mnist_dataset(batch_size) + single_worker_model = build_and_compile_cnn_model() + single_worker_model.fit(single_worker_dataset, epochs=3, steps_per_epoch=70) +# __tf_single_end__ + +# __tf_distributed_begin__ +import json +import os + +def train_func_distributed(): + per_worker_batch_size = 64 + # This environment variable will be set by Ray Train. + tf_config = json.loads(os.environ['TF_CONFIG']) + num_workers = len(tf_config['cluster']['worker']) + + strategy = tf.distribute.MultiWorkerMirroredStrategy() + + global_batch_size = per_worker_batch_size * num_workers + multi_worker_dataset = mnist_dataset(global_batch_size) + + with strategy.scope(): + # Model building/compiling need to be within `strategy.scope()`. + multi_worker_model = build_and_compile_cnn_model() + + multi_worker_model.fit(multi_worker_dataset, epochs=3, steps_per_epoch=70) +# __tf_distributed_end__ + +if __name__ == "__main__": + # __tf_single_run_begin__ + train_func() + # __tf_single_run_end__ + + # __tf_trainer_begin__ + from ray.train.tensorflow import TensorflowTrainer + from ray.train import ScalingConfig + + # For GPU Training, set `use_gpu` to True. 
+ use_gpu = False + + trainer = TensorflowTrainer(train_func_distributed, scaling_config=ScalingConfig(num_workers=4, use_gpu=use_gpu)) + + trainer.fit() + # __tf_trainer_end__ diff --git a/.venv/lib/python3.11/site-packages/ray/train/examples/tf/tensorflow_regression_example.py b/.venv/lib/python3.11/site-packages/ray/train/examples/tf/tensorflow_regression_example.py new file mode 100644 index 0000000000000000000000000000000000000000..b4c80f88bd7f9e43a58a4561905ce1f0fe590a9a --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/train/examples/tf/tensorflow_regression_example.py @@ -0,0 +1,111 @@ +import argparse +import sys + +import ray +from ray import train +from ray.data.preprocessors import Concatenator +from ray.train import Result, ScalingConfig + +if sys.version_info >= (3, 12): + # Skip this test in Python 3.12+ because TensorFlow is not supported. + sys.exit(0) +else: + import tensorflow as tf + + from ray.air.integrations.keras import ReportCheckpointCallback + from ray.train.tensorflow import TensorflowTrainer + + +def build_model() -> tf.keras.Model: + model = tf.keras.Sequential( + [ + tf.keras.layers.InputLayer(input_shape=(100,)), + tf.keras.layers.Dense(10), + tf.keras.layers.Dense(1), + ] + ) + return model + + +def train_func(config: dict): + batch_size = config.get("batch_size", 64) + epochs = config.get("epochs", 3) + + strategy = tf.distribute.MultiWorkerMirroredStrategy() + with strategy.scope(): + # Model building/compiling need to be within `strategy.scope()`. 
+ multi_worker_model = build_model() + multi_worker_model.compile( + optimizer=tf.keras.optimizers.SGD(learning_rate=config.get("lr", 1e-3)), + loss=tf.keras.losses.mean_absolute_error, + metrics=[tf.keras.metrics.mean_squared_error], + ) + + dataset = train.get_dataset_shard("train") + + results = [] + for _ in range(epochs): + tf_dataset = dataset.to_tf( + feature_columns="x", label_columns="y", batch_size=batch_size + ) + history = multi_worker_model.fit( + tf_dataset, callbacks=[ReportCheckpointCallback()] + ) + results.append(history.history) + return results + + +def train_tensorflow_regression(num_workers: int = 2, use_gpu: bool = False) -> Result: + dataset = ray.data.read_csv("s3://anonymous@air-example-data/regression.csv") + columns_to_concatenate = [f"x{i:03}" for i in range(100)] + preprocessor = Concatenator(columns=columns_to_concatenate, output_column_name="x") + dataset = preprocessor.fit_transform(dataset) + + config = {"lr": 1e-3, "batch_size": 32, "epochs": 4} + scaling_config = ScalingConfig(num_workers=num_workers, use_gpu=use_gpu) + trainer = TensorflowTrainer( + train_loop_per_worker=train_func, + train_loop_config=config, + scaling_config=scaling_config, + datasets={"train": dataset}, + ) + results = trainer.fit() + print(results.metrics) + return results + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument( + "--address", required=False, type=str, help="the address to use for Ray" + ) + parser.add_argument( + "--num-workers", + "-n", + type=int, + default=2, + help="Sets number of workers for training.", + ) + parser.add_argument( + "--use-gpu", action="store_true", default=False, help="Enables GPU training" + ) + parser.add_argument( + "--smoke-test", + action="store_true", + default=False, + help="Finish quickly for testing.", + ) + + args, _ = parser.parse_known_args() + + if args.smoke_test: + # 2 workers, 1 for trainer, 1 for datasets + num_gpus = args.num_workers if args.use_gpu else 0 + 
ray.init(num_cpus=4, num_gpus=num_gpus) + result = train_tensorflow_regression(num_workers=2, use_gpu=args.use_gpu) + else: + ray.init(address=args.address) + result = train_tensorflow_regression( + num_workers=args.num_workers, use_gpu=args.use_gpu + ) + print(result) diff --git a/.venv/lib/python3.11/site-packages/ray/train/examples/tf/tune_tensorflow_mnist_example.py b/.venv/lib/python3.11/site-packages/ray/train/examples/tf/tune_tensorflow_mnist_example.py new file mode 100644 index 0000000000000000000000000000000000000000..a1a1860516caa099ad4e1905212e8065a051a72f --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/train/examples/tf/tune_tensorflow_mnist_example.py @@ -0,0 +1,80 @@ +import argparse +import sys + +import ray +from ray import tune +from ray.train import ScalingConfig +from ray.tune.tune_config import TuneConfig +from ray.tune.tuner import Tuner + +if sys.version_info >= (3, 12): + # Skip this test in Python 3.12+ because TensorFlow is not supported. + exit(0) +else: + from ray.train.examples.tf.tensorflow_mnist_example import train_func + from ray.train.tensorflow import TensorflowTrainer + + +def tune_tensorflow_mnist( + num_workers: int = 2, num_samples: int = 2, use_gpu: bool = False +): + trainer = TensorflowTrainer( + train_loop_per_worker=train_func, + scaling_config=ScalingConfig(num_workers=num_workers, use_gpu=use_gpu), + ) + tuner = Tuner( + trainer, + tune_config=TuneConfig(num_samples=num_samples, metric="accuracy", mode="max"), + param_space={ + "train_loop_config": { + "lr": tune.loguniform(1e-4, 1e-1), + "batch_size": tune.choice([32, 64, 128]), + "epochs": 3, + } + }, + ) + best_accuracy = tuner.fit().get_best_result().metrics["accuracy"] + print(f"Best accuracy config: {best_accuracy}") + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument( + "--smoke-test", + action="store_true", + default=False, + help="Finish quickly for testing.", + ) + parser.add_argument( + "--address", 
required=False, type=str, help="the address to use for Ray" + ) + parser.add_argument( + "--num-workers", + "-n", + type=int, + default=2, + help="Sets number of workers for training.", + ) + parser.add_argument( + "--num-samples", + type=int, + default=2, + help="Sets number of samples for training.", + ) + parser.add_argument( + "--use-gpu", action="store_true", default=False, help="Enables GPU training" + ) + + args = parser.parse_args() + + if args.smoke_test: + num_gpus = args.num_workers if args.use_gpu else 0 + ray.init(num_cpus=8, num_gpus=num_gpus) + tune_tensorflow_mnist(num_workers=2, num_samples=2, use_gpu=args.use_gpu) + else: + ray.init(address=args.address) + tune_tensorflow_mnist( + num_workers=args.num_workers, + num_samples=args.num_samples, + use_gpu=args.use_gpu, + ) diff --git a/.venv/lib/python3.11/site-packages/ray/train/tensorflow/__init__.py b/.venv/lib/python3.11/site-packages/ray/train/tensorflow/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a156e133d5facccb9b06707d4b6f15dad509ca17 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/train/tensorflow/__init__.py @@ -0,0 +1,32 @@ +# isort: off +try: + import tensorflow as tf # noqa: F401 +except ModuleNotFoundError: + raise ModuleNotFoundError( + "TensorFlow isn't installed. To install TensorFlow, run 'pip install " + "tensorflow'." 
+ ) +# isort: on + +from ray.train.tensorflow.config import TensorflowConfig +from ray.train.tensorflow.tensorflow_checkpoint import TensorflowCheckpoint +from ray.train.tensorflow.tensorflow_predictor import TensorflowPredictor +from ray.train.tensorflow.tensorflow_trainer import TensorflowTrainer +from ray.train.tensorflow.train_loop_utils import prepare_dataset_shard +from ray.train.v2._internal.constants import is_v2_enabled + +if is_v2_enabled(): + from ray.train.v2.tensorflow.tensorflow_trainer import ( # noqa: F811 + TensorflowTrainer, + ) + +__all__ = [ + "TensorflowCheckpoint", + "TensorflowConfig", + "prepare_dataset_shard", + "TensorflowPredictor", + "TensorflowTrainer", +] + + +# DO NOT ADD ANYTHING AFTER THIS LINE. diff --git a/.venv/lib/python3.11/site-packages/ray/train/tensorflow/__pycache__/__init__.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/train/tensorflow/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..41eb7fb57d4cd1cbe7b1156b24230b6249c58102 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/train/tensorflow/__pycache__/__init__.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/train/tensorflow/__pycache__/config.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/train/tensorflow/__pycache__/config.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8a58e0cb730d22bb89aefe2e2a1b21ea97641e65 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/train/tensorflow/__pycache__/config.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/train/tensorflow/__pycache__/keras.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/train/tensorflow/__pycache__/keras.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7fef580de1524d91ac66faeb98e52194849a8d27 Binary files /dev/null and 
b/.venv/lib/python3.11/site-packages/ray/train/tensorflow/__pycache__/keras.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/train/tensorflow/__pycache__/tensorflow_checkpoint.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/train/tensorflow/__pycache__/tensorflow_checkpoint.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9f0565957bbac75d5a9effa59f750339ac5ced6c Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/train/tensorflow/__pycache__/tensorflow_checkpoint.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/train/tensorflow/__pycache__/tensorflow_predictor.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/train/tensorflow/__pycache__/tensorflow_predictor.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..441347076e5fd549356f418f6ab99181697cdeb9 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/train/tensorflow/__pycache__/tensorflow_predictor.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/train/tensorflow/__pycache__/tensorflow_trainer.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/train/tensorflow/__pycache__/tensorflow_trainer.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..feb05e482f109a60f40ff71b472f5719f1ef183d Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/train/tensorflow/__pycache__/tensorflow_trainer.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/train/tensorflow/__pycache__/train_loop_utils.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/train/tensorflow/__pycache__/train_loop_utils.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5e4e55a4baaf6ae3916697a7b991f8f48a365e6a Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/train/tensorflow/__pycache__/train_loop_utils.cpython-311.pyc differ diff --git 
a/.venv/lib/python3.11/site-packages/ray/train/tensorflow/config.py b/.venv/lib/python3.11/site-packages/ray/train/tensorflow/config.py new file mode 100644 index 0000000000000000000000000000000000000000..ae3baedb2a6fef55e65fb30af17d92d7e7192a93 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/train/tensorflow/config.py @@ -0,0 +1,60 @@ +import json +import logging +import os +from dataclasses import dataclass +from typing import List + +import ray +from ray.train._internal.utils import get_address_and_port +from ray.train._internal.worker_group import WorkerGroup +from ray.train.backend import Backend, BackendConfig +from ray.util import PublicAPI + +logger = logging.getLogger(__name__) + + +@PublicAPI(stability="beta") +@dataclass +class TensorflowConfig(BackendConfig): + @property + def backend_cls(self): + return _TensorflowBackend + + +def _setup_tensorflow_environment(worker_addresses: List[str], index: int): + """Set up distributed Tensorflow training information. + + This function should be called on each worker. + + Args: + worker_addresses: Addresses of all the workers. + index: Index (i.e. world rank) of the current worker. + """ + tf_config = { + "cluster": {"worker": worker_addresses}, + "task": {"type": "worker", "index": index}, + } + os.environ["TF_CONFIG"] = json.dumps(tf_config) + + +class _TensorflowBackend(Backend): + def on_start(self, worker_group: WorkerGroup, backend_config: TensorflowConfig): + # Compute URL for initializing distributed setup. + def get_url(): + address, port = get_address_and_port() + return f"{address}:{port}" + + urls = worker_group.execute(get_url) + + # Get setup tasks in order to throw errors on failure. 
+ setup_futures = [] + for i in range(len(worker_group)): + setup_futures.append( + worker_group.execute_single_async( + i, + _setup_tensorflow_environment, + worker_addresses=urls, + index=i, + ) + ) + ray.get(setup_futures) diff --git a/.venv/lib/python3.11/site-packages/ray/train/tensorflow/keras.py b/.venv/lib/python3.11/site-packages/ray/train/tensorflow/keras.py new file mode 100644 index 0000000000000000000000000000000000000000..3594779c8db1801e98aca325c7a40deff299cc56 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/train/tensorflow/keras.py @@ -0,0 +1,3 @@ +from ray.air.integrations.keras import ReportCheckpointCallback + +ReportCheckpointCallback.__module__ = "ray.train.tensorflow.keras" diff --git a/.venv/lib/python3.11/site-packages/ray/train/tensorflow/tensorflow_checkpoint.py b/.venv/lib/python3.11/site-packages/ray/train/tensorflow/tensorflow_checkpoint.py new file mode 100644 index 0000000000000000000000000000000000000000..183af6e843deea8d3322d7c9bb729961f3be3aa3 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/train/tensorflow/tensorflow_checkpoint.py @@ -0,0 +1,155 @@ +import os +import shutil +import tempfile +from pathlib import Path +from typing import TYPE_CHECKING, Optional + +import tensorflow as tf +from tensorflow import keras + +from ray.train._internal.framework_checkpoint import FrameworkCheckpoint +from ray.util.annotations import PublicAPI + +if TYPE_CHECKING: + from ray.data.preprocessor import Preprocessor + + +@PublicAPI(stability="beta") +class TensorflowCheckpoint(FrameworkCheckpoint): + """A :py:class:`~ray.train.Checkpoint` with TensorFlow-specific functionality.""" + + MODEL_FILENAME_KEY = "_model_filename" + + @classmethod + def from_model( + cls, + model: keras.Model, + *, + preprocessor: Optional["Preprocessor"] = None, + ) -> "TensorflowCheckpoint": + """Create a :py:class:`~ray.train.Checkpoint` that stores a Keras model. 
+ + The checkpoint created with this method needs to be paired with + `model` when used. + + Args: + model: The Keras model, whose weights are stored in the checkpoint. + preprocessor: A fitted preprocessor to be applied before inference. + + Returns: + A :py:class:`TensorflowCheckpoint` containing the specified model. + + Examples: + + .. testcode:: + + from ray.train.tensorflow import TensorflowCheckpoint + import tensorflow as tf + + model = tf.keras.applications.resnet.ResNet101() + checkpoint = TensorflowCheckpoint.from_model(model) + + .. testoutput:: + :options: +MOCK + :hide: + + ... # Model may or may not be downloaded + + """ + tempdir = tempfile.mkdtemp() + filename = "model.keras" + model.save(Path(tempdir, filename).as_posix()) + + checkpoint = cls.from_directory(tempdir) + if preprocessor: + checkpoint.set_preprocessor(preprocessor) + checkpoint.update_metadata({cls.MODEL_FILENAME_KEY: filename}) + return checkpoint + + @classmethod + def from_h5( + cls, file_path: str, *, preprocessor: Optional["Preprocessor"] = None + ) -> "TensorflowCheckpoint": + """Create a :py:class:`~ray.train.Checkpoint` that stores a Keras + model from H5 format. + + The checkpoint generated by this method contains all the information needed. + Thus no `model` is needed to be supplied when using this checkpoint. + + Args: + file_path: The path to the .h5 file to load model from. This is the + same path that is used for ``model.save(path)``. + preprocessor: A fitted preprocessor to be applied before inference. + + Returns: + A :py:class:`TensorflowCheckpoint` converted from h5 format. + + """ + if not os.path.isfile(file_path) or not file_path.endswith(".h5"): + raise ValueError( + "Please supply a h5 file path to `TensorflowCheckpoint.from_h5()`." 
+ ) + tempdir = tempfile.mkdtemp() + filename = os.path.basename(file_path) + new_checkpoint_file = Path(tempdir, filename).as_posix() + shutil.copy(file_path, new_checkpoint_file) + + checkpoint = cls.from_directory(tempdir) + if preprocessor: + checkpoint.set_preprocessor(preprocessor) + checkpoint.update_metadata({cls.MODEL_FILENAME_KEY: filename}) + return checkpoint + + @classmethod + def from_saved_model( + cls, dir_path: str, *, preprocessor: Optional["Preprocessor"] = None + ) -> "TensorflowCheckpoint": + """Create a :py:class:`~ray.train.Checkpoint` that stores a Keras + model from SavedModel format. + + The checkpoint generated by this method contains all the information needed. + Thus no `model` is needed to be supplied when using this checkpoint. + + Args: + dir_path: The directory containing the saved model. This is the same + directory as used by ``model.save(dir_path)``. + preprocessor: A fitted preprocessor to be applied before inference. + + Returns: + A :py:class:`TensorflowCheckpoint` converted from SavedModel format. + + """ + if not os.path.isdir(dir_path): + raise ValueError( + "Please supply a directory to `TensorflowCheckpoint.from_saved_model`" + ) + tempdir = tempfile.mkdtemp() + # TODO(ml-team): Replace this with copytree() + os.rmdir(tempdir) + shutil.copytree(dir_path, tempdir) + + checkpoint = cls.from_directory(tempdir) + if preprocessor: + checkpoint.set_preprocessor(preprocessor) + # NOTE: The entire directory is the checkpoint. + checkpoint.update_metadata({cls.MODEL_FILENAME_KEY: "."}) + return checkpoint + + def get_model( + self, + ) -> tf.keras.Model: + """Retrieve the model stored in this checkpoint. + + Returns: + The Tensorflow Keras model stored in the checkpoint. + """ + metadata = self.get_metadata() + if self.MODEL_FILENAME_KEY not in metadata: + raise ValueError( + "`TensorflowCheckpoint` cannot retrieve the model if you override the " + "checkpoint metadata. Please use `Checkpoint.update_metadata` instead." 
+ ) + model_filename = metadata[self.MODEL_FILENAME_KEY] + with self.as_directory() as checkpoint_dir: + model_path = Path(checkpoint_dir, model_filename).as_posix() + return keras.models.load_model(model_path) diff --git a/.venv/lib/python3.11/site-packages/ray/train/tensorflow/tensorflow_predictor.py b/.venv/lib/python3.11/site-packages/ray/train/tensorflow/tensorflow_predictor.py new file mode 100644 index 0000000000000000000000000000000000000000..ab353c333727b8310da1d6f388eb1641d27f26e5 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/train/tensorflow/tensorflow_predictor.py @@ -0,0 +1,247 @@ +import logging +from typing import TYPE_CHECKING, Callable, Dict, Optional, Type, Union + +import numpy as np +import tensorflow as tf + +from ray.air._internal.tensorflow_utils import convert_ndarray_batch_to_tf_tensor_batch +from ray.train._internal.dl_predictor import DLPredictor +from ray.train.predictor import DataBatchType +from ray.train.tensorflow import TensorflowCheckpoint +from ray.util import log_once +from ray.util.annotations import DeveloperAPI, PublicAPI + +if TYPE_CHECKING: + from ray.data.preprocessor import Preprocessor + +logger = logging.getLogger(__name__) + + +@PublicAPI(stability="beta") +class TensorflowPredictor(DLPredictor): + """A predictor for TensorFlow models. + + Args: + model: A Tensorflow Keras model to use for predictions. + preprocessor: A preprocessor used to transform data batches prior + to prediction. + model_weights: List of weights to use for the model. + use_gpu: If set, the model will be moved to GPU on instantiation and + prediction happens on GPU. + """ + + def __init__( + self, + *, + model: Optional[tf.keras.Model] = None, + preprocessor: Optional["Preprocessor"] = None, + use_gpu: bool = False, + ): + self.use_gpu = use_gpu + # TensorFlow model objects cannot be pickled, therefore we use + # a callable that returns the model and initialize it here, + # instead of having an initialized model object as an attribute. 
+ # Predictors are not serializable (see the implementation of __reduce__) + # in the Predictor class, so we can safely store the initialized model + # as an attribute. + if use_gpu: + # TODO (jiaodong): #26249 Use multiple GPU devices with sharded input + with tf.device("GPU:0"): + self._model = model + else: + self._model = model + gpu_devices = tf.config.list_physical_devices("GPU") + if len(gpu_devices) > 0 and log_once("tf_predictor_not_using_gpu"): + logger.warning( + "You have `use_gpu` as False but there are " + f"{len(gpu_devices)} GPUs detected on host where " + "prediction will only use CPU. Please consider explicitly " + "setting `TensorflowPredictor(use_gpu=True)` or " + "`batch_predictor.predict(ds, num_gpus_per_worker=1)` to " + "enable GPU prediction." + ) + super().__init__(preprocessor) + + def __repr__(self): + fn_name = getattr(self._model, "__name__", self._model) + fn_name_str = "" + if fn_name: + fn_name_str = str(fn_name)[:40] + return ( + f"{self.__class__.__name__}(" + f"model={fn_name_str!r}, " + f"preprocessor={self._preprocessor!r}, " + f"use_gpu={self.use_gpu!r})" + ) + + @classmethod + def from_checkpoint( + cls, + checkpoint: TensorflowCheckpoint, + model_definition: Optional[ + Union[Callable[[], tf.keras.Model], Type[tf.keras.Model]] + ] = None, + use_gpu: Optional[bool] = False, + ) -> "TensorflowPredictor": + """Instantiate the predictor from a TensorflowCheckpoint. + + Args: + checkpoint: The checkpoint to load the model and preprocessor from. + model_definition: A callable that returns a TensorFlow Keras model + to use. Model weights will be loaded from the checkpoint. + This is only needed if the `checkpoint` was created from + `TensorflowCheckpoint.from_model`. + use_gpu: Whether GPU should be used during prediction. + """ + if model_definition: + raise DeprecationWarning( + "`model_definition` is deprecated. `TensorflowCheckpoint.from_model` " + "now saves the full model definition in .keras format." 
+ ) + + model = checkpoint.get_model() + preprocessor = checkpoint.get_preprocessor() + return cls( + model=model, + preprocessor=preprocessor, + use_gpu=use_gpu, + ) + + @DeveloperAPI + def call_model( + self, inputs: Union[tf.Tensor, Dict[str, tf.Tensor]] + ) -> Union[tf.Tensor, Dict[str, tf.Tensor]]: + """Runs inference on a single batch of tensor data. + + This method is called by `TorchPredictor.predict` after converting the + original data batch to torch tensors. + + Override this method to add custom logic for processing the model input or + output. + + Example: + + .. testcode:: + + # List outputs are not supported by default TensorflowPredictor. + def build_model() -> tf.keras.Model: + input = tf.keras.layers.Input(shape=1) + model = tf.keras.models.Model(inputs=input, outputs=[input, input]) + return model + + # Use a custom predictor to format model output as a dict. + class CustomPredictor(TensorflowPredictor): + def call_model(self, inputs): + model_output = super().call_model(inputs) + return { + str(i): model_output[i] for i in range(len(model_output)) + } + + import numpy as np + data_batch = np.array([[0.5], [0.6], [0.7]], dtype=np.float32) + + predictor = CustomPredictor(model=build_model()) + predictions = predictor.predict(data_batch) + + Args: + inputs: A batch of data to predict on, represented as either a single + TensorFlow tensor or for multi-input models, a dictionary of tensors. + + Returns: + The model outputs, either as a single tensor or a dictionary of tensors. + + """ + if self.use_gpu: + with tf.device("GPU:0"): + return self._model(inputs) + else: + return self._model(inputs) + + def predict( + self, + data: DataBatchType, + dtype: Optional[Union[tf.dtypes.DType, Dict[str, tf.dtypes.DType]]] = None, + ) -> DataBatchType: + """Run inference on data batch. + + If the provided data is a single array or a dataframe/table with a single + column, it will be converted into a single Tensorflow tensor before being + inputted to the model. 
+ + If the provided data is a multi-column table or a dict of numpy arrays, + it will be converted into a dict of tensors before being inputted to the + model. This is useful for multi-modal inputs (for example your model accepts + both image and text). + + Args: + data: A batch of input data. Either a pandas DataFrame or numpy + array. + dtype: The dtypes to use for the tensors. Either a single dtype for all + tensors or a mapping from column name to dtype. + + Examples: + + .. testcode:: + + import numpy as np + import tensorflow as tf + from ray.train.tensorflow import TensorflowPredictor + + def build_model(): + return tf.keras.Sequential( + [ + tf.keras.layers.InputLayer(input_shape=()), + tf.keras.layers.Flatten(), + tf.keras.layers.Dense(1), + ] + ) + + weights = [np.array([[2.0]]), np.array([0.0])] + predictor = TensorflowPredictor(model=build_model()) + + data = np.asarray([1, 2, 3]) + predictions = predictor.predict(data) + + import pandas as pd + import tensorflow as tf + from ray.train.tensorflow import TensorflowPredictor + + def build_model(): + input1 = tf.keras.layers.Input(shape=(1,), name="A") + input2 = tf.keras.layers.Input(shape=(1,), name="B") + merged = tf.keras.layers.Concatenate(axis=1)([input1, input2]) + output = tf.keras.layers.Dense(2, input_dim=2)(merged) + return tf.keras.models.Model( + inputs=[input1, input2], outputs=output) + + predictor = TensorflowPredictor(model=build_model()) + + # Pandas dataframe. + data = pd.DataFrame([[1, 2], [3, 4]], columns=["A", "B"]) + + predictions = predictor.predict(data) + + Returns: + DataBatchType: Prediction result. The return type will be the same as the + input type. 
+ """ + return super(TensorflowPredictor, self).predict(data=data, dtype=dtype) + + def _arrays_to_tensors( + self, + numpy_arrays: Union[np.ndarray, Dict[str, np.ndarray]], + dtype: Optional[Union[tf.dtypes.DType, Dict[str, tf.dtypes.DType]]], + ) -> Union[tf.Tensor, Dict[str, tf.Tensor]]: + return convert_ndarray_batch_to_tf_tensor_batch(numpy_arrays, dtypes=dtype) + + def _tensor_to_array(self, tensor: tf.Tensor) -> np.ndarray: + if not isinstance(tensor, tf.Tensor): + raise ValueError( + "Expected the model to return either a tf.Tensor or a " + f"dict of tf.Tensor, but got {type(tensor)} instead. " + f"To support models with different output types, subclass " + f"TensorflowPredictor and override the `call_model` method " + f"to process the output into either torch.Tensor or Dict[" + f"str, torch.Tensor]." + ) + return tensor.numpy() diff --git a/.venv/lib/python3.11/site-packages/ray/train/tensorflow/tensorflow_trainer.py b/.venv/lib/python3.11/site-packages/ray/train/tensorflow/tensorflow_trainer.py new file mode 100644 index 0000000000000000000000000000000000000000..8a613edd2a4b3a6b7569198ca41f78284d3f29d6 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/train/tensorflow/tensorflow_trainer.py @@ -0,0 +1,193 @@ +from typing import Any, Callable, Dict, Optional, Union + +from ray.train import Checkpoint, DataConfig, RunConfig, ScalingConfig +from ray.train.data_parallel_trainer import DataParallelTrainer +from ray.train.tensorflow.config import TensorflowConfig +from ray.train.trainer import GenDataset +from ray.util import PublicAPI + + +@PublicAPI(stability="beta") +class TensorflowTrainer(DataParallelTrainer): + """A Trainer for data parallel Tensorflow training. + + This Trainer runs the function ``train_loop_per_worker`` on multiple Ray + Actors. These actors already have the necessary TensorFlow process group already + configured for distributed TensorFlow training. 
+ + The ``train_loop_per_worker`` function is expected to take in either 0 or 1 + arguments: + + .. testcode:: + + def train_loop_per_worker(): + ... + + .. testcode:: + + def train_loop_per_worker(config: Dict): + ... + + If ``train_loop_per_worker`` accepts an argument, then + ``train_loop_config`` will be passed in as the argument. This is useful if you + want to tune the values in ``train_loop_config`` as hyperparameters. + + If the ``datasets`` dict contains a training dataset (denoted by + the "train" key), then it will be split into multiple dataset + shards that can then be accessed by ``ray.train.get_dataset_shard("train")`` inside + ``train_loop_per_worker``. All the other datasets will not be split and + ``ray.train.get_dataset_shard(...)`` will return the the entire Dataset. + + Inside the ``train_loop_per_worker`` function, you can use any of the + :ref:`Ray Train loop methods `. + + .. warning:: + Ray will not automatically set any environment variables or configuration + related to local parallelism / threading + :ref:`aside from "OMP_NUM_THREADS" `. + If you desire greater control over TensorFlow threading, use + the ``tf.config.threading`` module (eg. + ``tf.config.threading.set_inter_op_parallelism_threads(num_cpus)``) + at the beginning of your ``train_loop_per_worker`` function. + + + .. testcode:: + + from ray import train + + def train_loop_per_worker(): + # Report intermediate results for callbacks or logging and + # checkpoint data. + train.report(...) + + # Returns dict of last saved checkpoint. + train.get_checkpoint() + + # Returns the Dataset shard for the given key. + train.get_dataset_shard("my_dataset") + + # Returns the total number of workers executing training. + train.get_context().get_world_size() + + # Returns the rank of this worker. + train.get_context().get_world_rank() + + # Returns the rank of the worker on the current node. 
+ train.get_context().get_local_rank() + + Any returns from the ``train_loop_per_worker`` will be discarded and not + used or persisted anywhere. + + To save a model to use for the ``TensorflowPredictor``, you must save it under the + "model" kwarg in ``Checkpoint`` passed to ``train.report()``. + + Example: + + .. testcode:: + + import os + import tempfile + import tensorflow as tf + + import ray + from ray import train + from ray.train import Checkpoint, ScalingConfig + from ray.train.tensorflow import TensorflowTrainer + + def build_model(): + # toy neural network : 1-layer + return tf.keras.Sequential( + [tf.keras.layers.Dense( + 1, activation="linear", input_shape=(1,))] + ) + + def train_loop_per_worker(config): + dataset_shard = train.get_dataset_shard("train") + strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy() + with strategy.scope(): + model = build_model() + model.compile( + optimizer="Adam", loss="mean_squared_error", metrics=["mse"]) + + tf_dataset = dataset_shard.to_tf( + feature_columns="x", + label_columns="y", + batch_size=1 + ) + for epoch in range(config["num_epochs"]): + model.fit(tf_dataset) + + # Create checkpoint. + checkpoint_dir = tempfile.mkdtemp() + model.save_weights( + os.path.join(checkpoint_dir, "my_checkpoint") + ) + checkpoint = Checkpoint.from_directory(checkpoint_dir) + + train.report( + {}, + checkpoint=checkpoint, + ) + + train_dataset = ray.data.from_items([{"x": x, "y": x + 1} for x in range(32)]) + trainer = TensorflowTrainer( + train_loop_per_worker=train_loop_per_worker, + scaling_config=ScalingConfig(num_workers=3, use_gpu=True), + datasets={"train": train_dataset}, + train_loop_config={"num_epochs": 2}, + ) + result = trainer.fit() + + .. testoutput:: + :options:+ELLIPSIS + :hide: + + ... + + Args: + train_loop_per_worker: The training function to execute. + This can either take in no arguments or a ``config`` dict. 
+ train_loop_config: Configurations to pass into + ``train_loop_per_worker`` if it accepts an argument. + tensorflow_config: Configuration for setting up the TensorFlow backend. + If set to None, use the default configuration. This replaces the + ``backend_config`` arg of ``DataParallelTrainer``. + scaling_config: Configuration for how to scale data parallel training. + dataset_config: Configuration for dataset ingest. + run_config: Configuration for the execution of the training run. + datasets: Any Datasets to use for training. Use + the key "train" to denote which dataset is the training + dataset. + resume_from_checkpoint: A checkpoint to resume training from. + metadata: Dict that should be made available via + `ray.train.get_context().get_metadata()` and in `checkpoint.get_metadata()` + for checkpoints saved from this Trainer. Must be JSON-serializable. + """ + + def __init__( + self, + train_loop_per_worker: Union[Callable[[], None], Callable[[Dict], None]], + *, + train_loop_config: Optional[Dict] = None, + tensorflow_config: Optional[TensorflowConfig] = None, + scaling_config: Optional[ScalingConfig] = None, + dataset_config: Optional[DataConfig] = None, + run_config: Optional[RunConfig] = None, + datasets: Optional[Dict[str, GenDataset]] = None, + metadata: Optional[Dict[str, Any]] = None, + resume_from_checkpoint: Optional[Checkpoint] = None, + ): + if not tensorflow_config: + tensorflow_config = TensorflowConfig() + + super(TensorflowTrainer, self).__init__( + train_loop_per_worker=train_loop_per_worker, + train_loop_config=train_loop_config, + backend_config=tensorflow_config, + scaling_config=scaling_config, + dataset_config=dataset_config, + run_config=run_config, + datasets=datasets, + resume_from_checkpoint=resume_from_checkpoint, + metadata=metadata, + ) diff --git a/.venv/lib/python3.11/site-packages/ray/train/tensorflow/train_loop_utils.py b/.venv/lib/python3.11/site-packages/ray/train/tensorflow/train_loop_utils.py new file mode 100644 index 
0000000000000000000000000000000000000000..b7862d6a365f14d16cf6ad85e17ce7dc92ad5c2d --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/train/tensorflow/train_loop_utils.py @@ -0,0 +1,27 @@ +import tensorflow as tf + +from ray.util.annotations import PublicAPI + + +@PublicAPI(stability="beta") +def prepare_dataset_shard(tf_dataset_shard: tf.data.Dataset): + """A utility function that overrides default config for Tensorflow Dataset. + + This should be used on a TensorFlow ``Dataset`` created by calling + ``iter_tf_batches()`` on a ``ray.data.Dataset`` returned by + ``ray.train.get_dataset_shard()`` since the dataset has already + been sharded across the workers. + + Args: + tf_dataset_shard (tf.data.Dataset): A TensorFlow Dataset. + + Returns: + A TensorFlow Dataset with: + - autosharding turned off + - prefetching turned on with autotune enabled + """ + options = tf.data.Options() + options.experimental_distribute.auto_shard_policy = ( + tf.data.experimental.AutoShardPolicy.OFF + ) + return tf_dataset_shard.with_options(options).prefetch(tf.data.AUTOTUNE) diff --git a/.venv/lib/python3.11/site-packages/ray/train/util/__init__.py b/.venv/lib/python3.11/site-packages/ray/train/util/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..f373b95bbd270009d820abab7f1e3c8324c6fa9f --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/train/util/__init__.py @@ -0,0 +1,7 @@ +from ray.air.util.check_ingest import DummyTrainer + +__all__ = [ + "DummyTrainer", +] + +DummyTrainer.__module__ = "ray.train.util" diff --git a/.venv/lib/python3.11/site-packages/ray/train/util/__pycache__/__init__.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/train/util/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..70585d22e2410f2204d18d37b4a5d49e71cedd58 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/train/util/__pycache__/__init__.cpython-311.pyc differ diff --git 
a/.venv/lib/python3.11/site-packages/ray/tune/execution/__pycache__/tune_controller.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/tune/execution/__pycache__/tune_controller.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d7ee628580d634a780d380bbb929c5cead437fda --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/tune/execution/__pycache__/tune_controller.cpython-311.pyc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88ae169516de2b1392499b271b75dfe4dd32869d5222a0dcf49c53d7f2e56589 +size 103903 diff --git a/.venv/lib/python3.11/site-packages/ray/tune/schedulers/__pycache__/pb2_utils.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/tune/schedulers/__pycache__/pb2_utils.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b238233a23441ee67d0a22e03dfc035547f28b3c Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/tune/schedulers/__pycache__/pb2_utils.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/tune/utils/callback.py b/.venv/lib/python3.11/site-packages/ray/tune/utils/callback.py new file mode 100644 index 0000000000000000000000000000000000000000..b53063b85ab98a46074b52fc20f53e0dcaf93395 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/tune/utils/callback.py @@ -0,0 +1,143 @@ +import logging +import os +from typing import TYPE_CHECKING, Collection, List, Optional, Type, Union + +from ray.tune.callback import Callback, CallbackList +from ray.tune.logger import ( + CSVLogger, + CSVLoggerCallback, + JsonLogger, + JsonLoggerCallback, + LegacyLoggerCallback, + TBXLogger, + TBXLoggerCallback, +) + +logger = logging.getLogger(__name__) + +if TYPE_CHECKING: + from ray.tune.experimental.output import AirVerbosity + +DEFAULT_CALLBACK_CLASSES = ( + CSVLoggerCallback, + JsonLoggerCallback, + TBXLoggerCallback, +) + + +def _get_artifact_templates_for_callbacks( + callbacks: Union[List[Callback], 
List[Type[Callback]], CallbackList] +) -> List[str]: + templates = [] + for callback in callbacks: + templates += list(callback._SAVED_FILE_TEMPLATES) + return templates + + +def _create_default_callbacks( + callbacks: Optional[List[Callback]], + *, + air_verbosity: Optional["AirVerbosity"] = None, + entrypoint: Optional[str] = None, + metric: Optional[str] = None, + mode: Optional[str] = None, + config: Optional[dict] = None, + progress_metrics: Optional[Collection[str]] = None, +) -> List[Callback]: + """Create default callbacks for `Tuner.fit()`. + + This function takes a list of existing callbacks and adds default + callbacks to it. + + Specifically, three kinds of callbacks will be added: + + 1. Loggers. Ray Tune's experiment analysis relies on CSV and JSON logging. + 2. Syncer. Ray Tune synchronizes logs and checkpoint between workers and + the head node. + 2. Trial progress reporter. For reporting intermediate progress, like trial + results, Ray Tune uses a callback. + + These callbacks will only be added if they don't already exist, i.e. if + they haven't been passed (and configured) by the user. A notable case + is when a Logger is passed, which is not a CSV or JSON logger - then + a CSV and JSON logger will still be created. + + Lastly, this function will ensure that the Syncer callback comes after all + Logger callbacks, to ensure that the most up-to-date logs and checkpoints + are synced across nodes. + + """ + callbacks = callbacks or [] + has_csv_logger = False + has_json_logger = False + has_tbx_logger = False + + from ray.tune.progress_reporter import TrialProgressCallback + + has_trial_progress_callback = any( + isinstance(c, TrialProgressCallback) for c in callbacks + ) + + if has_trial_progress_callback and air_verbosity is not None: + logger.warning( + "AIR_VERBOSITY is set, ignoring passed-in TrialProgressCallback." 
+ ) + new_callbacks = [ + c for c in callbacks if not isinstance(c, TrialProgressCallback) + ] + callbacks = new_callbacks + if air_verbosity is not None: # new flow + from ray.tune.experimental.output import ( + _detect_reporter as _detect_air_reporter, + ) + + air_progress_reporter = _detect_air_reporter( + air_verbosity, + num_samples=1, # Update later with setup() + entrypoint=entrypoint, + metric=metric, + mode=mode, + config=config, + progress_metrics=progress_metrics, + ) + callbacks.append(air_progress_reporter) + elif not has_trial_progress_callback: # old flow + trial_progress_callback = TrialProgressCallback( + metric=metric, progress_metrics=progress_metrics + ) + callbacks.append(trial_progress_callback) + + # Check if we have a CSV, JSON and TensorboardX logger + for i, callback in enumerate(callbacks): + if isinstance(callback, LegacyLoggerCallback): + if CSVLogger in callback.logger_classes: + has_csv_logger = True + if JsonLogger in callback.logger_classes: + has_json_logger = True + if TBXLogger in callback.logger_classes: + has_tbx_logger = True + elif isinstance(callback, CSVLoggerCallback): + has_csv_logger = True + elif isinstance(callback, JsonLoggerCallback): + has_json_logger = True + elif isinstance(callback, TBXLoggerCallback): + has_tbx_logger = True + + # If CSV, JSON or TensorboardX loggers are missing, add + if os.environ.get("TUNE_DISABLE_AUTO_CALLBACK_LOGGERS", "0") != "1": + if not has_csv_logger: + callbacks.append(CSVLoggerCallback()) + if not has_json_logger: + callbacks.append(JsonLoggerCallback()) + if not has_tbx_logger: + try: + callbacks.append(TBXLoggerCallback()) + except ImportError: + logger.warning( + "The TensorboardX logger cannot be instantiated because " + "either TensorboardX or one of it's dependencies is not " + "installed. 
Please make sure you have the latest version " + "of TensorboardX installed: `pip install -U tensorboardx`" + ) + + return callbacks diff --git a/.venv/lib/python3.11/site-packages/ray/tune/utils/file_transfer.py b/.venv/lib/python3.11/site-packages/ray/tune/utils/file_transfer.py new file mode 100644 index 0000000000000000000000000000000000000000..d742a91eb9a9c52bbb77f31118b280a8b2506beb --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/tune/utils/file_transfer.py @@ -0,0 +1,481 @@ +import fnmatch +import io +import os +import shutil +import tarfile +from typing import Dict, Generator, List, Optional, Tuple, Union + +import ray +from ray.air._internal.filelock import TempFileLock +from ray.air.util.node import _force_on_node, _get_node_id_from_node_ip +from ray.util.annotations import DeveloperAPI + +_DEFAULT_CHUNK_SIZE_BYTES = 500 * 1024 * 1024 # 500 MiB +_DEFAULT_MAX_SIZE_BYTES = 1 * 1024 * 1024 * 1024 # 1 GiB + + +@DeveloperAPI +def sync_dir_between_nodes( + source_ip: str, + source_path: str, + target_ip: str, + target_path: str, + force_all: bool = False, + exclude: Optional[List] = None, + chunk_size_bytes: int = _DEFAULT_CHUNK_SIZE_BYTES, + max_size_bytes: Optional[int] = _DEFAULT_MAX_SIZE_BYTES, + return_futures: bool = False, +) -> Union[ + None, + Tuple[ray.ObjectRef, ray.ActorID, ray.ObjectRef], + Tuple[ray.ObjectRef, None, None], +]: + """Synchronize directory on source node to directory on target node. + + Per default, this function will collect information about already existing + files in the target directory. Only files that differ in either mtime or + filesize will be transferred, unless ``force_all=True``. + + If ``source_ip==target_ip``, shutil will be used to copy the directory. Otherwise, + the directory will be packed and sent through the Ray Object Store to the target + node. + + Args: + source_ip: IP of source node. + source_path: Path to directory on source node. + target_ip: IP of target node. 
+ target_path: Path to directory on target node. + force_all: If True, all files will be transferred (not just differing files). + Ignored if ``source_ip==target_ip``. + exclude: Pattern of files to exclude, e.g. + ``["*/checkpoint_*]`` to exclude trial checkpoints. + chunk_size_bytes: Chunk size for data transfer. Ignored if + ``source_ip==target_ip``. + max_size_bytes: If packed data exceeds this value, raise an error before + transfer. If ``None``, no limit is enforced. Ignored if + ``source_ip==target_ip``. + return_futures: If True, returns a tuple of the unpack future, + the pack actor, and the files_stats future. If False (default) will + block until synchronization finished and return None. + + Returns: + None, or Tuple of unpack future, pack actor, and files_stats future. + If ``source_ip==target_ip``, pack actor and files_stats future will be None. + + """ + if source_ip != target_ip: + return _sync_dir_between_different_nodes( + source_ip=source_ip, + source_path=source_path, + target_ip=target_ip, + target_path=target_path, + force_all=force_all, + exclude=exclude, + chunk_size_bytes=chunk_size_bytes, + max_size_bytes=max_size_bytes, + return_futures=return_futures, + ) + elif source_path != target_path: + ret = _sync_dir_on_same_node( + ip=source_ip, + source_path=source_path, + target_path=target_path, + exclude=exclude, + return_futures=return_futures, + ) + if return_futures: + return ret, None, None + return ret + + +def _sync_dir_on_same_node( + ip: str, + source_path: str, + target_path: str, + exclude: Optional[List] = None, + return_futures: bool = False, +) -> Optional[ray.ObjectRef]: + """Synchronize directory to another directory on the same node. + + Per default, this function will collect information about already existing + files in the target directory. All files will be copied over. + + Args: + ip: IP of the node. + source_path: Path to source directory. + target_path: Path to target directory. 
+ exclude: Pattern of files to exclude, e.g. + ``["*/checkpoint_*]`` to exclude trial checkpoints. + return_futures: If True, returns a future of the copy task. + + Returns: + None, or future of the copy task. + + """ + + node_id = _get_node_id_from_node_ip(ip) + + copy_on_node = _remote_copy_dir.options(num_cpus=0, **_force_on_node(node_id)) + copy_future = copy_on_node.remote( + source_dir=source_path, target_dir=target_path, exclude=exclude + ) + + if return_futures: + return copy_future + + return ray.get(copy_future) + + +def _sync_dir_between_different_nodes( + source_ip: str, + source_path: str, + target_ip: str, + target_path: str, + force_all: bool = False, + exclude: Optional[List] = None, + chunk_size_bytes: int = _DEFAULT_CHUNK_SIZE_BYTES, + max_size_bytes: Optional[int] = _DEFAULT_MAX_SIZE_BYTES, + return_futures: bool = False, +) -> Union[None, Tuple[ray.ObjectRef, ray.ActorID, ray.ObjectRef]]: + """Synchronize directory on source node to directory on target node. + + Per default, this function will collect information about already existing + files in the target directory. Only files that differ in either mtime or + filesize will be transferred, unless ``force_all=True``. + + Args: + source_ip: IP of source node. + source_path: Path to directory on source node. + target_ip: IP of target node. + target_path: Path to directory on target node. + force_all: If True, all files will be transferred (not just differing files). + exclude: Pattern of files to exclude, e.g. + ``["*/checkpoint_*]`` to exclude trial checkpoints. + chunk_size_bytes: Chunk size for data transfer. + max_size_bytes: If packed data exceeds this value, raise an error before + transfer. If ``None``, no limit is enforced. + return_futures: If True, returns a tuple of the unpack future, + the pack actor, and the files_stats future. If False (default) will + block until synchronization finished and return None. 
+ + Returns: + None, or Tuple of unpack future, pack actor, and files_stats future. + + """ + + source_node_id = _get_node_id_from_node_ip(source_ip) + target_node_id = _get_node_id_from_node_ip(target_ip) + + pack_actor_on_source_node = _PackActor.options( + num_cpus=0, **_force_on_node(source_node_id) + ) + unpack_on_target_node = _unpack_from_actor.options( + num_cpus=0, **_force_on_node(target_node_id) + ) + + if force_all: + files_stats = None + else: + files_stats = _remote_get_recursive_files_and_stats.options( + num_cpus=0, **_force_on_node(target_node_id) + ).remote(target_path) + + pack_actor = pack_actor_on_source_node.remote( + source_dir=source_path, + files_stats=files_stats, + chunk_size_bytes=chunk_size_bytes, + max_size_bytes=max_size_bytes, + exclude=exclude, + ) + unpack_future = unpack_on_target_node.remote(pack_actor, target_path) + + if return_futures: + return unpack_future, pack_actor, files_stats + + return ray.get(unpack_future) + + +def _get_recursive_files_and_stats(path: str) -> Dict[str, Tuple[float, int]]: + """Return dict of files mapping to stats in ``path``. + + This function scans a directory ``path`` recursively and returns a dict + mapping each contained file to a tuple of (mtime, filesize). + + mtime and filesize are returned from ``os.lstat`` and are usually a + floating point number (timestamp) and an int (filesize in bytes). 
+ """ + files_stats = {} + for root, dirs, files in os.walk(path, topdown=False): + rel_root = os.path.relpath(root, path) + for file in files: + try: + key = os.path.join(rel_root, file) + stat = os.lstat(os.path.join(path, key)) + files_stats[key] = stat.st_mtime, stat.st_size + except FileNotFoundError: + # Race condition: If a file is deleted while executing this + # method, just continue and don't include the file in the stats + pass + + return files_stats + + +# Only export once +_remote_get_recursive_files_and_stats = ray.remote(_get_recursive_files_and_stats) + + +def _pack_dir( + source_dir: str, + exclude: Optional[List] = None, + files_stats: Optional[Dict[str, Tuple[float, int]]] = None, +) -> io.BytesIO: + """Pack whole directory contents into an uncompressed tarfile. + + This function accepts a ``files_stats`` argument. If given, only files + whose stats differ from these stats will be packed. + + The main use case for this is that we can collect information about files + already existing in the target directory, and only pack files that have + been updated. This is similar to how cloud syncing utilities decide + which files to transfer. + + Args: + source_dir: Path to local directory to pack into tarfile. + exclude: Pattern of files to exclude, e.g. + ``["*/checkpoint_*]`` to exclude trial checkpoints. + files_stats: Dict of relative filenames mapping to a tuple of + (mtime, filesize). Only files that differ from these stats + will be packed. + + Returns: + Tarfile as a stream object. 
+ """ + + def _should_exclude(candidate: str) -> bool: + if not exclude: + return False + + for excl in exclude: + if fnmatch.fnmatch(candidate, excl): + return True + return False + + stream = io.BytesIO() + with tarfile.open(fileobj=stream, mode="w", format=tarfile.PAX_FORMAT) as tar: + + if not files_stats and not exclude: + # If no `files_stats` is passed, pack whole directory + tar.add(source_dir, arcname="", recursive=True) + else: + files_stats = files_stats or {} + # Otherwise, only pack differing files + tar.add(source_dir, arcname="", recursive=False) + for root, dirs, files in os.walk(source_dir, topdown=False): + rel_root = os.path.relpath(root, source_dir) + # Always add all directories + for dir in dirs: + key = os.path.join(rel_root, dir) + tar.add(os.path.join(source_dir, key), arcname=key, recursive=False) + # Add files where our information differs + for file in files: + key = os.path.join(rel_root, file) + stat = os.lstat(os.path.join(source_dir, key)) + file_stat = stat.st_mtime, stat.st_size + + if _should_exclude(key): + # If the file matches an exclude pattern, skip + continue + + if key in files_stats and files_stats[key] == file_stat: + # If the file did not change, skip + continue + + tar.add(os.path.join(source_dir, key), arcname=key) + + return stream + + +def _gib_string(num_bytes: float) -> str: + return f"{float(num_bytes / 1024 ** 3):.2f}GiB" + + +@ray.remote +class _PackActor: + """Actor wrapping around a packing job. + + This actor is used for chunking the packed data into smaller chunks that + can be transferred via the object store more efficiently. + + The actor will start packing the directory when initialized, and separate + chunks can be received by calling the remote ``next()`` task. + + Args: + source_dir: Path to local directory to pack into tarfile. + exclude: Pattern of files to exclude, e.g. + ``["*/checkpoint_*]`` to exclude trial checkpoints. 
+ files_stats: Dict of relative filenames mapping to a tuple of + (mtime, filesize). Only files that differ from these stats + will be packed. + chunk_size_bytes: Cut bytes stream into chunks of this size in bytes. + max_size_bytes: If packed data exceeds this value, raise an error before + transfer. If ``None``, no limit is enforced. + """ + + def __init__( + self, + source_dir: str, + exclude: Optional[List] = None, + files_stats: Optional[Dict[str, Tuple[float, int]]] = None, + chunk_size_bytes: int = _DEFAULT_CHUNK_SIZE_BYTES, + max_size_bytes: Optional[int] = _DEFAULT_MAX_SIZE_BYTES, + ): + self.stream = _pack_dir( + source_dir=source_dir, exclude=exclude, files_stats=files_stats + ) + + # Get buffer size + self.stream.seek(0, 2) + file_size = self.stream.tell() + + if max_size_bytes and file_size > max_size_bytes: + raise RuntimeError( + f"Packed directory {source_dir} content has a size of " + f"{_gib_string(file_size)}, which exceeds the limit " + f"of {_gib_string(max_size_bytes)}. Please check the directory " + f"contents. If you want to transfer everything, you can increase " + f"or disable the limit by passing the `max_size` argument." 
+ ) + self.chunk_size = chunk_size_bytes + self.max_size = max_size_bytes + self.iter = None + + def get_full_data(self) -> bytes: + return self.stream.getvalue() + + def _chunk_generator(self) -> Generator[bytes, None, None]: + self.stream.seek(0) + data = self.stream.read(self.chunk_size) + while data: + yield data + data = self.stream.read(self.chunk_size) + + def next(self) -> Optional[bytes]: + if not self.iter: + self.iter = iter(self._chunk_generator()) + try: + return next(self.iter) + except StopIteration: + return None + + +def _iter_remote(actor: ray.ActorID) -> Generator[bytes, None, None]: + """Iterate over actor task and return as generator.""" + while True: + buffer = ray.get(actor.next.remote()) + if buffer is None: + return + yield buffer + + +def _unpack_dir(stream: io.BytesIO, target_dir: str, *, _retry: bool = True) -> None: + """Unpack tarfile stream into target directory.""" + stream.seek(0) + target_dir = os.path.normpath(target_dir) + try: + # Timeout 0 means there will be only one attempt to acquire + # the file lock. If it cannot be aquired, a TimeoutError + # will be thrown. + with TempFileLock(f"{target_dir}.lock", timeout=0): + with tarfile.open(fileobj=stream) as tar: + tar.extractall(target_dir) + except TimeoutError: + # wait, but do not do anything + with TempFileLock(f"{target_dir}.lock"): + pass + # if the dir was locked due to being deleted, + # recreate + if not os.path.exists(target_dir): + if _retry: + _unpack_dir(stream, target_dir, _retry=False) + else: + raise RuntimeError( + f"Target directory {target_dir} does not exist " + "and couldn't be recreated. 
" + "Please raise an issue on GitHub: " + "https://github.com/ray-project/ray/issues" + ) + + +@ray.remote +def _unpack_from_actor(pack_actor: ray.ActorID, target_dir: str) -> None: + """Iterate over chunks received from pack actor and unpack.""" + stream = io.BytesIO() + for buffer in _iter_remote(pack_actor): + stream.write(buffer) + _unpack_dir(stream, target_dir=target_dir) + + +def _copy_dir( + source_dir: str, + target_dir: str, + *, + exclude: Optional[List] = None, + _retry: bool = True, +) -> None: + """Copy dir with shutil on the actor.""" + target_dir = os.path.normpath(target_dir) + try: + # Timeout 0 means there will be only one attempt to acquire + # the file lock. If it cannot be aquired, a TimeoutError + # will be thrown. + with TempFileLock(f"{target_dir}.lock", timeout=0): + _delete_path_unsafe(target_dir) + + _ignore_func = None + if exclude: + + def _ignore(path, names): + ignored_names = set() + rel_path = os.path.relpath(path, source_dir) + for name in names: + candidate = os.path.join(rel_path, name) + for excl in exclude: + if fnmatch.fnmatch(candidate, excl): + ignored_names.add(name) + break + return ignored_names + + _ignore_func = _ignore + + shutil.copytree(source_dir, target_dir, ignore=_ignore_func) + except TimeoutError: + # wait, but do not do anything + with TempFileLock(f"{target_dir}.lock"): + pass + # if the dir was locked due to being deleted, + # recreate + if not os.path.exists(target_dir): + if _retry: + _copy_dir(source_dir, target_dir, _retry=False) + else: + raise RuntimeError( + f"Target directory {target_dir} does not exist " + "and couldn't be recreated. " + "Please raise an issue on GitHub: " + "https://github.com/ray-project/ray/issues" + ) + + +# Only export once +_remote_copy_dir = ray.remote(_copy_dir) + + +def _delete_path_unsafe(target_path: str): + """Delete path (files and directories). 
No filelock.""" + if os.path.exists(target_path): + if os.path.isdir(target_path): + shutil.rmtree(target_path) + else: + os.remove(target_path) + return True + return False diff --git a/.venv/lib/python3.11/site-packages/ray/tune/utils/mock.py b/.venv/lib/python3.11/site-packages/ray/tune/utils/mock.py new file mode 100644 index 0000000000000000000000000000000000000000..5d7f7c8d2624d00910c170b84d27185f876f3b31 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/tune/utils/mock.py @@ -0,0 +1,124 @@ +import logging +import os +import random +import time +from collections import defaultdict +from pathlib import Path +from typing import Dict + +from ray.tune.callback import Callback +from ray.tune.experiment import Trial + +logger = logging.getLogger(__name__) + + +class FailureInjectorCallback(Callback): + """Adds random failure injection to the TrialExecutor.""" + + def __init__( + self, + config_path="~/ray_bootstrap_config.yaml", + probability=0.1, + time_between_checks=0, + disable=False, + ): + self.probability = probability + self.config_path = Path(config_path).expanduser().as_posix() + self.disable = disable + + self.time_between_checks = time_between_checks + # Initialize with current time so we don't fail right away + self.last_fail_check = time.monotonic() + + def on_step_begin(self, **info): + if not os.path.exists(self.config_path): + return + if time.monotonic() < self.last_fail_check + self.time_between_checks: + return + self.last_fail_check = time.monotonic() + import click + + from ray.autoscaler._private.commands import kill_node + + failures = 0 + max_failures = 3 + # With 10% probability inject failure to a worker. + if random.random() < self.probability and not self.disable: + # With 10% probability fully terminate the node. 
+ should_terminate = random.random() < self.probability + while failures < max_failures: + try: + kill_node( + self.config_path, + yes=True, + hard=should_terminate, + override_cluster_name=None, + ) + return + except click.exceptions.ClickException: + failures += 1 + logger.exception( + "Killing random node failed in attempt " + "{}. " + "Retrying {} more times".format( + str(failures), str(max_failures - failures) + ) + ) + + +class TrialStatusSnapshot: + """A sequence of statuses of trials as they progress. + + If all trials keep previous status, no snapshot is taken. + """ + + def __init__(self): + self._snapshot = [] + + def append(self, new_snapshot: Dict[str, str]): + """May append a new snapshot to the sequence.""" + if not new_snapshot: + # Don't add an empty snapshot. + return + if not self._snapshot or new_snapshot != self._snapshot[-1]: + self._snapshot.append(new_snapshot) + + def max_running_trials(self) -> int: + """Outputs the max number of running trials at a given time. + + Usually used to assert certain number given resource restrictions. + """ + result = 0 + for snapshot in self._snapshot: + count = 0 + for trial_id in snapshot: + if snapshot[trial_id] == Trial.RUNNING: + count += 1 + result = max(result, count) + + return result + + def all_trials_are_terminated(self) -> bool: + """True if all trials are terminated.""" + if not self._snapshot: + return False + last_snapshot = self._snapshot[-1] + return all( + last_snapshot[trial_id] == Trial.TERMINATED for trial_id in last_snapshot + ) + + +class TrialStatusSnapshotTaker(Callback): + """Collects a sequence of statuses of trials as they progress. + + If all trials keep previous status, no snapshot is taken. 
+ """ + + def __init__(self, snapshot: TrialStatusSnapshot): + self._snapshot = snapshot + + def on_step_end(self, iteration, trials, **kwargs): + new_snapshot = defaultdict(str) + for trial in trials: + new_snapshot[trial.trial_id] = trial.status + self._snapshot.append(new_snapshot) diff --git a/.venv/lib/python3.11/site-packages/ray/tune/utils/mock_trainable.py b/.venv/lib/python3.11/site-packages/ray/tune/utils/mock_trainable.py new file mode 100644 index 0000000000000000000000000000000000000000..2e79181ebba6cc7497aeee9e0771e73ab3e62c66 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/tune/utils/mock_trainable.py @@ -0,0 +1,63 @@ +import json +import os +import time + +import numpy as np + +from ray.tune import Trainable + +MOCK_TRAINABLE_NAME = "mock_trainable" +MOCK_ERROR_KEY = "mock_error" + + +class MyTrainableClass(Trainable): + """Example agent whose learning curve is a random sigmoid. + + The dummy hyperparameters "width" and "height" determine the slope and + maximum reward value reached. + """ + + def setup(self, config): + self._sleep_time = config.get("sleep", 0) + self._mock_error = config.get(MOCK_ERROR_KEY, False) + self._persistent_error = config.get("persistent_error", False) + + self.timestep = 0 + self.restored = False + + def step(self): + if ( + self._mock_error + and self.timestep > 0 # allow at least 1 successful checkpoint. + and (self._persistent_error or not self.restored) + ): + raise RuntimeError(f"Failing on purpose! {self.timestep=}") + + if self._sleep_time > 0: + time.sleep(self._sleep_time) + + self.timestep += 1 + v = np.tanh(float(self.timestep) / self.config.get("width", 1)) + v *= self.config.get("height", 1) + + # Here we use `episode_reward_mean`, but you can also report other + # objectives such as loss or accuracy. 
+ return {"episode_reward_mean": v} + + def save_checkpoint(self, checkpoint_dir): + path = os.path.join(checkpoint_dir, "checkpoint") + with open(path, "w") as f: + f.write(json.dumps({"timestep": self.timestep})) + + def load_checkpoint(self, checkpoint_dir): + path = os.path.join(checkpoint_dir, "checkpoint") + with open(path, "r") as f: + self.timestep = json.loads(f.read())["timestep"] + + self.restored = True + + +def register_mock_trainable(): + from ray.tune import register_trainable + + register_trainable(MOCK_TRAINABLE_NAME, MyTrainableClass) diff --git a/.venv/lib/python3.11/site-packages/ray/tune/utils/object_cache.py b/.venv/lib/python3.11/site-packages/ray/tune/utils/object_cache.py new file mode 100644 index 0000000000000000000000000000000000000000..99f1b5678d2a74c4b8c5932d0c92f919ec32bb78 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/tune/utils/object_cache.py @@ -0,0 +1,173 @@ +from collections import Counter, defaultdict +from typing import Dict, Generator, List, Optional, TypeVar + +# Grouping key - must be hashable +T = TypeVar("T") +# Objects to cache +U = TypeVar("U") + + +class _ObjectCache: + """Cache up to some maximum count given a grouping key. + + This object cache can e.g. be used to cache Ray Tune trainable actors + given their resource requirements (reuse_actors=True). + + If the max number of cached objects for a grouping key is reached, + no more objects for this group will be cached. + + However, if `may_keep_one=True`, one object (globally across all grouping + keys) may be cached, even if the max number of objects is 0. This is to + allow to cache an object if the max number of objects of this key + will increase shortly after (as is the case e.g. in the Ray Tune control + loop). + + Args: + may_keep_one: If True, one object (globally) may be cached if no desired + maximum objects are defined. 
+ + """ + + def __init__(self, may_keep_one: bool = True): + self._num_cached_objects: int = 0 + self._cached_objects: Dict[T, List[U]] = defaultdict(list) + self._max_num_objects: Counter[T] = Counter() + + self._may_keep_one = may_keep_one + + @property + def num_cached_objects(self): + return self._num_cached_objects + + @property + def total_max_objects(self): + # Counter.total() is only available for python 3.10+ + return sum(self._max_num_objects.values()) + + def increase_max(self, key: T, by: int = 1) -> None: + """Increase number of max objects for this key. + + Args: + key: Group key. + by: Decrease by this amount. + """ + self._max_num_objects[key] += by + + def decrease_max(self, key: T, by: int = 1) -> None: + """Decrease number of max objects for this key. + + Args: + key: Group key. + by: Decrease by this amount. + """ + self._max_num_objects[key] -= by + + def has_cached_object(self, key: T) -> bool: + """Return True if at least one cached object exists for this key. + + Args: + key: Group key. + + Returns: + True if at least one cached object exists for this key. + """ + return bool(self._cached_objects[key]) + + def cache_object(self, key: T, obj: U) -> bool: + """Cache object for a given key. + + This will put the object into a cache, assuming the number + of cached objects for this key is less than the number of + max objects for this key. + + An exception is made if `max_keep_one=True` and no other + objects are cached globally. In that case, the object can + still be cached. + + Args: + key: Group key. + obj: Object to cache. + + Returns: + True if the object has been cached. False otherwise. 
+ + """ + # If we have more objects cached already than we desire + if len(self._cached_objects[key]) >= self._max_num_objects[key]: + # If may_keep_one is False, never cache + if not self._may_keep_one: + return False + + # If we have more than one other cached object, don't cache + if self._num_cached_objects > 0: + return False + + # If any other objects are expected to be cached, don't cache + if any(v for v in self._max_num_objects.values()): + return False + + # Otherwise, cache (for now). + + self._cached_objects[key].append(obj) + self._num_cached_objects += 1 + return True + + def pop_cached_object(self, key: T) -> Optional[U]: + """Get one cached object for a key. + + This will remove the object from the cache. + + Args: + key: Group key. + + Returns: + Cached object. + """ + if not self.has_cached_object(key): + return None + + self._num_cached_objects -= 1 + return self._cached_objects[key].pop(0) + + def flush_cached_objects(self, force_all: bool = False) -> Generator[U, None, None]: + """Return a generator over cached objects evicted from the cache. + + This method yields all cached objects that should be evicted from the + cache for cleanup by the caller. + + If the number of max objects is lower than the number of + cached objects for a given key, objects are evicted until + the numbers are equal. + + If `max_keep_one=True` (and ``force_all=False``), one cached object + may be retained. + + Objects are evicted FIFO. + + If ``force_all=True``, all objects are evicted. + + Args: + force_all: If True, all objects are flushed. This takes precedence + over ``keep_one``. + + Yields: + Evicted objects to be cleaned up by caller. + + """ + # If force_all=True, don't keep one. 
import json
import os
import pickle
import tempfile
import time
from collections import Counter

import numpy as np

from ray import train, tune
from ray._private.test_utils import safe_write_to_results_json
from ray.train import Checkpoint
from ray.tune.callback import Callback


class ProgressCallback(Callback):
    """Tune callback that periodically dumps trial states to a results JSON."""

    def __init__(self):
        self.last_update = 0
        self.update_interval = 60  # seconds between status dumps

    def on_step_end(self, iteration, trials, **kwargs):
        # Throttle: write at most once per `update_interval` seconds.
        if time.time() - self.last_update > self.update_interval:
            now = time.time()
            result = {
                "last_update": now,
                "iteration": iteration,
                "trial_states": dict(Counter([trial.status for trial in trials])),
            }
            safe_write_to_results_json(result, "/tmp/release_test_out.json")

            self.last_update = now


class TestDurableTrainable(tune.Trainable):
    """Class-API trainable that sleeps per step and writes bogus checkpoints.

    Used by release tests to benchmark Tune result/checkpoint throughput.
    """

    def __init__(self, *args, **kwargs):
        self.setup_env()

        super(TestDurableTrainable, self).__init__(*args, **kwargs)

    def setup_env(self):
        # Hook for subclasses to prepare the environment before Trainable init.
        pass

    def setup(self, config):
        self._num_iters = int(config["num_iters"])
        self._sleep_time = config["sleep_time"]
        self._score = config["score"]

        self._checkpoint_iters = config["checkpoint_iters"]
        self._checkpoint_size_b = config["checkpoint_size_b"]
        # Each np.float64 takes 8 bytes, so this many items hits the target size.
        self._checkpoint_num_items = self._checkpoint_size_b // 8  # np.float64

        self._iter = 0

    def step(self):
        # Skip the sleep on the very first step so startup cost is not inflated.
        if self._iter > 0:
            time.sleep(self._sleep_time)

        res = dict(score=self._iter + self._score)

        if self._iter >= self._num_iters:
            res["done"] = True

        self._iter += 1
        return res

    def save_checkpoint(self, tmp_checkpoint_dir):
        # Write a checkpoint of roughly `checkpoint_size_b` bytes of random data.
        checkpoint_file = os.path.join(tmp_checkpoint_dir, "bogus.ckpt")
        checkpoint_data = np.random.uniform(0, 1, size=self._checkpoint_num_items)
        with open(checkpoint_file, "wb") as fp:
            pickle.dump(checkpoint_data, fp)

    def load_checkpoint(self, checkpoint):
        # Checkpoint contents are bogus; nothing to restore.
        pass


def function_trainable(config):
    """Function-API equivalent of ``TestDurableTrainable``."""
    num_iters = int(config["num_iters"])
    sleep_time = config["sleep_time"]
    score = config["score"]

    checkpoint_iters = config["checkpoint_iters"]
    checkpoint_size_b = config["checkpoint_size_b"]
    checkpoint_num_items = checkpoint_size_b // 8  # np.float64
    checkpoint_num_files = config["checkpoint_num_files"]

    for i in range(num_iters):
        metrics = {"score": i + score}
        # BUGFIX: require `checkpoint_iters > 0` (was `>= 0`). When the
        # checkpoint frequency rounds down to 0, `i % 0` raised
        # ZeroDivisionError; treat 0 as "checkpointing disabled" instead.
        if (
            checkpoint_iters > 0
            and checkpoint_size_b > 0
            and i % checkpoint_iters == 0
        ):
            with tempfile.TemporaryDirectory() as tmpdir:
                # BUGFIX: use a dedicated loop variable; the original reused
                # `i` here, shadowing the outer iteration counter.
                for file_i in range(checkpoint_num_files):
                    checkpoint_file = os.path.join(tmpdir, f"bogus_{file_i}.ckpt")
                    checkpoint_data = np.random.uniform(
                        0, 1, size=checkpoint_num_items
                    )
                    with open(checkpoint_file, "wb") as fp:
                        pickle.dump(checkpoint_data, fp)
                train.report(metrics, checkpoint=Checkpoint.from_directory(tmpdir))
        else:
            train.report(metrics)

        time.sleep(sleep_time)


def timed_tune_run(
    name: str,
    num_samples: int,
    results_per_second: int = 1,
    trial_length_s: int = 1,
    max_runtime: int = 300,
    checkpoint_freq_s: int = -1,
    checkpoint_size_b: int = 0,
    checkpoint_num_files: int = 1,
    **tune_kwargs,
) -> bool:
    """Run a timed Tune benchmark and report whether it beat ``max_runtime``.

    Args:
        name: Human-readable benchmark name used in log output.
        num_samples: Number of trials to launch.
        results_per_second: Results each trial reports per second.
        trial_length_s: Target runtime per trial in seconds.
        max_runtime: Time budget for the whole run in seconds.
        checkpoint_freq_s: Checkpoint frequency (seconds); negative disables.
        checkpoint_size_b: Size of each checkpoint in bytes.
        checkpoint_num_files: Files per checkpoint (function trainable only).
        **tune_kwargs: Extra kwargs forwarded to ``tune.run``.

    Returns:
        True if the run finished within ``max_runtime`` seconds.
    """
    # Use the durable (class) trainable when results go to cloud storage.
    durable = (
        "storage_path" in tune_kwargs
        and tune_kwargs["storage_path"]
        and (
            tune_kwargs["storage_path"].startswith("s3://")
            or tune_kwargs["storage_path"].startswith("gs://")
        )
    )

    sleep_time = 1.0 / results_per_second
    num_iters = int(trial_length_s / sleep_time)
    checkpoint_iters = -1
    if checkpoint_freq_s >= 0:
        checkpoint_iters = int(checkpoint_freq_s / sleep_time)

    config = {
        "score": tune.uniform(0.0, 1.0),
        "num_iters": num_iters,
        "sleep_time": sleep_time,
        "checkpoint_iters": checkpoint_iters,
        "checkpoint_size_b": checkpoint_size_b,
        "checkpoint_num_files": checkpoint_num_files,
    }

    print(f"Starting benchmark with config: {config}")

    run_kwargs = {"reuse_actors": True, "verbose": 2}
    run_kwargs.update(tune_kwargs)

    _train = function_trainable

    if durable:
        _train = TestDurableTrainable
        run_kwargs["checkpoint_freq"] = checkpoint_iters

    start_time = time.monotonic()
    analysis = tune.run(
        _train,
        config=config,
        num_samples=num_samples,
        raise_on_failed_trial=False,
        **run_kwargs,
    )
    time_taken = time.monotonic() - start_time

    result = {
        "time_taken": time_taken,
        "trial_states": dict(Counter([trial.status for trial in analysis.trials])),
        "last_update": time.time(),
    }

    test_output_json = os.environ.get("TEST_OUTPUT_JSON", "/tmp/tune_test.json")
    with open(test_output_json, "wt") as f:
        json.dump(result, f)

    success = time_taken <= max_runtime

    if not success:
        print(
            f"The {name} test took {time_taken:.2f} seconds, but should not "
            f"have exceeded {max_runtime:.2f} seconds. Test failed. \n\n"
            f"--- FAILED: {name.upper()} ::: "
            f"{time_taken:.2f} > {max_runtime:.2f} ---"
        )
    else:
        print(
            f"The {name} test took {time_taken:.2f} seconds, which "
            f"is below the budget of {max_runtime:.2f} seconds. "
            f"Test successful. \n\n"
            f"--- PASSED: {name.upper()} ::: "
            f"{time_taken:.2f} <= {max_runtime:.2f} ---"
        )

    return success
+ extra_custom_resources: Extra custom resources to reserve in + case the trial needs to launch additional Ray actors that use + any of these custom resources. + has_placement_group: Bool indicating if the trial also + has an associated placement group. + + """ + + __slots__ = () + + def __new__( + cls, + cpu: float, + gpu: float, + memory: float = 0, + object_store_memory: float = 0.0, + extra_cpu: float = 0.0, + extra_gpu: float = 0.0, + extra_memory: float = 0.0, + extra_object_store_memory: float = 0.0, + custom_resources: Optional[dict] = None, + extra_custom_resources: Optional[dict] = None, + has_placement_group: bool = False, + ): + custom_resources = custom_resources or {} + extra_custom_resources = extra_custom_resources or {} + leftovers = set(custom_resources) ^ set(extra_custom_resources) + + for value in leftovers: + custom_resources.setdefault(value, 0) + extra_custom_resources.setdefault(value, 0) + + cpu = round(cpu, 2) + gpu = round(gpu, 2) + memory = round(memory, 2) + object_store_memory = round(object_store_memory, 2) + extra_cpu = round(extra_cpu, 2) + extra_gpu = round(extra_gpu, 2) + extra_memory = round(extra_memory, 2) + extra_object_store_memory = round(extra_object_store_memory, 2) + custom_resources = { + resource: round(value, 2) for resource, value in custom_resources.items() + } + extra_custom_resources = { + resource: round(value, 2) + for resource, value in extra_custom_resources.items() + } + + all_values = [ + cpu, + gpu, + memory, + object_store_memory, + extra_cpu, + extra_gpu, + extra_memory, + extra_object_store_memory, + ] + all_values += list(custom_resources.values()) + all_values += list(extra_custom_resources.values()) + assert len(custom_resources) == len(extra_custom_resources) + for entry in all_values: + assert isinstance(entry, Number), ("Improper resource value.", entry) + return super(_Resources, cls).__new__( + cls, + cpu, + gpu, + memory, + object_store_memory, + extra_cpu, + extra_gpu, + extra_memory, + 
extra_object_store_memory, + custom_resources, + extra_custom_resources, + has_placement_group, + ) + + def summary_string(self): + summary = "{} CPUs, {} GPUs".format( + self.cpu + self.extra_cpu, self.gpu + self.extra_gpu + ) + if self.memory or self.extra_memory: + summary += ", {} GiB heap".format( + round((self.memory + self.extra_memory) / (1024**3), 2) + ) + if self.object_store_memory or self.extra_object_store_memory: + summary += ", {} GiB objects".format( + round( + (self.object_store_memory + self.extra_object_store_memory) + / (1024**3), + 2, + ) + ) + custom_summary = ", ".join( + [ + "{} {}".format(self.get_res_total(res), res) + for res in self.custom_resources + if not res.startswith(NODE_ID_PREFIX) + ] + ) + if custom_summary: + summary += " ({})".format(custom_summary) + return summary + + def cpu_total(self): + return self.cpu + self.extra_cpu + + def gpu_total(self): + return self.gpu + self.extra_gpu + + def memory_total(self): + return self.memory + self.extra_memory + + def object_store_memory_total(self): + return self.object_store_memory + self.extra_object_store_memory + + def get_res_total(self, key): + return self.custom_resources.get(key, 0) + self.extra_custom_resources.get( + key, 0 + ) + + def get(self, key): + return self.custom_resources.get(key, 0) + + def is_nonnegative(self): + all_values = [self.cpu, self.gpu, self.extra_cpu, self.extra_gpu] + all_values += list(self.custom_resources.values()) + all_values += list(self.extra_custom_resources.values()) + return all(v >= 0 for v in all_values) + + @classmethod + def subtract(cls, original, to_remove): + cpu = original.cpu - to_remove.cpu + gpu = original.gpu - to_remove.gpu + memory = original.memory - to_remove.memory + object_store_memory = ( + original.object_store_memory - to_remove.object_store_memory + ) + extra_cpu = original.extra_cpu - to_remove.extra_cpu + extra_gpu = original.extra_gpu - to_remove.extra_gpu + extra_memory = original.extra_memory - 
class _ResourceUpdater:
    """Periodic Resource updater for Tune.

    Initially, all resources are set to 0. The updater will try to update
    resources when (1) init ResourceUpdater (2) call
    "update_avail_resources", "num_cpus" or "num_gpus".

    The update takes effect when (1) Ray is initialized (2) the interval
    between this and last update is larger than "refresh_period".
    """

    def __init__(self, refresh_period: Optional[float] = None):
        # Start with an empty view; refreshed lazily below and on access.
        self._avail_resources = _Resources(cpu=0, gpu=0)

        if refresh_period is None:
            refresh_period = float(
                os.environ.get("TUNE_STATE_REFRESH_PERIOD", TUNE_STATE_REFRESH_PERIOD)
            )
        self._refresh_period = refresh_period
        self._last_resource_refresh = float("-inf")
        self.update_avail_resources()

    def update_avail_resources(self, num_retries: int = 5, force: bool = False):
        """Refresh the cached cluster resources from Ray.

        No-op if Ray is not initialized, or if the refresh period has not yet
        elapsed and ``force`` is not set.
        """
        if not ray.is_initialized():
            return
        if (
            time.time() - self._last_resource_refresh < self._refresh_period
            and not force
        ):
            return
        logger.debug("Checking Ray cluster resources.")
        resources = None
        for i in range(num_retries):
            if i > 0:
                logger.warning(
                    f"Cluster resources not detected or are 0. Attempt #{i + 1}...",
                )
                time.sleep(0.5)
            resources = ray.cluster_resources()
            if resources:
                break

        if not resources:
            # BUGFIX: `resources` may still be None here (e.g. when
            # num_retries <= 0), and calling `.setdefault` on None raises
            # AttributeError. Normalize to an empty dict first.
            resources = resources or {}
            # NOTE: This hides the possibility that Ray may be waiting for
            # clients to connect.
            resources.setdefault("CPU", 0)
            resources.setdefault("GPU", 0)
            logger.warning(
                "Cluster resources cannot be detected or are 0. "
                "You can resume this experiment by passing in `resume=True` to `run`."
            )

        # Copy before popping so the dict returned by Ray is not mutated.
        resources = resources.copy()
        num_cpus = resources.pop("CPU", 0)
        num_gpus = resources.pop("GPU", 0)
        memory = resources.pop("memory", 0)
        object_store_memory = resources.pop("object_store_memory", 0)
        custom_resources = resources

        self._avail_resources = _Resources(
            int(num_cpus),
            int(num_gpus),
            memory=int(memory),
            object_store_memory=int(object_store_memory),
            custom_resources=custom_resources,
        )
        self._last_resource_refresh = time.time()

    def _get_used_avail_resources(self, total_allocated_resources: Dict[str, Any]):
        """Split allocated vs. available CPU/GPU/custom resource counts."""
        total_allocated_resources = total_allocated_resources.copy()

        used_cpu = total_allocated_resources.pop("CPU", 0)
        total_cpu = self._avail_resources.cpu
        used_gpu = total_allocated_resources.pop("GPU", 0)
        total_gpu = self._avail_resources.gpu

        # Skip node-affinity pseudo-resources and unused placement-group
        # internals to keep the summary readable.
        custom_used_total = {
            name: (
                total_allocated_resources.get(name, 0.0),
                self._avail_resources.get_res_total(name),
            )
            for name in self._avail_resources.custom_resources
            if not name.startswith(NODE_ID_PREFIX)
            and (total_allocated_resources.get(name, 0.0) > 0 or "_group_" not in name)
        }
        return used_cpu, total_cpu, used_gpu, total_gpu, custom_used_total

    def debug_string(self, total_allocated_resources: Dict[str, Any]) -> str:
        """Returns a human readable message for printing to the console."""
        if self._last_resource_refresh > 0:
            (
                used_cpu,
                total_cpu,
                used_gpu,
                total_gpu,
                custom_used_total,
            ) = self._get_used_avail_resources(total_allocated_resources)

            if (
                used_cpu > total_cpu
                or used_gpu > total_gpu
                or any(used > total for (used, total) in custom_used_total.values())
            ):
                # If any of the used resources are higher than what we
                # currently think is available, update our state and re-fetch.
                self.update_avail_resources(force=True)
                (
                    used_cpu,
                    total_cpu,
                    used_gpu,
                    total_gpu,
                    custom_used_total,
                ) = self._get_used_avail_resources(total_allocated_resources)

            status = (
                f"Logical resource usage: {used_cpu}/{total_cpu} CPUs, "
                f"{used_gpu}/{total_gpu} GPUs"
            )
            customs = ", ".join(
                f"{used}/{total} {name}"
                for name, (used, total) in custom_used_total.items()
            )

            if customs:
                status += f" ({customs})"
            return status
        else:
            return "Logical resource usage: ?"

    def get_num_cpus(self) -> int:
        self.update_avail_resources()
        return self._avail_resources.cpu

    def get_num_gpus(self) -> int:
        self.update_avail_resources()
        return self._avail_resources.gpu

    def __reduce__(self):
        # Do not need to serialize resources, because we can always
        # update it again. This also prevents keeping outdated resources
        # when deserialized.
        return _ResourceUpdater, (self._refresh_period,)
@DeveloperAPI
class TuneFunctionDecoder(json.JSONDecoder):
    """JSON decoder counterpart to ``TuneFunctionEncoder``.

    Restores objects that the encoder serialized via the cloudpickle
    fallback payload.
    """

    def __init__(self, *args, **kwargs):
        json.JSONDecoder.__init__(self, object_hook=self.object_hook, *args, **kwargs)

    def object_hook(self, obj):
        is_fallback = obj.get("_type") == "CLOUDPICKLE_FALLBACK"
        return self._from_cloudpickle(obj) if is_fallback else obj

    def _from_cloudpickle(self, obj):
        return cloudpickle.loads(hex_to_binary(obj["value"]))


def _import_gputil():
    """Return the optional GPUtil module, or None if it is not installed."""
    try:
        import GPUtil
    except ImportError:
        GPUtil = None
    return GPUtil


START_OF_TIME = time.time()


@DeveloperAPI
class UtilMonitor(Thread):
    """Class for system usage utilization monitoring.

    Background thread that samples CPU/RAM via psutil and per-GPU load/VRAM
    via GPUtil every ``delay`` seconds. ``get_data`` drains the collected
    samples and returns their means. Can be enabled with
    Tuner(param_space={"log_sys_usage": True}).
    """

    def __init__(self, start=True, delay=0.7):
        self.stopped = True
        gputil_mod = _import_gputil()
        self.GPUtil = gputil_mod
        if gputil_mod is None and start:
            logger.warning("Install gputil for GPU system monitoring.")

        if psutil is None and start:
            logger.warning("Install psutil to monitor system performance.")

        if gputil_mod is None and psutil is None:
            # Nothing to sample; skip Thread initialization entirely.
            return

        super(UtilMonitor, self).__init__()
        self.delay = delay  # Time between calls to GPUtil
        self.values = defaultdict(list)
        self.lock = threading.Lock()
        self.daemon = True
        if start:
            self.start()

    def _read_utilization(self):
        # Take one sample of every available metric under the lock.
        with self.lock:
            if psutil is not None:
                self.values["cpu_util_percent"].append(
                    float(psutil.cpu_percent(interval=None))
                )
                self.values["ram_util_percent"].append(
                    float(psutil.virtual_memory().percent)
                )
            if self.GPUtil is not None:
                detected_gpus = []
                try:
                    detected_gpus = self.GPUtil.getGPUs()
                except Exception:
                    logger.debug("GPUtil failed to retrieve GPUs.")
                for gpu in detected_gpus:
                    self.values["gpu_util_percent" + str(gpu.id)].append(
                        float(gpu.load)
                    )
                    self.values["vram_util_percent" + str(gpu.id)].append(
                        float(gpu.memoryUtil)
                    )

    def get_data(self):
        """Drain collected samples and return their means under ``perf``."""
        if self.stopped:
            return {}

        with self.lock:
            drained = copy.deepcopy(self.values)
            for samples in self.values.values():
                del samples[:]
        return {
            "perf": {
                metric: np.mean(samples)
                for metric, samples in drained.items()
                if len(samples) > 0
            }
        }

    def run(self):
        self.stopped = False
        while not self.stopped:
            self._read_utilization()
            time.sleep(self.delay)

    def stop(self):
        self.stopped = True
@DeveloperAPI
def retry_fn(
    fn: Callable[[], Any],
    exception_type: Union[Type[Exception], Sequence[Type[Exception]]] = Exception,
    num_retries: int = 3,
    sleep_time: int = 1,
    timeout: Optional[Number] = None,
) -> bool:
    """Run ``fn`` in a daemon thread, retrying on failure or timeout.

    Args:
        fn: Zero-argument callable to execute.
        exception_type: Exception type(s) counted as an expected failure.
        num_retries: Maximum number of attempts.
        sleep_time: Seconds to sleep between attempts.
        timeout: Per-attempt timeout in seconds (None waits indefinitely).

    Returns:
        True if an attempt completed without error, False otherwise.
    """
    errored = threading.Event()

    def _try_fn():
        try:
            fn()
        except exception_type as e:
            logger.warning(e)
            errored.set()
        except Exception as e:
            # BUGFIX: an exception outside `exception_type` used to escape the
            # worker thread unrecorded, leaving `errored` clear and making the
            # attempt look successful. Record it as a failure instead.
            logger.warning(e)
            errored.set()

    for i in range(num_retries):
        errored.clear()

        proc = threading.Thread(target=_try_fn)
        # Daemonize so a hung attempt cannot block interpreter shutdown.
        proc.daemon = True
        proc.start()
        proc.join(timeout=timeout)

        if proc.is_alive():
            logger.debug(
                f"Process timed out (try {i+1}/{num_retries}): "
                f"{getattr(fn, '__name__', None)}"
            )
        elif not errored.is_set():
            # Attempt finished cleanly: success.
            return True

        # Attempt timed out or errored: sleep, then retry.
        time.sleep(sleep_time)

    # All attempts timed out or errored.
    return False


@DeveloperAPI
class warn_if_slow:
    """Prints a warning if a given operation is slower than 500ms.

    Example:
        >>> from ray.tune.utils.util import warn_if_slow
        >>> something = ...  # doctest: +SKIP
        >>> with warn_if_slow("some_operation"):  # doctest: +SKIP
        ...    ray.get(something)  # doctest: +SKIP
    """

    DEFAULT_THRESHOLD = float(os.environ.get("TUNE_WARN_THRESHOLD_S", 0.5))
    DEFAULT_MESSAGE = (
        "The `{name}` operation took {duration:.3f} s, "
        "which may be a performance bottleneck."
    )

    def __init__(
        self,
        name: str,
        threshold: Optional[float] = None,
        message: Optional[str] = None,
        disable: bool = False,
    ):
        self.name = name
        self.threshold = threshold or self.DEFAULT_THRESHOLD
        self.message = message or self.DEFAULT_MESSAGE
        self.too_slow = False
        self.disable = disable

    def __enter__(self):
        self.start = time.time()
        return self

    def __exit__(self, type, value, traceback):
        now = time.time()
        if self.disable:
            return
        # Suppress warnings during the first minute of the process to avoid
        # flagging one-time startup costs.
        if now - self.start > self.threshold and now - START_OF_TIME > 60.0:
            self.too_slow = True
            duration = now - self.start
            logger.warning(self.message.format(name=self.name, duration=duration))


@DeveloperAPI
class Tee(object):
    """File-like object that forwards writes/seeks/flushes to two streams.

    ValueErrors from either stream (e.g. writing to a closed file) are
    logged instead of raised, with recursion protection in case stderr
    itself is redirected to this object.
    """

    def __init__(self, stream1, stream2):
        self.stream1 = stream1
        self.stream2 = stream2

        # If True, we are currently handling a warning.
        # We use this flag to avoid infinite recursion.
        self._handling_warning = False

    def _warn(self, op, s, args, kwargs):
        # If we are already handling a warning, this is because
        # `logger.warning` below triggered the same object again
        # (e.g. because stderr is redirected to this object).
        # In that case, exit early to avoid recursion.
        if self._handling_warning:
            return

        msg = f"ValueError when calling '{op}' on stream ({s}). "
        msg += f"args: {args} kwargs: {kwargs}"

        self._handling_warning = True
        logger.warning(msg)
        self._handling_warning = False

    def seek(self, *args, **kwargs):
        for s in [self.stream1, self.stream2]:
            try:
                s.seek(*args, **kwargs)
            except ValueError:
                self._warn("seek", s, args, kwargs)

    def write(self, *args, **kwargs):
        for s in [self.stream1, self.stream2]:
            try:
                s.write(*args, **kwargs)
            except ValueError:
                self._warn("write", s, args, kwargs)

    def flush(self, *args, **kwargs):
        for s in [self.stream1, self.stream2]:
            try:
                s.flush(*args, **kwargs)
            except ValueError:
                self._warn("flush", s, args, kwargs)

    @property
    def encoding(self):
        if hasattr(self.stream1, "encoding"):
            return self.stream1.encoding
        return self.stream2.encoding

    @property
    def error(self):
        if hasattr(self.stream1, "error"):
            return self.stream1.error
        return self.stream2.error

    @property
    def newlines(self):
        if hasattr(self.stream1, "newlines"):
            return self.stream1.newlines
        return self.stream2.newlines

    def detach(self):
        raise NotImplementedError

    def read(self, *args, **kwargs):
        raise NotImplementedError

    def readline(self, *args, **kwargs):
        raise NotImplementedError

    def tell(self, *args, **kwargs):
        raise NotImplementedError
" + msg += f"args: {args} kwargs: {kwargs}" + + self._handling_warning = True + logger.warning(msg) + self._handling_warning = False + + def seek(self, *args, **kwargs): + for s in [self.stream1, self.stream2]: + try: + s.seek(*args, **kwargs) + except ValueError: + self._warn("seek", s, args, kwargs) + + def write(self, *args, **kwargs): + for s in [self.stream1, self.stream2]: + try: + s.write(*args, **kwargs) + except ValueError: + self._warn("write", s, args, kwargs) + + def flush(self, *args, **kwargs): + for s in [self.stream1, self.stream2]: + try: + s.flush(*args, **kwargs) + except ValueError: + self._warn("flush", s, args, kwargs) + + @property + def encoding(self): + if hasattr(self.stream1, "encoding"): + return self.stream1.encoding + return self.stream2.encoding + + @property + def error(self): + if hasattr(self.stream1, "error"): + return self.stream1.error + return self.stream2.error + + @property + def newlines(self): + if hasattr(self.stream1, "newlines"): + return self.stream1.newlines + return self.stream2.newlines + + def detach(self): + raise NotImplementedError + + def read(self, *args, **kwargs): + raise NotImplementedError + + def readline(self, *args, **kwargs): + raise NotImplementedError + + def tell(self, *args, **kwargs): + raise NotImplementedError + + +@DeveloperAPI +def date_str(): + return datetime.today().strftime("%Y-%m-%d_%H-%M-%S") + + +def _to_pinnable(obj): + """Converts obj to a form that can be pinned in object store memory. + + Currently only numpy arrays are pinned in memory, if you have a strong + reference to the array value. + """ + + return (obj, np.zeros(1)) + + +def _from_pinnable(obj): + """Retrieve from _to_pinnable format.""" + + return obj[0] + + +@DeveloperAPI +def diagnose_serialization(trainable: Callable): + """Utility for detecting why your trainable function isn't serializing. + + Args: + trainable: The trainable object passed to + tune.Tuner(trainable). Currently only supports + Function API. 
+ + Returns: + bool | set of unserializable objects. + + Example: + + .. code-block:: python + + import threading + # this is not serializable + e = threading.Event() + + def test(): + print(e) + + diagnose_serialization(test) + # should help identify that 'e' should be moved into + # the `test` scope. + + # correct implementation + def test(): + e = threading.Event() + print(e) + + assert diagnose_serialization(test) is True + + """ + from ray.tune.registry import _check_serializability, register_trainable + + def check_variables(objects, failure_set, printer): + for var_name, variable in objects.items(): + msg = None + try: + _check_serializability(var_name, variable) + status = "PASSED" + except Exception as e: + status = "FAILED" + msg = f"{e.__class__.__name__}: {str(e)}" + failure_set.add(var_name) + printer(f"{str(variable)}[name='{var_name}'']... {status}") + if msg: + printer(msg) + + print(f"Trying to serialize {trainable}...") + try: + register_trainable("__test:" + str(trainable), trainable, warn=False) + print("Serialization succeeded!") + return True + except Exception as e: + print(f"Serialization failed: {e}") + + print( + "Inspecting the scope of the trainable by running " + f"`inspect.getclosurevars({str(trainable)})`..." + ) + closure = inspect.getclosurevars(trainable) + failure_set = set() + if closure.globals: + print( + f"Detected {len(closure.globals)} global variables. " + "Checking serializability..." + ) + check_variables(closure.globals, failure_set, lambda s: print(" " + s)) + + if closure.nonlocals: + print( + f"Detected {len(closure.nonlocals)} nonlocal variables. " + "Checking serializability..." + ) + check_variables(closure.nonlocals, failure_set, lambda s: print(" " + s)) + + if not failure_set: + print( + "Nothing was found to have failed the diagnostic test, though " + "serialization did not succeed. Feel free to raise an " + "issue on github." 
def _atomic_save(state: Dict, checkpoint_dir: str, file_name: str, tmp_file_name: str):
    """Atomically saves the state object to the checkpoint directory.

    This is automatically used by Tuner().fit during a Tune job. The state is
    written to a temporary file first and then renamed over the final name,
    so readers never observe a partially written checkpoint.

    Args:
        state: Object state to be serialized.
        checkpoint_dir: Directory location for the checkpoint.
        file_name: Final name of file.
        tmp_file_name: Temporary name of file.
    """
    import ray.cloudpickle as cloudpickle

    staging_path = os.path.join(checkpoint_dir, tmp_file_name)
    with open(staging_path, "wb") as f:
        cloudpickle.dump(state, f)

    os.replace(staging_path, os.path.join(checkpoint_dir, file_name))


def _load_newest_checkpoint(dirpath: str, ckpt_pattern: str) -> Optional[Dict]:
    """Returns the most recently modified checkpoint.

    Assumes files are saved with an ordered name, most likely by
    :obj:atomic_save, so the lexicographically largest match is the newest.

    Args:
        dirpath: Directory in which to look for the checkpoint file.
        ckpt_pattern: File name pattern to match to find checkpoint files.

    Returns:
        (dict) Deserialized state dict, or None when nothing matches.
    """
    import ray.cloudpickle as cloudpickle

    candidates = glob.glob(os.path.join(dirpath, ckpt_pattern))
    if not candidates:
        return
    newest_path = max(candidates)
    with open(newest_path, "rb") as f:
        return cloudpickle.load(f)


@PublicAPI(stability="beta")
def wait_for_gpu(
    gpu_id: Optional[Union[int, str]] = None,
    target_util: float = 0.01,
    retry: int = 20,
    delay_s: int = 5,
    gpu_memory_limit: Optional[float] = None,
):
    """Checks if a given GPU has freed memory.

    Requires ``gputil`` to be installed: ``pip install gputil``.

    Args:
        gpu_id: GPU id or uuid to check.
            Must be found within GPUtil.getGPUs(). If none, resorts to
            the first item returned from `ray.get_gpu_ids()`.
        target_util: The utilization threshold to reach to unblock.
            Set this to 0 to block until the GPU is completely free.
        retry: Number of times to check GPU limit. Sleeps `delay_s`
            seconds between checks.
        delay_s: Seconds to wait before check.

    Returns:
        bool: True if free.

    Raises:
        RuntimeError: If GPUtil is not found, if no GPUs are detected
            or if the check fails.

    Example:

    .. code-block:: python

        def tune_func(config):
            tune.utils.wait_for_gpu()
            train()

        tuner = tune.Tuner(
            tune.with_resources(
                tune_func,
                resources={"gpu": 1}
            ),
            tune_config=tune.TuneConfig(num_samples=10)
        )
        tuner.fit()

    """
    GPUtil = _import_gputil()

    if GPUtil is None:
        raise RuntimeError("GPUtil must be installed if calling `wait_for_gpu`.")

    if gpu_id is None:
        # Fall back to the first GPU Ray assigned to this worker.
        assigned = ray.get_gpu_ids()
        if not assigned:
            raise RuntimeError(
                "No GPU ids found from `ray.get_gpu_ids()`. "
                "Did you set Tune resources correctly?"
            )
        gpu_id = assigned[0]

    gpu_attr = "id"
    if isinstance(gpu_id, str):
        if gpu_id.isdigit():
            # GPU ID returned from `ray.get_gpu_ids()` is a str representation
            # of the int GPU ID
            gpu_id = int(gpu_id)
        else:
            # Could not coerce gpu_id to int, so assume UUID
            # and compare against `uuid` attribute e.g.,
            # 'GPU-04546190-b68d-65ac-101b-035f8faed77d'
            gpu_attr = "uuid"
    elif not isinstance(gpu_id, int):
        raise ValueError(f"gpu_id ({type(gpu_id)}) must be type str/int.")

    def gpu_id_fn(g):
        # Returns either `g.id` or `g.uuid` depending on
        # the format of the input `gpu_id`
        return getattr(g, gpu_attr)

    visible_ids = {gpu_id_fn(g) for g in GPUtil.getGPUs()}
    if gpu_id not in visible_ids:
        raise ValueError(
            f"{gpu_id} not found in set of available GPUs: {visible_ids}. "
            "`wait_for_gpu` takes either GPU ordinal ID (e.g., '0') or "
            "UUID (e.g., 'GPU-04546190-b68d-65ac-101b-035f8faed77d')."
        )

    for attempt in range(int(retry)):
        matched_gpu = next(g for g in GPUtil.getGPUs() if gpu_id_fn(g) == gpu_id)
        if matched_gpu.memoryUtil > target_util:
            logger.info(
                f"Waiting for GPU util to reach {target_util}. "
                f"Util: {matched_gpu.memoryUtil:0.3f}"
            )
            time.sleep(delay_s)
        else:
            return True
    raise RuntimeError("GPU memory was not freed.")
" + "`wait_for_gpu` takes either GPU ordinal ID (e.g., '0') or " + "UUID (e.g., 'GPU-04546190-b68d-65ac-101b-035f8faed77d')." + ) + + for i in range(int(retry)): + gpu_object = next(g for g in GPUtil.getGPUs() if gpu_id_fn(g) == gpu_id) + if gpu_object.memoryUtil > target_util: + logger.info( + f"Waiting for GPU util to reach {target_util}. " + f"Util: {gpu_object.memoryUtil:0.3f}" + ) + time.sleep(delay_s) + else: + return True + raise RuntimeError("GPU memory was not freed.") + + +@DeveloperAPI +def validate_save_restore( + trainable_cls: Type, + config: Optional[Dict] = None, + num_gpus: int = 0, +): + """Helper method to check if your Trainable class will resume correctly. + + Args: + trainable_cls: Trainable class for evaluation. + config: Config to pass to Trainable when testing. + num_gpus: GPU resources to allocate when testing. + use_object_store: Whether to save and restore to Ray's object + store. Recommended to set this to True if planning to use + algorithms that pause training (i.e., PBT, HyperBand). + """ + assert ray.is_initialized(), "Need Ray to be initialized." + + remote_cls = ray.remote(num_gpus=num_gpus)(trainable_cls) + trainable_1 = remote_cls.remote(config=config) + trainable_2 = remote_cls.remote(config=config) + + from ray.air.constants import TRAINING_ITERATION + + for _ in range(3): + res = ray.get(trainable_1.train.remote()) + + assert res.get(TRAINING_ITERATION), ( + "Validation will not pass because it requires `training_iteration` " + "to be returned." 
+ ) + + ray.get(trainable_2.restore.remote(trainable_1.save.remote())) + + res = ray.get(trainable_2.train.remote()) + assert res[TRAINING_ITERATION] == 4 + + res = ray.get(trainable_2.train.remote()) + assert res[TRAINING_ITERATION] == 5 + return True + + +def _detect_config_single(func): + """Check if func({}) works.""" + func_sig = inspect.signature(func) + use_config_single = True + try: + func_sig.bind({}) + except Exception as e: + logger.debug(str(e)) + use_config_single = False + return use_config_single + + +@PublicAPI() +def validate_warmstart( + parameter_names: List[str], + points_to_evaluate: List[Union[List, Dict]], + evaluated_rewards: List, + validate_point_name_lengths: bool = True, +): + """Generic validation of a Searcher's warm start functionality. + Raises exceptions in case of type and length mismatches between + parameters. + + If ``validate_point_name_lengths`` is False, the equality of lengths + between ``points_to_evaluate`` and ``parameter_names`` will not be + validated. + """ + if points_to_evaluate: + if not isinstance(points_to_evaluate, list): + raise TypeError( + "points_to_evaluate expected to be a list, got {}.".format( + type(points_to_evaluate) + ) + ) + for point in points_to_evaluate: + if not isinstance(point, (dict, list)): + raise TypeError( + f"points_to_evaluate expected to include list or dict, " + f"got {point}." + ) + + if validate_point_name_lengths and (not len(point) == len(parameter_names)): + raise ValueError( + "Dim of point {}".format(point) + + " and parameter_names {}".format(parameter_names) + + " do not match." 
+ ) + + if points_to_evaluate and evaluated_rewards: + if not isinstance(evaluated_rewards, list): + raise TypeError( + "evaluated_rewards expected to be a list, got {}.".format( + type(evaluated_rewards) + ) + ) + if not len(evaluated_rewards) == len(points_to_evaluate): + raise ValueError( + "Dim of evaluated_rewards {}".format(evaluated_rewards) + + " and points_to_evaluate {}".format(points_to_evaluate) + + " do not match." + )