diff --git a/.gitattributes b/.gitattributes index 22c71914747072499aa8caca9eafe6cee7371f48..403db22d381680a1ef963b6288d40f01393ee1b8 100644 --- a/.gitattributes +++ b/.gitattributes @@ -155,3 +155,4 @@ tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/_inductor/_ .venv/lib/python3.11/site-packages/ray/data/__pycache__/dataset.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text .venv/lib/python3.11/site-packages/ray/data/__pycache__/read_api.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text .venv/lib/python3.11/site-packages/numpy/ma/tests/__pycache__/test_extras.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text +.venv/lib/python3.11/site-packages/ray/serve/_private/__pycache__/deployment_state.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text diff --git a/.venv/lib/python3.11/site-packages/ray/air/__init__.py b/.venv/lib/python3.11/site-packages/ray/air/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..07f8d0077492d7d442ea9384aea791930c487304 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/air/__init__.py @@ -0,0 +1,22 @@ +from ray.air.config import ( + CheckpointConfig, + DatasetConfig, + FailureConfig, + RunConfig, + ScalingConfig, +) +from ray.air.data_batch_type import DataBatchType +from ray.air.execution.resources.request import AcquiredResources, ResourceRequest +from ray.air.result import Result + +__all__ = [ + "DataBatchType", + "RunConfig", + "Result", + "ScalingConfig", + "DatasetConfig", + "FailureConfig", + "CheckpointConfig", + "AcquiredResources", + "ResourceRequest", +] diff --git a/.venv/lib/python3.11/site-packages/ray/air/config.py b/.venv/lib/python3.11/site-packages/ray/air/config.py new file mode 100644 index 0000000000000000000000000000000000000000..85e6fac7d716802715dff0ae003efd6d673e0044 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/air/config.py @@ -0,0 +1,766 @@ +import logging +from collections import Counter, defaultdict +from dataclasses import 
_MISSING_TYPE, dataclass, fields +from pathlib import Path +from typing import ( + TYPE_CHECKING, + Any, + Callable, + Dict, + List, + Mapping, + Optional, + Tuple, + Union, +) + +import pyarrow.fs + +from ray._private.ray_constants import RESOURCE_CONSTRAINT_PREFIX +from ray._private.storage import _get_storage_uri +from ray._private.thirdparty.tabulate.tabulate import tabulate +from ray.data.preprocessor import Preprocessor +from ray.util.annotations import Deprecated, PublicAPI +from ray.widgets import Template, make_table_html_repr + +if TYPE_CHECKING: + from ray.train import SyncConfig + from ray.tune.callback import Callback + from ray.tune.execution.placement_groups import PlacementGroupFactory + from ray.tune.experimental.output import AirVerbosity + from ray.tune.search.sample import Domain + from ray.tune.stopper import Stopper + from ray.tune.utils.log import Verbosity + + +# Dict[str, List] is to support `tune.grid_search`: +# TODO(sumanthratna/matt): Upstream this to Tune. +SampleRange = Union["Domain", Dict[str, List]] + + +MAX = "max" +MIN = "min" +_DEPRECATED_VALUE = "DEPRECATED" + +DATASET_CONFIG_DEPRECATION_MSG = """ +Use `ray.train.DataConfig` instead of DatasetConfig to configure data ingest for training. See https://docs.ray.io/en/releases-2.6.3/ray-air/check-ingest.html#migrating-from-the-legacy-datasetconfig-api for more details. +""" # noqa: E501 + + +logger = logging.getLogger(__name__) + + +def _repr_dataclass(obj, *, default_values: Optional[Dict[str, Any]] = None) -> str: + """A utility function to elegantly represent dataclasses. + + In contrast to the default dataclass `__repr__`, which shows all parameters, this + function only shows parameters with non-default values. + + Args: + obj: The dataclass to represent. + default_values: An optional dictionary that maps field names to default values. + Use this parameter to specify default values that are generated dynamically + (e.g., in `__post_init__` or by a `default_factory`). 
If a default value + isn't specified in `default_values`, then the default value is inferred from + the `dataclass`. + + Returns: + A representation of the dataclass. + """ + if default_values is None: + default_values = {} + + non_default_values = {} # Maps field name to value. + + def equals(value, default_value): + # We need to special case None because of a bug in pyarrow: + # https://github.com/apache/arrow/issues/38535 + if value is None and default_value is None: + return True + if value is None or default_value is None: + return False + return value == default_value + + for field in fields(obj): + value = getattr(obj, field.name) + default_value = default_values.get(field.name, field.default) + is_required = isinstance(field.default, _MISSING_TYPE) + if is_required or not equals(value, default_value): + non_default_values[field.name] = value + + string = f"{obj.__class__.__name__}(" + string += ", ".join( + f"{name}={value!r}" for name, value in non_default_values.items() + ) + string += ")" + + return string + + +@dataclass +@PublicAPI(stability="stable") +class ScalingConfig: + """Configuration for scaling training. + + For more details, see :ref:`train_scaling_config`. + + Args: + trainer_resources: Resources to allocate for the training coordinator. + The training coordinator launches the worker group and executes + the training function per worker, and this process does NOT require + GPUs. The coordinator is always scheduled on the same node as the + rank 0 worker, so one example use case is to set a minimum amount + of resources (e.g. CPU memory) required by the rank 0 node. + By default, this assigns 1 CPU to the training coordinator. + num_workers: The number of workers (Ray actors) to launch. + Each worker will reserve 1 CPU by default. The number of CPUs + reserved by each worker can be overridden with the + ``resources_per_worker`` argument. + use_gpu: If True, training will be done on GPUs (1 per worker). + Defaults to False. 
The number of GPUs reserved by each + worker can be overridden with the ``resources_per_worker`` + argument. + resources_per_worker: If specified, the resources + defined in this Dict is reserved for each worker. + Define the ``"CPU"`` key (case-sensitive) to + override the number of CPUs used by each worker. + This can also be used to request :ref:`custom resources `. + placement_strategy: The placement strategy to use for the + placement group of the Ray actors. See :ref:`Placement Group + Strategies ` for the possible options. + accelerator_type: [Experimental] If specified, Ray Train will launch the + training coordinator and workers on the nodes with the specified type + of accelerators. + See :ref:`the available accelerator types `. + Ensure that your cluster has instances with the specified accelerator type + or is able to autoscale to fulfill the request. + + Example: + + .. code-block:: python + + from ray.train import ScalingConfig + scaling_config = ScalingConfig( + # Number of distributed workers. + num_workers=2, + # Turn on/off GPU. + use_gpu=True, + # Assign extra CPU/GPU/custom resources per worker. + resources_per_worker={"GPU": 1, "CPU": 1, "memory": 1e9, "custom": 1.0}, + # Try to schedule workers on different nodes. + placement_strategy="SPREAD", + ) + + """ + + trainer_resources: Optional[Union[Dict, SampleRange]] = None + num_workers: Union[int, SampleRange] = 1 + use_gpu: Union[bool, SampleRange] = False + resources_per_worker: Optional[Union[Dict, SampleRange]] = None + placement_strategy: Union[str, SampleRange] = "PACK" + accelerator_type: Optional[str] = None + + def __post_init__(self): + if self.resources_per_worker: + if not self.use_gpu and self.num_gpus_per_worker > 0: + raise ValueError( + "`use_gpu` is False but `GPU` was found in " + "`resources_per_worker`. Either set `use_gpu` to True or " + "remove `GPU` from `resources_per_worker." 
+ ) + + if self.use_gpu and self.num_gpus_per_worker == 0: + raise ValueError( + "`use_gpu` is True but `GPU` is set to 0 in " + "`resources_per_worker`. Either set `use_gpu` to False or " + "request a positive number of `GPU` in " + "`resources_per_worker." + ) + + def __repr__(self): + return _repr_dataclass(self) + + def _repr_html_(self) -> str: + return make_table_html_repr(obj=self, title=type(self).__name__) + + def __eq__(self, o: "ScalingConfig") -> bool: + if not isinstance(o, type(self)): + return False + return self.as_placement_group_factory() == o.as_placement_group_factory() + + @property + def _resources_per_worker_not_none(self): + if self.resources_per_worker is None: + if self.use_gpu: + # Note that we don't request any CPUs, which avoids possible + # scheduling contention. Generally nodes have many more CPUs than + # GPUs, so not requesting a CPU does not lead to oversubscription. + resources_per_worker = {"GPU": 1} + else: + resources_per_worker = {"CPU": 1} + else: + resources_per_worker = { + k: v for k, v in self.resources_per_worker.items() if v != 0 + } + + if self.use_gpu: + resources_per_worker.setdefault("GPU", 1) + + if self.accelerator_type: + accelerator = f"{RESOURCE_CONSTRAINT_PREFIX}{self.accelerator_type}" + resources_per_worker.setdefault(accelerator, 0.001) + return resources_per_worker + + @property + def _trainer_resources_not_none(self): + if self.trainer_resources is None: + if self.num_workers: + # For Google Colab, don't allocate resources to the base Trainer. + # Colab only has 2 CPUs, and because of this resource scarcity, + # we have to be careful on where we allocate resources. Since Colab + # is not distributed, the concern about many parallel Ray Tune trials + # leading to all Trainers being scheduled on the head node if we set + # `trainer_resources` to 0 is no longer applicable. 
+ try: + import google.colab # noqa: F401 + + trainer_num_cpus = 0 + except ImportError: + trainer_num_cpus = 1 + else: + # If there are no additional workers, then always reserve 1 CPU for + # the Trainer. + trainer_num_cpus = 1 + + trainer_resources = {"CPU": trainer_num_cpus} + else: + trainer_resources = { + k: v for k, v in self.trainer_resources.items() if v != 0 + } + + return trainer_resources + + @property + def total_resources(self): + """Map of total resources required for the trainer.""" + total_resource_map = defaultdict(float, self._trainer_resources_not_none) + for k, value in self._resources_per_worker_not_none.items(): + total_resource_map[k] += value * self.num_workers + return dict(total_resource_map) + + @property + def num_cpus_per_worker(self): + """The number of CPUs to set per worker.""" + return self._resources_per_worker_not_none.get("CPU", 0) + + @property + def num_gpus_per_worker(self): + """The number of GPUs to set per worker.""" + return self._resources_per_worker_not_none.get("GPU", 0) + + @property + def additional_resources_per_worker(self): + """Resources per worker, not including CPU or GPU resources.""" + return { + k: v + for k, v in self._resources_per_worker_not_none.items() + if k not in ["CPU", "GPU"] + } + + def as_placement_group_factory(self) -> "PlacementGroupFactory": + """Returns a PlacementGroupFactory to specify resources for Tune.""" + from ray.tune.execution.placement_groups import PlacementGroupFactory + + trainer_bundle = self._trainer_resources_not_none + worker_bundle = self._resources_per_worker_not_none + + # Colocate Trainer and rank0 worker by merging their bundles + # Note: This empty bundle is required so that the Tune actor manager schedules + # the Trainable onto the combined bundle while taking none of its resources, + # rather than a non-empty head bundle. 
+ combined_bundle = dict(Counter(trainer_bundle) + Counter(worker_bundle)) + bundles = [{}, combined_bundle] + [worker_bundle] * (self.num_workers - 1) + return PlacementGroupFactory(bundles, strategy=self.placement_strategy) + + @classmethod + def from_placement_group_factory( + cls, pgf: "PlacementGroupFactory" + ) -> "ScalingConfig": + """Create a ScalingConfig from a Tune's PlacementGroupFactory + + Note that this is only needed for ResourceChangingScheduler, which + modifies a trial's PlacementGroupFactory but doesn't propagate + the changes to ScalingConfig. TrainTrainable needs to reconstruct + a ScalingConfig from on the trial's PlacementGroupFactory. + """ + + # pgf.bundles = [{trainer + worker}, {worker}, ..., {worker}] + num_workers = len(pgf.bundles) + combined_resources = pgf.bundles[0] + resources_per_worker = pgf.bundles[-1] + use_gpu = bool(resources_per_worker.get("GPU", False)) + placement_strategy = pgf.strategy + + # In `as_placement_group_factory`, we merged the trainer resource into the + # first worker resources bundle. We need to calculate the resources diff to + # get the trainer resources. + # Note: If there's only one worker, we won't be able to calculate the diff. + # We'll have empty trainer bundle and assign all resources to the worker. + trainer_resources = dict( + Counter(combined_resources) - Counter(resources_per_worker) + ) + + return ScalingConfig( + trainer_resources=trainer_resources, + num_workers=num_workers, + use_gpu=use_gpu, + resources_per_worker=resources_per_worker, + placement_strategy=placement_strategy, + ) + + +@dataclass +@Deprecated(DATASET_CONFIG_DEPRECATION_MSG) +class DatasetConfig: + """Configuration for ingest of a single Dataset. + + See :ref:`the AIR Dataset configuration guide ` for + usage examples. + + This config defines how the Dataset should be read into the DataParallelTrainer. + It configures the preprocessing, splitting, and ingest strategy per-dataset. 
+ + DataParallelTrainers declare default DatasetConfigs for each dataset passed in the + ``datasets`` argument. Users have the opportunity to selectively override these + configs by passing the ``dataset_config`` argument. Trainers can also define user + customizable values (e.g., XGBoostTrainer doesn't support streaming ingest). + + Args: + fit: Whether to fit preprocessors on this dataset. This can be set on at most + one dataset at a time. True by default for the "train" dataset only. + split: Whether the dataset should be split across multiple workers. + True by default for the "train" dataset only. + required: Whether to raise an error if the Dataset isn't provided by the user. + False by default. + transform: Whether to transform the dataset with the fitted preprocessor. + This must be enabled at least for the dataset that is fit. + True by default. + max_object_store_memory_fraction [Experimental]: The maximum fraction + of Ray's shared-memory object store to use for the dataset. The + default value is -1, meaning that the preprocessed dataset should + be cached, which may cause spilling if its size is larger than the + object store's capacity. Pipelined ingest (all other values, 0 or + higher) is experimental. Note that the absolute memory capacity + used is based on the object store capacity at invocation time; this + does not currently cover autoscaling cases where the size of the + cluster may change. + global_shuffle: Whether to enable global shuffle (per pipeline window + in streaming mode). Note that this is an expensive all-to-all operation, + and most likely you want to use local shuffle instead. + See https://docs.ray.io/en/master/data/faq.html and + https://docs.ray.io/en/master/ray-air/check-ingest.html. + False by default. + randomize_block_order: Whether to randomize the iteration order over blocks. + The main purpose of this is to prevent data fetching hotspots in the + cluster when running many parallel workers / trials on the same data. 
+ We recommend enabling it always. True by default. + per_epoch_preprocessor [Experimental]: A preprocessor to re-apply on + each pass of the dataset. The main use case for this is to apply a + random transform on a training dataset on each epoch. The + per-epoch preprocessor will be applied *after* all other + preprocessors and in parallel with the dataset consumer. + use_stream_api: Deprecated. Use max_object_store_memory_fraction instead. + stream_window_size: Deprecated. Use max_object_store_memory_fraction instead. + """ + + # TODO(ekl) could we unify DataParallelTrainer and Trainer so the same data ingest + # strategy applies to all Trainers? + + fit: Optional[bool] = None + split: Optional[bool] = None + required: Optional[bool] = None + transform: Optional[bool] = None + max_object_store_memory_fraction: Optional[float] = None + global_shuffle: Optional[bool] = None + randomize_block_order: Optional[bool] = None + per_epoch_preprocessor: Optional["Preprocessor"] = None + # Deprecated. + use_stream_api: Optional[int] = None + stream_window_size: Optional[int] = None + + def __post_init__(self): + raise DeprecationWarning(DATASET_CONFIG_DEPRECATION_MSG) + + +@dataclass +@PublicAPI(stability="stable") +class FailureConfig: + """Configuration related to failure handling of each training/tuning run. + + Args: + max_failures: Tries to recover a run at least this many times. + Will recover from the latest checkpoint if present. + Setting to -1 will lead to infinite recovery retries. + Setting to 0 will disable retries. Defaults to 0. + fail_fast: Whether to fail upon the first error. + If fail_fast='raise' provided, the original error during training will be + immediately raised. fail_fast='raise' can easily leak resources and + should be used with caution. 
+ """ + + max_failures: int = 0 + fail_fast: Union[bool, str] = False + + def __post_init__(self): + # Same check as in TuneController + if not (isinstance(self.fail_fast, bool) or self.fail_fast.upper() == "RAISE"): + raise ValueError( + "fail_fast must be one of {bool, 'raise'}. " f"Got {self.fail_fast}." + ) + + # Same check as in tune.run + if self.fail_fast and self.max_failures != 0: + raise ValueError( + f"max_failures must be 0 if fail_fast={repr(self.fail_fast)}." + ) + + def __repr__(self): + return _repr_dataclass(self) + + def _repr_html_(self): + return Template("scrollableTable.html.j2").render( + table=tabulate( + { + "Setting": ["Max failures", "Fail fast"], + "Value": [self.max_failures, self.fail_fast], + }, + tablefmt="html", + showindex=False, + headers="keys", + ), + max_height="none", + ) + + +@dataclass +@PublicAPI(stability="stable") +class CheckpointConfig: + """Configurable parameters for defining the checkpointing strategy. + + Default behavior is to persist all checkpoints to disk. If + ``num_to_keep`` is set, the default retention policy is to keep the + checkpoints with maximum timestamp, i.e. the most recent checkpoints. + + Args: + num_to_keep: The number of checkpoints to keep + on disk for this run. If a checkpoint is persisted to disk after + there are already this many checkpoints, then an existing + checkpoint will be deleted. If this is ``None`` then checkpoints + will not be deleted. Must be >= 1. + checkpoint_score_attribute: The attribute that will be used to + score checkpoints to determine which checkpoints should be kept + on disk when there are greater than ``num_to_keep`` checkpoints. + This attribute must be a key from the checkpoint + dictionary which has a numerical value. Per default, the last + checkpoints will be kept. + checkpoint_score_order: Either "max" or "min". + If "max", then checkpoints with highest values of + ``checkpoint_score_attribute`` will be kept. 
+ If "min", then checkpoints with lowest values of + ``checkpoint_score_attribute`` will be kept. + checkpoint_frequency: Number of iterations between checkpoints. If 0 + this will disable checkpointing. + Please note that most trainers will still save one checkpoint at + the end of training. + This attribute is only supported + by trainers that don't take in custom training loops. + checkpoint_at_end: If True, will save a checkpoint at the end of training. + This attribute is only supported by trainers that don't take in + custom training loops. Defaults to True for trainers that support it + and False for generic function trainables. + _checkpoint_keep_all_ranks: This experimental config is deprecated. + This behavior is now controlled by reporting `checkpoint=None` + in the workers that shouldn't persist a checkpoint. + For example, if you only want the rank 0 worker to persist a checkpoint + (e.g., in standard data parallel training), then you should save and + report a checkpoint if `ray.train.get_context().get_world_rank() == 0` + and `None` otherwise. + _checkpoint_upload_from_workers: This experimental config is deprecated. + Uploading checkpoint directly from the worker is now the default behavior. + """ + + num_to_keep: Optional[int] = None + checkpoint_score_attribute: Optional[str] = None + checkpoint_score_order: Optional[str] = MAX + checkpoint_frequency: Optional[int] = 0 + checkpoint_at_end: Optional[bool] = None + _checkpoint_keep_all_ranks: Optional[bool] = _DEPRECATED_VALUE + _checkpoint_upload_from_workers: Optional[bool] = _DEPRECATED_VALUE + + def __post_init__(self): + if self._checkpoint_keep_all_ranks != _DEPRECATED_VALUE: + raise DeprecationWarning( + "The experimental `_checkpoint_keep_all_ranks` config is deprecated. " + "This behavior is now controlled by reporting `checkpoint=None` " + "in the workers that shouldn't persist a checkpoint. 
" + "For example, if you only want the rank 0 worker to persist a " + "checkpoint (e.g., in standard data parallel training), " + "then you should save and report a checkpoint if " + "`ray.train.get_context().get_world_rank() == 0` " + "and `None` otherwise." + ) + + if self._checkpoint_upload_from_workers != _DEPRECATED_VALUE: + raise DeprecationWarning( + "The experimental `_checkpoint_upload_from_workers` config is " + "deprecated. Uploading checkpoint directly from the worker is " + "now the default behavior." + ) + + if self.num_to_keep is not None and self.num_to_keep <= 0: + raise ValueError( + f"Received invalid num_to_keep: " + f"{self.num_to_keep}. " + f"Must be None or an integer >= 1." + ) + if self.checkpoint_score_order not in (MAX, MIN): + raise ValueError( + f"checkpoint_score_order must be either " f'"{MAX}" or "{MIN}".' + ) + + if self.checkpoint_frequency < 0: + raise ValueError( + f"checkpoint_frequency must be >=0, got {self.checkpoint_frequency}" + ) + + def __repr__(self): + return _repr_dataclass(self) + + def _repr_html_(self) -> str: + if self.num_to_keep is None: + num_to_keep_repr = "All" + else: + num_to_keep_repr = self.num_to_keep + + if self.checkpoint_score_attribute is None: + checkpoint_score_attribute_repr = "Most recent" + else: + checkpoint_score_attribute_repr = self.checkpoint_score_attribute + + if self.checkpoint_at_end is None: + checkpoint_at_end_repr = "" + else: + checkpoint_at_end_repr = self.checkpoint_at_end + + return Template("scrollableTable.html.j2").render( + table=tabulate( + { + "Setting": [ + "Number of checkpoints to keep", + "Checkpoint score attribute", + "Checkpoint score order", + "Checkpoint frequency", + "Checkpoint at end", + ], + "Value": [ + num_to_keep_repr, + checkpoint_score_attribute_repr, + self.checkpoint_score_order, + self.checkpoint_frequency, + checkpoint_at_end_repr, + ], + }, + tablefmt="html", + showindex=False, + headers="keys", + ), + max_height="none", + ) + + @property + def 
_tune_legacy_checkpoint_score_attr(self) -> Optional[str]: + """Same as ``checkpoint_score_attr`` in ``tune.run``. + + Only used for Legacy API compatibility. + """ + if self.checkpoint_score_attribute is None: + return self.checkpoint_score_attribute + prefix = "" + if self.checkpoint_score_order == MIN: + prefix = "min-" + return f"{prefix}{self.checkpoint_score_attribute}" + + +@dataclass +@PublicAPI(stability="stable") +class RunConfig: + """Runtime configuration for training and tuning runs. + + Upon resuming from a training or tuning run checkpoint, + Ray Train/Tune will automatically apply the RunConfig from + the previously checkpointed run. + + Args: + name: Name of the trial or experiment. If not provided, will be deduced + from the Trainable. + storage_path: [Beta] Path where all results and checkpoints are persisted. + Can be a local directory or a destination on cloud storage. + For multi-node training/tuning runs, this must be set to a + shared storage location (e.g., S3, NFS). + This defaults to the local ``~/ray_results`` directory. + storage_filesystem: [Beta] A custom filesystem to use for storage. + If this is provided, `storage_path` should be a path with its + prefix stripped (e.g., `s3://bucket/path` -> `bucket/path`). + failure_config: Failure mode configuration. + checkpoint_config: Checkpointing configuration. + sync_config: Configuration object for syncing. See train.SyncConfig. + verbose: 0, 1, or 2. Verbosity mode. + 0 = silent, 1 = default, 2 = verbose. Defaults to 1. + If the ``RAY_AIR_NEW_OUTPUT=1`` environment variable is set, + uses the old verbosity settings: + 0 = silent, 1 = only status updates, 2 = status and brief + results, 3 = status and detailed results. + stop: Stop conditions to consider. Refer to ray.tune.stopper.Stopper + for more info. Stoppers should be serializable. + callbacks: [DeveloperAPI] Callbacks to invoke. + Refer to ray.tune.callback.Callback for more info. + Callbacks should be serializable. 
+ Currently only stateless callbacks are supported for resumed runs. + (any state of the callback will not be checkpointed by Tune + and thus will not take effect in resumed runs). + progress_reporter: [DeveloperAPI] Progress reporter for reporting + intermediate experiment progress. Defaults to CLIReporter if + running in command-line, or JupyterNotebookReporter if running in + a Jupyter notebook. + log_to_file: [DeveloperAPI] Log stdout and stderr to files in + trial directories. If this is `False` (default), no files + are written. If `true`, outputs are written to `trialdir/stdout` + and `trialdir/stderr`, respectively. If this is a single string, + this is interpreted as a file relative to the trialdir, to which + both streams are written. If this is a Sequence (e.g. a Tuple), + it has to have length 2 and the elements indicate the files to + which stdout and stderr are written, respectively. + + """ + + name: Optional[str] = None + storage_path: Optional[str] = None + storage_filesystem: Optional[pyarrow.fs.FileSystem] = None + failure_config: Optional[FailureConfig] = None + checkpoint_config: Optional[CheckpointConfig] = None + sync_config: Optional["SyncConfig"] = None + verbose: Optional[Union[int, "AirVerbosity", "Verbosity"]] = None + stop: Optional[Union[Mapping, "Stopper", Callable[[str, Mapping], bool]]] = None + callbacks: Optional[List["Callback"]] = None + progress_reporter: Optional[ + "ray.tune.progress_reporter.ProgressReporter" # noqa: F821 + ] = None + log_to_file: Union[bool, str, Tuple[str, str]] = False + + # Deprecated + local_dir: Optional[str] = None + + def __post_init__(self): + from ray.train import SyncConfig + from ray.train.constants import DEFAULT_STORAGE_PATH + from ray.tune.experimental.output import AirVerbosity, get_air_verbosity + + if self.local_dir is not None: + raise DeprecationWarning( + "The `RunConfig(local_dir)` argument is deprecated. " + "You should set the `RunConfig(storage_path)` instead." 
+ "See the docs: https://docs.ray.io/en/latest/train/user-guides/" + "persistent-storage.html#setting-the-local-staging-directory" + ) + + if self.storage_path is None: + # TODO(justinvyu): [Deprecated] Remove in 2.30 + self.storage_path = DEFAULT_STORAGE_PATH + + # If no remote path is set, try to get Ray Storage URI + ray_storage_uri: Optional[str] = _get_storage_uri() + if ray_storage_uri is not None: + logger.info( + "Using configured Ray Storage URI as the `storage_path`: " + f"{ray_storage_uri}" + ) + self.storage_path = ray_storage_uri + + if not self.failure_config: + self.failure_config = FailureConfig() + + if not self.sync_config: + self.sync_config = SyncConfig() + + if not self.checkpoint_config: + self.checkpoint_config = CheckpointConfig() + + if self.verbose is None: + # Default `verbose` value. For new output engine, + # this is AirVerbosity.DEFAULT. + # For old output engine, this is Verbosity.V3_TRIAL_DETAILS + # Todo (krfricke): Currently uses number to pass test_configs::test_repr + self.verbose = get_air_verbosity(AirVerbosity.DEFAULT) or 3 + + if isinstance(self.storage_path, Path): + self.storage_path = self.storage_path.as_posix() + + def __repr__(self): + from ray.train import SyncConfig + + return _repr_dataclass( + self, + default_values={ + "failure_config": FailureConfig(), + "sync_config": SyncConfig(), + "checkpoint_config": CheckpointConfig(), + }, + ) + + def _repr_html_(self) -> str: + reprs = [] + if self.failure_config is not None: + reprs.append( + Template("title_data_mini.html.j2").render( + title="Failure Config", data=self.failure_config._repr_html_() + ) + ) + if self.sync_config is not None: + reprs.append( + Template("title_data_mini.html.j2").render( + title="Sync Config", data=self.sync_config._repr_html_() + ) + ) + if self.checkpoint_config is not None: + reprs.append( + Template("title_data_mini.html.j2").render( + title="Checkpoint Config", data=self.checkpoint_config._repr_html_() + ) + ) + + # Create a divider 
between each displayed repr + subconfigs = [Template("divider.html.j2").render()] * (2 * len(reprs) - 1) + subconfigs[::2] = reprs + + settings = Template("scrollableTable.html.j2").render( + table=tabulate( + { + "Name": self.name, + "Local results directory": self.local_dir, + "Verbosity": self.verbose, + "Log to file": self.log_to_file, + }.items(), + tablefmt="html", + headers=["Setting", "Value"], + showindex=False, + ), + max_height="300px", + ) + + return Template("title_data.html.j2").render( + title="RunConfig", + data=Template("run_config.html.j2").render( + subconfigs=subconfigs, + settings=settings, + ), + ) diff --git a/.venv/lib/python3.11/site-packages/ray/air/constants.py b/.venv/lib/python3.11/site-packages/ray/air/constants.py new file mode 100644 index 0000000000000000000000000000000000000000..0e79f848b9f1cbfa7c61f9903abee4e1c4390a4c --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/air/constants.py @@ -0,0 +1,94 @@ +# Key to denote the preprocessor in the checkpoint dict. +PREPROCESSOR_KEY = "_preprocessor" + +# Key to denote the model in the checkpoint dict. +MODEL_KEY = "model" + +# Key to denote which dataset is the evaluation dataset. +# Only used in trainers which do not support multiple +# evaluation datasets. +EVALUATION_DATASET_KEY = "evaluation" + +# Key to denote which dataset is the training dataset. +# This is the dataset that the preprocessor is fit on. +TRAIN_DATASET_KEY = "train" + +# Name to use for the column when representing tensors in table format. +TENSOR_COLUMN_NAME = "__value__" + +# The maximum length of strings returned by `__repr__` for AIR objects constructed with +# default values. +MAX_REPR_LENGTH = int(80 * 1.5) + +# Timeout used when putting exceptions raised by runner thread into the queue. +_ERROR_REPORT_TIMEOUT = 10 + +# Timeout when fetching new results after signaling the training function to continue. +_RESULT_FETCH_TIMEOUT = 0.2 + +# Timeout for fetching exceptions raised by the training function. 
+_ERROR_FETCH_TIMEOUT = 1 + +# The key used to identify whether we have already warned about ray.air.session +# functions being used outside of the session +SESSION_MISUSE_LOG_ONCE_KEY = "air_warn_session_misuse" + +# Name of attribute in Checkpoint storing current Tune ID for restoring +# training with Ray Train +CHECKPOINT_ID_ATTR = "_current_checkpoint_id" + +# Name of the marker dropped by the Trainable. If a worker detects +# the presence of the marker in the trial dir, it will use lazy +# checkpointing. +LAZY_CHECKPOINT_MARKER_FILE = ".lazy_checkpoint_marker" + + +# The timestamp of when the result is generated. +# Default to when the result is processed by tune. +TIMESTAMP = "timestamp" + +# (Auto-filled) Time in seconds this iteration took to run. +# This may be overridden to override the system-computed time difference. +TIME_THIS_ITER_S = "time_this_iter_s" + +# (Auto-filled) The index of this training iteration. +TRAINING_ITERATION = "training_iteration" + +# File that stores parameters of the trial. +EXPR_PARAM_FILE = "params.json" + +# Pickle File that stores parameters of the trial. +EXPR_PARAM_PICKLE_FILE = "params.pkl" + +# File that stores the progress of the trial. +EXPR_PROGRESS_FILE = "progress.csv" + +# File that stores results of the trial. 
+EXPR_RESULT_FILE = "result.json" + +# File that stores the pickled error file +EXPR_ERROR_PICKLE_FILE = "error.pkl" + +# File that stores the error file +EXPR_ERROR_FILE = "error.txt" + +# File that stores the checkpoint metadata +CHECKPOINT_TUNE_METADATA_FILE = ".tune_metadata" + +# ================================================== +# Environment Variables +# ================================================== + +# Integer value which if set will copy files in reported AIR directory +# checkpoints instead of moving them (if worker is on the same node as Trainable) +COPY_DIRECTORY_CHECKPOINTS_INSTEAD_OF_MOVING_ENV = ( + "TRAIN_COPY_DIRECTORY_CHECKPOINTS_INSTEAD_OF_MOVING" +) + +# NOTE: When adding a new environment variable, please track it in this list. +# TODO(ml-team): Most env var constants should get moved here. +AIR_ENV_VARS = { + COPY_DIRECTORY_CHECKPOINTS_INSTEAD_OF_MOVING_ENV, + "RAY_AIR_FULL_TRACEBACKS", + "RAY_AIR_NEW_OUTPUT", +} diff --git a/.venv/lib/python3.11/site-packages/ray/air/data_batch_type.py b/.venv/lib/python3.11/site-packages/ray/air/data_batch_type.py new file mode 100644 index 0000000000000000000000000000000000000000..5d5d09b3218ee11299dfc1ff6aa5d44b2fff67d4 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/air/data_batch_type.py @@ -0,0 +1,11 @@ +from typing import TYPE_CHECKING, Dict, Union + +if TYPE_CHECKING: + import numpy + import pandas # noqa: F401 + import pyarrow + +# TODO de-dup with ray.data.block.DataBatch +DataBatchType = Union[ + "numpy.ndarray", "pyarrow.Table" "pandas.DataFrame", Dict[str, "numpy.ndarray"] +] diff --git a/.venv/lib/python3.11/site-packages/ray/air/execution/__init__.py b/.venv/lib/python3.11/site-packages/ray/air/execution/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..991a5edbf194c675203a6115d66132f62f9e38e9 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/air/execution/__init__.py @@ -0,0 +1,12 @@ +from ray.air.execution.resources.fixed import 
FixedResourceManager +from ray.air.execution.resources.placement_group import PlacementGroupResourceManager +from ray.air.execution.resources.request import AcquiredResources, ResourceRequest +from ray.air.execution.resources.resource_manager import ResourceManager + +__all__ = [ + "ResourceRequest", + "AcquiredResources", + "ResourceManager", + "FixedResourceManager", + "PlacementGroupResourceManager", +] diff --git a/.venv/lib/python3.11/site-packages/ray/air/execution/__pycache__/__init__.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/air/execution/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9e0bc071a989890e3a32bdce8659b0b44f629b51 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/air/execution/__pycache__/__init__.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/air/execution/resources/__init__.py b/.venv/lib/python3.11/site-packages/ray/air/execution/resources/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..991a5edbf194c675203a6115d66132f62f9e38e9 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/air/execution/resources/__init__.py @@ -0,0 +1,12 @@ +from ray.air.execution.resources.fixed import FixedResourceManager +from ray.air.execution.resources.placement_group import PlacementGroupResourceManager +from ray.air.execution.resources.request import AcquiredResources, ResourceRequest +from ray.air.execution.resources.resource_manager import ResourceManager + +__all__ = [ + "ResourceRequest", + "AcquiredResources", + "ResourceManager", + "FixedResourceManager", + "PlacementGroupResourceManager", +] diff --git a/.venv/lib/python3.11/site-packages/ray/air/execution/resources/__pycache__/fixed.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/air/execution/resources/__pycache__/fixed.cpython-311.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..d0084d4d80df1f899ade072fdd71f0a40a900827 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/air/execution/resources/__pycache__/fixed.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/air/execution/resources/__pycache__/placement_group.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/air/execution/resources/__pycache__/placement_group.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a6e82db9365e720431caae0877d1cc77932017b4 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/air/execution/resources/__pycache__/placement_group.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/air/execution/resources/__pycache__/request.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/air/execution/resources/__pycache__/request.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7f10de8ddc5e5b1ae0916da22a051230ea3883ae Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/air/execution/resources/__pycache__/request.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/air/execution/resources/__pycache__/resource_manager.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/air/execution/resources/__pycache__/resource_manager.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..63cc03a234234d226ce9d0b52fe309fa898eaded Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/air/execution/resources/__pycache__/resource_manager.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/air/execution/resources/fixed.py b/.venv/lib/python3.11/site-packages/ray/air/execution/resources/fixed.py new file mode 100644 index 0000000000000000000000000000000000000000..5e969ed7c23cccf9d16af62bc0e421df2cc53ff0 --- /dev/null +++ 
b/.venv/lib/python3.11/site-packages/ray/air/execution/resources/fixed.py @@ -0,0 +1,147 @@ +from dataclasses import dataclass +from typing import Dict, List, Optional + +import ray +from ray import LOCAL_MODE, SCRIPT_MODE +from ray.air.execution.resources.request import ( + AcquiredResources, + RemoteRayEntity, + ResourceRequest, +) +from ray.air.execution.resources.resource_manager import ResourceManager +from ray.util.annotations import DeveloperAPI + +# Avoid numerical errors by multiplying and subtracting with this number. +# Compare: 0.99 - 0.33 = 0.65999... vs (0.99 * 1000 - 0.33 * 1000) / 1000 = 0.66 +_DIGITS = 100000 + + +@DeveloperAPI +@dataclass +class FixedAcquiredResources(AcquiredResources): + bundles: List[Dict[str, float]] + + def _annotate_remote_entity( + self, entity: RemoteRayEntity, bundle: Dict[str, float], bundle_index: int + ) -> RemoteRayEntity: + bundle = bundle.copy() + num_cpus = bundle.pop("CPU", 0) + num_gpus = bundle.pop("GPU", 0) + memory = bundle.pop("memory", 0.0) + + return entity.options( + num_cpus=num_cpus, + num_gpus=num_gpus, + memory=memory, + resources=bundle, + ) + + +@DeveloperAPI +class FixedResourceManager(ResourceManager): + """Fixed budget based resource manager. + + This resource manager keeps track of a fixed set of resources. When resources + are acquired, they are subtracted from the budget. When resources are freed, + they are added back to the budget. + + The resource manager still requires resources to be requested before they become + available. However, because the resource requests are virtual, this will not + trigger autoscaling. + + Additionally, resources are not reserved on request, only on acquisition. Thus, + acquiring a resource can change the availability of other requests. Note that + this behavior may be changed in future implementations. + + The fixed resource manager does not support placement strategies. Using + ``STRICT_SPREAD`` will result in an error. 
``STRICT_PACK`` will succeed only + within a placement group bundle. All other placement group arguments will be + ignored. + + Args: + total_resources: Budget of resources to manage. Defaults to all available + resources in the current task or all cluster resources (if outside a task). + + """ + + _resource_cls: AcquiredResources = FixedAcquiredResources + + def __init__(self, total_resources: Optional[Dict[str, float]] = None): + rtc = ray.get_runtime_context() + + if not total_resources: + if rtc.worker.mode in {None, SCRIPT_MODE, LOCAL_MODE}: + total_resources = ray.cluster_resources() + else: + total_resources = rtc.get_assigned_resources() + + # If we are in a placement group, all of our resources will be in a bundle + # and thus fulfill requirements of STRICT_PACK - but only if child tasks + # are captured by the pg. + self._allow_strict_pack = ( + ray.util.get_current_placement_group() is not None + and rtc.should_capture_child_tasks_in_placement_group + ) + + self._total_resources = total_resources + self._requested_resources = [] + self._used_resources = [] + + @property + def _available_resources(self) -> Dict[str, float]: + available_resources = self._total_resources.copy() + + for used_resources in self._used_resources: + all_resources = used_resources.required_resources + for k, v in all_resources.items(): + available_resources[k] = ( + available_resources[k] * _DIGITS - v * _DIGITS + ) / _DIGITS + return available_resources + + def request_resources(self, resource_request: ResourceRequest): + if resource_request.strategy == "STRICT_SPREAD" or ( + not self._allow_strict_pack and resource_request.strategy == "STRICT_PACK" + ): + raise RuntimeError( + f"Requested a resource with placement strategy " + f"{resource_request.strategy}, but this cannot be fulfilled by a " + f"FixedResourceManager. In a nested setting, please set the inner " + f"placement strategy to be less restrictive (i.e. no STRICT_ strategy)." 
+ ) + + self._requested_resources.append(resource_request) + + def cancel_resource_request(self, resource_request: ResourceRequest): + self._requested_resources.remove(resource_request) + + def has_resources_ready(self, resource_request: ResourceRequest) -> bool: + if resource_request not in self._requested_resources: + return False + + available_resources = self._available_resources + all_resources = resource_request.required_resources + for k, v in all_resources.items(): + if available_resources.get(k, 0.0) < v: + return False + return True + + def acquire_resources( + self, resource_request: ResourceRequest + ) -> Optional[AcquiredResources]: + if not self.has_resources_ready(resource_request): + return None + + self._used_resources.append(resource_request) + return self._resource_cls( + bundles=resource_request.bundles, resource_request=resource_request + ) + + def free_resources(self, acquired_resource: AcquiredResources): + resources = acquired_resource.resource_request + self._used_resources.remove(resources) + + def clear(self): + # Reset internal state + self._requested_resources = [] + self._used_resources = [] diff --git a/.venv/lib/python3.11/site-packages/ray/air/execution/resources/placement_group.py b/.venv/lib/python3.11/site-packages/ray/air/execution/resources/placement_group.py new file mode 100644 index 0000000000000000000000000000000000000000..d804fc3750a38129fe17f718863b10599e4e170b --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/air/execution/resources/placement_group.py @@ -0,0 +1,214 @@ +import time +from collections import defaultdict +from dataclasses import dataclass +from typing import Dict, List, Optional, Set + +import ray +from ray.air.execution.resources.request import ( + AcquiredResources, + RemoteRayEntity, + ResourceRequest, +) +from ray.air.execution.resources.resource_manager import ResourceManager +from ray.util.annotations import DeveloperAPI +from ray.util.placement_group import PlacementGroup, 
remove_placement_group +from ray.util.scheduling_strategies import PlacementGroupSchedulingStrategy + + +@DeveloperAPI +@dataclass +class PlacementGroupAcquiredResources(AcquiredResources): + placement_group: PlacementGroup + + def _annotate_remote_entity( + self, entity: RemoteRayEntity, bundle: Dict[str, float], bundle_index: int + ) -> RemoteRayEntity: + bundle = bundle.copy() + num_cpus = bundle.pop("CPU", 0) + num_gpus = bundle.pop("GPU", 0) + memory = bundle.pop("memory", 0.0) + + return entity.options( + scheduling_strategy=PlacementGroupSchedulingStrategy( + placement_group=self.placement_group, + placement_group_bundle_index=bundle_index, + placement_group_capture_child_tasks=True, + ), + num_cpus=num_cpus, + num_gpus=num_gpus, + memory=memory, + resources=bundle, + ) + + +@DeveloperAPI +class PlacementGroupResourceManager(ResourceManager): + """Resource manager using placement groups as the resource backend. + + This manager will use placement groups to fulfill resource requests. Requesting + a resource will schedule the placement group. Acquiring a resource will + return a ``PlacementGroupAcquiredResources`` that can be used to schedule + Ray tasks and actors on the placement group. Freeing an acquired resource + will destroy the associated placement group. + + Ray core does not emit events when resources are available. Instead, the + scheduling state has to be periodically updated. + + Per default, placement group scheduling state is refreshed every time when + resource state is inquired, but not more often than once every ``update_interval_s`` + seconds. Alternatively, staging futures can be retrieved (and awaited) with + ``get_resource_futures()`` and state update can be force with ``update_state()``. + + Args: + update_interval_s: Minimum interval in seconds between updating scheduling + state of placement groups. 
+ + """ + + _resource_cls: AcquiredResources = PlacementGroupAcquiredResources + + def __init__(self, update_interval_s: float = 0.1): + # Internally, the placement group lifecycle is like this: + # - Resources are requested with ``request_resources()`` + # - A placement group is scheduled ("staged") + # - A ``PlacementGroup.ready()`` future is scheduled ("staging future") + # - We update the scheduling state when we need to + # (e.g. when ``has_resources_ready()`` is called) + # - When staging futures resolve, a placement group is moved from "staging" + # to "ready" + # - When a resource request is canceled, we remove a placement group from + # "staging". If there are not staged placement groups + # (because they are already "ready"), we remove one from "ready" instead. + # - When a resource is acquired, the pg is removed from "ready" and moved + # to "acquired" + # - When a resource is freed, the pg is removed from "acquired" and destroyed + + # Mapping of placement group to request + self._pg_to_request: Dict[PlacementGroup, ResourceRequest] = {} + + # PGs that are staged but not "ready", yet (i.e. not CREATED) + self._request_to_staged_pgs: Dict[ + ResourceRequest, Set[PlacementGroup] + ] = defaultdict(set) + + # PGs that are CREATED and can be used by tasks and actors + self._request_to_ready_pgs: Dict[ + ResourceRequest, Set[PlacementGroup] + ] = defaultdict(set) + + # Staging futures used to update internal state. + # We keep a double mapping here for better lookup efficiency. + self._staging_future_to_pg: Dict[ray.ObjectRef, PlacementGroup] = dict() + self._pg_to_staging_future: Dict[PlacementGroup, ray.ObjectRef] = dict() + + # Set of acquired PGs. We keep track of these here to make sure we + # only free PGs that this manager managed. 
+ self._acquired_pgs: Set[PlacementGroup] = set() + + # Minimum time between updates of the internal state + self.update_interval_s = update_interval_s + self._last_update = time.monotonic() - self.update_interval_s - 1 + + def get_resource_futures(self) -> List[ray.ObjectRef]: + return list(self._staging_future_to_pg.keys()) + + def _maybe_update_state(self): + now = time.monotonic() + if now > self._last_update + self.update_interval_s: + self.update_state() + + def update_state(self): + ready, not_ready = ray.wait( + list(self._staging_future_to_pg.keys()), + num_returns=len(self._staging_future_to_pg), + timeout=0, + ) + for future in ready: + # Remove staging future + pg = self._staging_future_to_pg.pop(future) + self._pg_to_staging_future.pop(pg) + # Fetch resource request + request = self._pg_to_request[pg] + # Remove from staging, add to ready + self._request_to_staged_pgs[request].remove(pg) + self._request_to_ready_pgs[request].add(pg) + self._last_update = time.monotonic() + + def request_resources(self, resource_request: ResourceRequest): + pg = resource_request.to_placement_group() + self._pg_to_request[pg] = resource_request + self._request_to_staged_pgs[resource_request].add(pg) + + future = pg.ready() + self._staging_future_to_pg[future] = pg + self._pg_to_staging_future[pg] = future + + def cancel_resource_request(self, resource_request: ResourceRequest): + if self._request_to_staged_pgs[resource_request]: + pg = self._request_to_staged_pgs[resource_request].pop() + + # PG was staging + future = self._pg_to_staging_future.pop(pg) + self._staging_future_to_pg.pop(future) + + # Cancel the pg.ready task. + # Otherwise, it will be pending node assignment forever. + ray.cancel(future) + else: + # PG might be ready + pg = self._request_to_ready_pgs[resource_request].pop() + if not pg: + raise RuntimeError( + "Cannot cancel resource request: No placement group was " + f"staged or is ready. 
Make sure to not cancel more resource " + f"requests than you've created. Request: {resource_request}" + ) + + self._pg_to_request.pop(pg) + ray.util.remove_placement_group(pg) + + def has_resources_ready(self, resource_request: ResourceRequest) -> bool: + if not bool(len(self._request_to_ready_pgs[resource_request])): + # Only update state if needed + self._maybe_update_state() + + return bool(len(self._request_to_ready_pgs[resource_request])) + + def acquire_resources( + self, resource_request: ResourceRequest + ) -> Optional[PlacementGroupAcquiredResources]: + if not self.has_resources_ready(resource_request): + return None + + pg = self._request_to_ready_pgs[resource_request].pop() + self._acquired_pgs.add(pg) + + return self._resource_cls(placement_group=pg, resource_request=resource_request) + + def free_resources(self, acquired_resource: PlacementGroupAcquiredResources): + pg = acquired_resource.placement_group + + self._acquired_pgs.remove(pg) + remove_placement_group(pg) + self._pg_to_request.pop(pg) + + def clear(self): + if not ray.is_initialized(): + return + + for staged_pgs in self._request_to_staged_pgs.values(): + for staged_pg in staged_pgs: + remove_placement_group(staged_pg) + + for ready_pgs in self._request_to_ready_pgs.values(): + for ready_pg in ready_pgs: + remove_placement_group(ready_pg) + + for acquired_pg in self._acquired_pgs: + remove_placement_group(acquired_pg) + + # Reset internal state + self.__init__(update_interval_s=self.update_interval_s) + + def __del__(self): + self.clear() diff --git a/.venv/lib/python3.11/site-packages/ray/air/execution/resources/request.py b/.venv/lib/python3.11/site-packages/ray/air/execution/resources/request.py new file mode 100644 index 0000000000000000000000000000000000000000..7777fe297b6e0c7f291349435563e7ac7d3dac2a --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/air/execution/resources/request.py @@ -0,0 +1,255 @@ +import abc +import json +from copy import deepcopy +from dataclasses 
import dataclass +from inspect import signature +from typing import Dict, List, Union + +import ray +from ray.util import placement_group +from ray.util.annotations import DeveloperAPI + +RemoteRayEntity = Union[ray.remote_function.RemoteFunction, ray.actor.ActorClass] + + +def _sum_bundles(bundles: List[Dict[str, float]]) -> Dict[str, float]: + """Sum all resources in a list of resource bundles. + + Args: + bundles: List of resource bundles. + + Returns: Dict containing all resources summed up. + """ + resources = {} + for bundle in bundles: + for k, v in bundle.items(): + resources[k] = resources.get(k, 0) + v + return resources + + +@DeveloperAPI +class ResourceRequest: + """Request for resources. + + This class is used to define a resource request. A resource request comprises one + or more bundles of resources and instructions on the scheduling behavior. + + The resource request can be submitted to a resource manager, which will + schedule the resources. Depending on the resource backend, this may instruct + Ray to scale up (autoscaling). + + Resource requests are compatible with the most fine-grained low-level resource + backend, which are Ray placement groups. + + Args: + bundles: A list of bundles which represent the resources requirements. + E.g. ``[{"CPU": 1, "GPU": 1}]``. + strategy: The scheduling strategy to acquire the bundles. + + - "PACK": Packs Bundles into as few nodes as possible. + - "SPREAD": Places Bundles across distinct nodes as even as possible. + - "STRICT_PACK": Packs Bundles into one node. The group is + not allowed to span multiple nodes. + - "STRICT_SPREAD": Packs Bundles across distinct nodes. + *args: Passed to the call of ``placement_group()``, if applicable. + **kwargs: Passed to the call of ``placement_group()``, if applicable. 
+ + """ + + def __init__( + self, + bundles: List[Dict[str, Union[int, float]]], + strategy: str = "PACK", + *args, + **kwargs, + ): + if not bundles: + raise ValueError("Cannot initialize a ResourceRequest with zero bundles.") + + # Remove empty resource keys + self._bundles = [ + {k: float(v) for k, v in bundle.items() if v != 0} for bundle in bundles + ] + + # Check if the head bundle is empty (no resources defined or all resources + # are 0 (and thus removed in the previous step) + if not self._bundles[0]: + # This is when the head bundle doesn't need resources. + self._head_bundle_is_empty = True + self._bundles.pop(0) + + if not self._bundles: + raise ValueError( + "Cannot initialize a ResourceRequest with an empty head " + "and zero worker bundles." + ) + else: + self._head_bundle_is_empty = False + + self._strategy = strategy + self._args = args + self._kwargs = kwargs + + self._hash = None + self._bound = None + + self._bind() + + @property + def head_bundle_is_empty(self): + """Returns True if head bundle is empty while child bundles + need resources. + + This is considered an internal API within Tune. + """ + return self._head_bundle_is_empty + + @property + @DeveloperAPI + def head_cpus(self) -> float: + """Returns the number of cpus in the head bundle.""" + return 0.0 if self._head_bundle_is_empty else self._bundles[0].get("CPU", 0.0) + + @property + @DeveloperAPI + def bundles(self) -> List[Dict[str, float]]: + """Returns a deep copy of resource bundles""" + return deepcopy(self._bundles) + + @property + def required_resources(self) -> Dict[str, float]: + """Returns a dict containing the sums of all resources""" + return _sum_bundles(self._bundles) + + @property + @DeveloperAPI + def strategy(self) -> str: + """Returns the placement strategy""" + return self._strategy + + def _bind(self): + """Bind the args and kwargs to the `placement_group()` signature. + + We bind the args and kwargs, so we can compare equality of two resource + requests. 
The main reason for this is that the `placement_group()` API + can evolve independently from the ResourceRequest API (e.g. adding new + arguments). Then, `ResourceRequest(bundles, strategy, arg=arg)` should + be the same as `ResourceRequest(bundles, strategy, arg)`. + """ + sig = signature(placement_group) + try: + self._bound = sig.bind( + self._bundles, self._strategy, *self._args, **self._kwargs + ) + except Exception as exc: + raise RuntimeError( + "Invalid definition for resource request. Please check " + "that you passed valid arguments to the ResourceRequest " + "object." + ) from exc + + def to_placement_group(self): + return placement_group(*self._bound.args, **self._bound.kwargs) + + def __eq__(self, other: "ResourceRequest"): + return ( + isinstance(other, ResourceRequest) + and self._bound == other._bound + and self.head_bundle_is_empty == other.head_bundle_is_empty + ) + + def __hash__(self): + if not self._hash: + # Cache hash + self._hash = hash( + json.dumps( + {"args": self._bound.args, "kwargs": self._bound.kwargs}, + sort_keys=True, + indent=0, + ensure_ascii=True, + ) + ) + return self._hash + + def __getstate__(self): + state = self.__dict__.copy() + state.pop("_hash", None) + state.pop("_bound", None) + return state + + def __setstate__(self, state): + self.__dict__.update(state) + self._hash = None + self._bound = None + self._bind() + + def __repr__(self) -> str: + return ( + f"" + ) + + +@DeveloperAPI +@dataclass +class AcquiredResources(abc.ABC): + """Base class for resources that have been acquired. + + Acquired resources can be associated to Ray objects, which can then be + scheduled using these resources. + + Internally this can point e.g. to a placement group, a placement + group bundle index, or just raw resources. + + The main API is the `annotate_remote_entities` method. This will associate + remote Ray objects (tasks and actors) with the acquired resources by setting + the Ray remote options to use the acquired resources. 
+ """ + + resource_request: ResourceRequest + + def annotate_remote_entities( + self, entities: List[RemoteRayEntity] + ) -> List[Union[RemoteRayEntity]]: + """Return remote ray entities (tasks/actors) to use the acquired resources. + + The first entity will be associated with the first bundle, the second + entity will be associated with the second bundle, etc. + + Args: + entities: Remote Ray entities to annotate with the acquired resources. + """ + bundles = self.resource_request.bundles + + # Also count the empty head bundle as a bundle + num_bundles = len(bundles) + int(self.resource_request.head_bundle_is_empty) + + if len(entities) > num_bundles: + raise RuntimeError( + f"The number of callables to annotate ({len(entities)}) cannot " + f"exceed the number of available bundles ({num_bundles})." + ) + + annotated = [] + + if self.resource_request.head_bundle_is_empty: + # The empty head bundle is place on the first bundle index with empty + # resources. + annotated.append( + self._annotate_remote_entity(entities[0], {}, bundle_index=0) + ) + + # Shift the remaining entities + entities = entities[1:] + + for i, (entity, bundle) in enumerate(zip(entities, bundles)): + annotated.append( + self._annotate_remote_entity(entity, bundle, bundle_index=i) + ) + + return annotated + + def _annotate_remote_entity( + self, entity: RemoteRayEntity, bundle: Dict[str, float], bundle_index: int + ) -> RemoteRayEntity: + raise NotImplementedError diff --git a/.venv/lib/python3.11/site-packages/ray/air/execution/resources/resource_manager.py b/.venv/lib/python3.11/site-packages/ray/air/execution/resources/resource_manager.py new file mode 100644 index 0000000000000000000000000000000000000000..daa2cdd69215157e21a36ea937bb9399d5c759b3 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/air/execution/resources/resource_manager.py @@ -0,0 +1,155 @@ +import abc +from typing import List, Optional + +import ray +from ray.air.execution.resources.request import AcquiredResources, 
ResourceRequest +from ray.util.annotations import DeveloperAPI + + +@DeveloperAPI +class ResourceManager(abc.ABC): + """Resource manager interface. + + A resource manager can be used to request resources from a Ray cluster and + allocate them to remote Ray tasks or actors. + + Resources have to be requested before they can be acquired. + + Resources managed by the resource manager can be in three states: + + 1. "Requested": The resources have been requested but are not yet available to + schedule remote Ray objects. The resource request may trigger autoscaling, + and can be cancelled if no longer needed. + 2. "Ready": The requested resources are now available to schedule remote Ray + objects. They can be acquired and subsequently used remote Ray objects. + The resource request can still be cancelled if no longer needed. + 3. "Acquired": The resources have been acquired by a caller to use for scheduling + remote Ray objects. Note that it is the responsibility of the caller to + schedule the Ray objects with these resources. + The associated resource request has been completed and can no longer be + cancelled. The acquired resources can be freed by the resource manager when + they are no longer used. + + The flow is as follows: + + .. code-block:: python + + # Create resource manager + resource_manager = ResourceManager() + + # Create resource request + resource_request = ResourceRequest([{"CPU": 4}]) + + # Pass to resource manager + resource_manager.request_resources(resource_request) + + # Wait until ready + while not resource_manager.has_resources_ready(resource_request): + time.sleep(1) + + # Once ready, acquire resources + acquired_resource = resource_manager.acquire_resources(resource_request) + + # Bind to remote task or actor + annotated_remote_fn = acquired_resource.annotate_remote_entities( + [remote_fn]) + + # Run remote function. 
This will use the acquired resources + ray.get(annotated_remote_fn.remote()) + + # After using the resources, free + resource_manager.free_resources(annotated_resources) + + """ + + def request_resources(self, resource_request: ResourceRequest): + """Request resources. + + Depending on the backend, resources can trigger autoscaling. Requested + resources can be ready or not ready. Once they are "ready", they can + be acquired and used by remote Ray objects. + + Resource requests can be cancelled anytime using ``cancel_resource_request()``. + Once acquired, the resource request is removed. Acquired resources can be + freed with ``free_resources()``. + """ + raise NotImplementedError + + def cancel_resource_request(self, resource_request: ResourceRequest): + """Cancel resource request. + + Resource requests can be cancelled anytime before a resource is acquired. + Acquiring a resource will remove the associated resource request. + Acquired resources can be freed with ``free_resources()``. + """ + raise NotImplementedError + + def has_resources_ready(self, resource_request: ResourceRequest) -> bool: + """Returns True if resources for the given request are ready to be acquired.""" + raise NotImplementedError + + def acquire_resources( + self, resource_request: ResourceRequest + ) -> Optional[AcquiredResources]: + """Acquire resources. Returns None if resources are not ready to be acquired. + + Acquiring resources will remove the associated resource request. + Acquired resources can be returned with ``free_resources()``. + """ + raise NotImplementedError + + def free_resources(self, acquired_resource: AcquiredResources): + """Free acquired resources from usage and return them to the resource manager. + + Freeing resources will return the resources to the manager, but there are + no guarantees about the tasks and actors scheduled on the resources. 
The caller + should make sure that any references to tasks or actors scheduled on the + resources have been removed before calling ``free_resources()``. + """ + raise NotImplementedError + + def get_resource_futures(self) -> List[ray.ObjectRef]: + """Return futures for resources to await. + + Depending on the backend, we use resource futures to determine availability + of resources (e.g. placement groups) or resolution of requests. + In this case, the futures can be awaited externally by the caller. + + When a resource future resolved, the caller may call ``update_state()`` + to force the resource manager to update its internal state immediately. + """ + return [] + + def update_state(self): + """Update internal state of the resource manager. + + The resource manager may have internal state that needs periodic updating. + For instance, depending on the backend, resource futures can be awaited + externally (with ``get_resource_futures()``). + + If such a future resolved, the caller can instruct the resource + manager to update its internal state immediately. + """ + pass + + def clear(self): + """Reset internal state and clear all resources. + + Calling this method will reset the resource manager to its initialization state. + All resources will be removed. + + Clearing the state will remove tracked resources from the manager, but there are + no guarantees about the tasks and actors scheduled on the resources. The caller + should make sure that any references to tasks or actors scheduled on the + resources have been removed before calling ``clear()``. + """ + raise NotImplementedError + + def __reduce__(self): + """We disallow serialization. + + Shared resource managers should live on an actor. + """ + raise ValueError( + f"Resource managers cannot be serialized. 
Resource manager: {str(self)}" + ) diff --git a/.venv/lib/python3.11/site-packages/ray/air/result.py b/.venv/lib/python3.11/site-packages/ray/air/result.py new file mode 100644 index 0000000000000000000000000000000000000000..b012365ed841c751f97fc89d098af6df48a510f6 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/air/result.py @@ -0,0 +1,283 @@ +import io +import json +import logging +import os +from dataclasses import dataclass +from pathlib import Path +from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union + +import pandas as pd +import pyarrow + +import ray +from ray.air.constants import ( + EXPR_ERROR_PICKLE_FILE, + EXPR_PROGRESS_FILE, + EXPR_RESULT_FILE, +) +from ray.util.annotations import PublicAPI + +if TYPE_CHECKING: + from ray.train import Checkpoint + +logger = logging.getLogger(__name__) + + +@PublicAPI(stability="stable") +@dataclass +class Result: + """The final result of a ML training run or a Tune trial. + + This is the output produced by ``Trainer.fit``. + ``Tuner.fit`` outputs a :class:`~ray.tune.ResultGrid` that is a collection + of ``Result`` objects. + + This API is the recommended way to access the outputs such as: + - checkpoints (``Result.checkpoint``) + - the history of reported metrics (``Result.metrics_dataframe``, ``Result.metrics``) + - errors encountered during a training run (``Result.error``) + + The constructor is a private API -- use ``Result.from_path`` to create a result + object from a directory. + + Attributes: + metrics: The latest set of reported metrics. + checkpoint: The latest checkpoint. + error: The execution error of the Trainable run, if the trial finishes in error. + path: Path pointing to the result directory on persistent storage. This can + point to a remote storage location (e.g. S3) or to a local location (path + on the head node). The path is accessible via the result's associated + `filesystem`. 
For instance, for a result stored in S3 at + ``s3://bucket/location``, ``path`` will have the value ``bucket/location``. + metrics_dataframe: The full result dataframe of the Trainable. + The dataframe is indexed by iterations and contains reported + metrics. Note that the dataframe columns are indexed with the + *flattened* keys of reported metrics, so the format of this dataframe + may be slightly different than ``Result.metrics``, which is an unflattened + dict of the latest set of reported metrics. + best_checkpoints: A list of tuples of the best checkpoints and + their associated metrics. The number of + saved checkpoints is determined by :class:`~ray.train.CheckpointConfig` + (by default, all checkpoints will be saved). + """ + + metrics: Optional[Dict[str, Any]] + checkpoint: Optional["Checkpoint"] + error: Optional[Exception] + path: str + metrics_dataframe: Optional["pd.DataFrame"] = None + best_checkpoints: Optional[List[Tuple["Checkpoint", Dict[str, Any]]]] = None + _storage_filesystem: Optional[pyarrow.fs.FileSystem] = None + _items_to_repr = ["error", "metrics", "path", "filesystem", "checkpoint"] + + @property + def config(self) -> Optional[Dict[str, Any]]: + """The config associated with the result.""" + if not self.metrics: + return None + return self.metrics.get("config", None) + + @property + def filesystem(self) -> pyarrow.fs.FileSystem: + """Return the filesystem that can be used to access the result path. + + Returns: + pyarrow.fs.FileSystem implementation. 
+ """ + return self._storage_filesystem or pyarrow.fs.LocalFileSystem() + + def _repr(self, indent: int = 0) -> str: + """Construct the representation with specified number of space indent.""" + from ray.tune.experimental.output import BLACKLISTED_KEYS + from ray.tune.result import AUTO_RESULT_KEYS + + shown_attributes = {k: getattr(self, k) for k in self._items_to_repr} + if self.error: + shown_attributes["error"] = type(self.error).__name__ + else: + shown_attributes.pop("error") + + shown_attributes["filesystem"] = shown_attributes["filesystem"].type_name + + if self.metrics: + exclude = set(AUTO_RESULT_KEYS) + exclude.update(BLACKLISTED_KEYS) + shown_attributes["metrics"] = { + k: v for k, v in self.metrics.items() if k not in exclude + } + + cls_indent = " " * indent + kws_indent = " " * (indent + 2) + + kws = [ + f"{kws_indent}{key}={value!r}" for key, value in shown_attributes.items() + ] + kws_repr = ",\n".join(kws) + return "{0}{1}(\n{2}\n{0})".format(cls_indent, type(self).__name__, kws_repr) + + def __repr__(self) -> str: + return self._repr(indent=0) + + @staticmethod + def _read_file_as_str( + storage_filesystem: pyarrow.fs.FileSystem, + storage_path: str, + ) -> str: + """Opens a file as an input stream reading all byte content sequentially and + decoding read bytes as utf-8 string. + + Args: + storage_filesystem: The filesystem to use. + storage_path: The source to open for reading. + """ + + with storage_filesystem.open_input_stream(storage_path) as f: + return f.readall().decode() + + @classmethod + def from_path( + cls, + path: Union[str, os.PathLike], + storage_filesystem: Optional[pyarrow.fs.FileSystem] = None, + ) -> "Result": + """Restore a Result object from local or remote trial directory. + + Args: + path: A path of a trial directory on local or remote storage + (ex: s3://bucket/path or /tmp/ray_results). + storage_filesystem: A custom filesystem to use. If not provided, + this will be auto-resolved by pyarrow. 
If provided, the path + is assumed to be prefix-stripped already, and must be a valid path + on the filesystem. + + Returns: + A :py:class:`Result` object of that trial. + """ + # TODO(justinvyu): Fix circular dependency. + from ray.train import Checkpoint + from ray.train._internal.storage import ( + _exists_at_fs_path, + _list_at_fs_path, + get_fs_and_path, + ) + from ray.train.constants import CHECKPOINT_DIR_NAME + + fs, fs_path = get_fs_and_path(path, storage_filesystem) + if not _exists_at_fs_path(fs, fs_path): + raise RuntimeError(f"Trial folder {fs_path} doesn't exist!") + + # Restore metrics from result.json + result_json_file = Path(fs_path, EXPR_RESULT_FILE).as_posix() + progress_csv_file = Path(fs_path, EXPR_PROGRESS_FILE).as_posix() + if _exists_at_fs_path(fs, result_json_file): + lines = cls._read_file_as_str(fs, result_json_file).split("\n") + json_list = [json.loads(line) for line in lines if line] + metrics_df = pd.json_normalize(json_list, sep="/") + latest_metrics = json_list[-1] if json_list else {} + # Fallback to restore from progress.csv + elif _exists_at_fs_path(fs, progress_csv_file): + metrics_df = pd.read_csv( + io.StringIO(cls._read_file_as_str(fs, progress_csv_file)) + ) + latest_metrics = ( + metrics_df.iloc[-1].to_dict() if not metrics_df.empty else {} + ) + else: + raise RuntimeError( + f"Failed to restore the Result object: Neither {EXPR_RESULT_FILE}" + f" nor {EXPR_PROGRESS_FILE} exists in the trial folder!" 
+ ) + + # Restore all checkpoints from the checkpoint folders + checkpoint_dir_names = sorted( + _list_at_fs_path( + fs, + fs_path, + file_filter=lambda file_info: file_info.type + == pyarrow.fs.FileType.Directory + and file_info.base_name.startswith("checkpoint_"), + ) + ) + + if checkpoint_dir_names: + checkpoints = [ + Checkpoint( + path=Path(fs_path, checkpoint_dir_name).as_posix(), filesystem=fs + ) + for checkpoint_dir_name in checkpoint_dir_names + ] + + metrics = [] + for checkpoint_dir_name in checkpoint_dir_names: + metrics_corresponding_to_checkpoint = metrics_df[ + metrics_df[CHECKPOINT_DIR_NAME] == checkpoint_dir_name + ] + if metrics_corresponding_to_checkpoint.empty: + logger.warning( + "Could not find metrics corresponding to " + f"{checkpoint_dir_name}. These will default to an empty dict." + ) + metrics.append( + {} + if metrics_corresponding_to_checkpoint.empty + else metrics_corresponding_to_checkpoint.iloc[-1].to_dict() + ) + + latest_checkpoint = checkpoints[-1] + # TODO(justinvyu): These are ordered by checkpoint index, since we don't + # know the metric to order these with. + best_checkpoints = list(zip(checkpoints, metrics)) + else: + best_checkpoints = latest_checkpoint = None + + # Restore the trial error if it exists + error = None + error_file_path = Path(fs_path, EXPR_ERROR_PICKLE_FILE).as_posix() + if _exists_at_fs_path(fs, error_file_path): + with fs.open_input_stream(error_file_path) as f: + error = ray.cloudpickle.load(f) + + return Result( + metrics=latest_metrics, + checkpoint=latest_checkpoint, + path=fs_path, + _storage_filesystem=fs, + metrics_dataframe=metrics_df, + best_checkpoints=best_checkpoints, + error=error, + ) + + @PublicAPI(stability="alpha") + def get_best_checkpoint(self, metric: str, mode: str) -> Optional["Checkpoint"]: + """Get the best checkpoint from this trial based on a specific metric. + + Any checkpoints without an associated metric value will be filtered out. 
+ + Args: + metric: The key for checkpoints to order on. + mode: One of ["min", "max"]. + + Returns: + :class:`Checkpoint ` object, or None if there is + no valid checkpoint associated with the metric. + """ + if not self.best_checkpoints: + raise RuntimeError("No checkpoint exists in the trial directory!") + + if mode not in ["max", "min"]: + raise ValueError( + f'Unsupported mode: {mode}. Please choose from ["min", "max"]!' + ) + + op = max if mode == "max" else min + valid_checkpoints = [ + ckpt_info for ckpt_info in self.best_checkpoints if metric in ckpt_info[1] + ] + + if not valid_checkpoints: + raise RuntimeError( + f"Invalid metric name {metric}! " + f"You may choose from the following metrics: {self.metrics.keys()}." + ) + + return op(valid_checkpoints, key=lambda x: x[1][metric])[0] diff --git a/.venv/lib/python3.11/site-packages/ray/air/session.py b/.venv/lib/python3.11/site-packages/ray/air/session.py new file mode 100644 index 0000000000000000000000000000000000000000..b6a9fba5a6d6c89e2aeed911849e9ab136724fe1 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/air/session.py @@ -0,0 +1 @@ +from ray.train._internal.session import * # noqa: F401,F403 diff --git a/.venv/lib/python3.11/site-packages/ray/air/util/__init__.py b/.venv/lib/python3.11/site-packages/ray/air/util/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/.venv/lib/python3.11/site-packages/ray/air/util/tensor_extensions/pandas.py b/.venv/lib/python3.11/site-packages/ray/air/util/tensor_extensions/pandas.py new file mode 100644 index 0000000000000000000000000000000000000000..c52cf0b71f79c028c12257e7469739b7e62e6b86 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/air/util/tensor_extensions/pandas.py @@ -0,0 +1,1451 @@ +# Adapted from +# https://github.com/CODAIT/text-extensions-for-pandas/blob/dc03278689fe1c5f131573658ae19815ba25f33e/text_extensions_for_pandas/array/tensor.py +# and +# 
https://github.com/CODAIT/text-extensions-for-pandas/blob/dc03278689fe1c5f131573658ae19815ba25f33e/text_extensions_for_pandas/array/arrow_conversion.py + +# +# Copyright (c) 2020 IBM Corp. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Modifications: +# - Added ArrowTensorType.to_pandas_type() +# - Added ArrowTensorArray.__getitem__() +# - Added ArrowTensorArray.__iter__() +# - Added support for column casts to extension types. +# - Fleshed out docstrings and examples. +# - Fixed TensorArray.isna() so it returns an appropriate ExtensionArray. +# - Added different (more vectorized) TensorArray.take() operation. +# - Added support for more reducers (agg funcs) to TensorArray. +# - Added support for logical operators to TensorArray(Element). +# - Added support for heterogeneously-shaped tensors. +# - Miscellaneous small bug fixes and optimizations. 
+ +import numbers +import os +from typing import Any, Callable, List, Optional, Sequence, Tuple, Union + +import numpy as np +import pandas as pd +import pyarrow as pa +from packaging.version import Version +from pandas._typing import Dtype +from pandas.compat import set_function_name +from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries +from pandas.core.indexers import check_array_indexer, validate_indices + +from ray.air.util.tensor_extensions.utils import ( + _create_possibly_ragged_ndarray, + _is_ndarray_variable_shaped_tensor, +) +from ray.util.annotations import PublicAPI + +try: + from pandas.core.dtypes.generic import ABCIndex +except ImportError: + # ABCIndexClass changed to ABCIndex in Pandas 1.3 + from pandas.core.dtypes.generic import ABCIndexClass as ABCIndex + + +############################################# +# Begin patching of ExtensionArrayFormatter # +############################################# + + +def _format_strings_patched(self) -> List[str]: + from pandas.core.construction import extract_array + from pandas.io.formats.format import format_array + + if not isinstance(self.values, TensorArray): + return self._format_strings_orig() + + values = extract_array(self.values, extract_numpy=True) + array = np.asarray(values) + + if array.ndim == 1: + return self._format_strings_orig() + + def format_array_wrap(array_, formatter_): + fmt_values = format_array( + array_, + formatter_, + float_format=self.float_format, + na_rep=self.na_rep, + digits=self.digits, + space=self.space, + justify=self.justify, + decimal=self.decimal, + leading_space=self.leading_space, + quoting=self.quoting, + ) + return fmt_values + + flat_formatter = self.formatter + if flat_formatter is None: + flat_formatter = values._formatter(boxed=True) + + # Flatten array, call function, reshape (use ravel_compat in v1.3.0) + flat_array = array.ravel("K") + fmt_flat_array = np.asarray(format_array_wrap(flat_array, flat_formatter)) + order = "F" if 
array.flags.f_contiguous else "C" + fmt_array = fmt_flat_array.reshape(array.shape, order=order) + + # Format the array of nested strings, use default formatter + return format_array_wrap(fmt_array, None) + + +def _format_strings_patched_v1_0_0(self) -> List[str]: + from functools import partial + + from pandas.core.construction import extract_array + from pandas.io.formats.format import format_array + from pandas.io.formats.printing import pprint_thing + + if not isinstance(self.values, TensorArray): + return self._format_strings_orig() + + values = extract_array(self.values, extract_numpy=True) + array = np.asarray(values) + + if array.ndim == 1: + return self._format_strings_orig() + + def format_array_wrap(array_, formatter_): + fmt_values = format_array( + array_, + formatter_, + float_format=self.float_format, + na_rep=self.na_rep, + digits=self.digits, + space=self.space, + justify=self.justify, + decimal=self.decimal, + leading_space=self.leading_space, + ) + return fmt_values + + flat_formatter = self.formatter + if flat_formatter is None: + flat_formatter = values._formatter(boxed=True) + + # Flatten array, call function, reshape (use ravel_compat in v1.3.0) + flat_array = array.ravel("K") + fmt_flat_array = np.asarray(format_array_wrap(flat_array, flat_formatter)) + order = "F" if array.flags.f_contiguous else "C" + fmt_array = fmt_flat_array.reshape(array.shape, order=order) + + # Slimmed down version of GenericArrayFormatter due to: + # https://github.com/pandas-dev/pandas/issues/33770 + def format_strings_slim(array_, leading_space): + formatter = partial( + pprint_thing, + escape_chars=("\t", "\r", "\n"), + ) + + def _format(x): + return str(formatter(x)) + + fmt_values = [] + for v in array_: + tpl = "{v}" if leading_space is False else " {v}" + fmt_values.append(tpl.format(v=_format(v))) + return fmt_values + + return format_strings_slim(fmt_array, self.leading_space) + + +_FORMATTER_ENABLED_ENV_VAR = "TENSOR_COLUMN_EXTENSION_FORMATTER_ENABLED" + 
+if os.getenv(_FORMATTER_ENABLED_ENV_VAR, "1") == "1": + if Version(pd.__version__) < Version("2.2.0"): + from pandas.io.formats.format import ExtensionArrayFormatter + + formatter_cls = ExtensionArrayFormatter + else: + from pandas.io.formats.format import _ExtensionArrayFormatter + + formatter_cls = _ExtensionArrayFormatter + formatter_cls._format_strings_orig = formatter_cls._format_strings + if Version("1.1.0") <= Version(pd.__version__) < Version("1.3.0"): + formatter_cls._format_strings = _format_strings_patched + else: + formatter_cls._format_strings = _format_strings_patched_v1_0_0 + formatter_cls._patched_by_ray_datasets = True + +########################################### +# End patching of ExtensionArrayFormatter # +########################################### + + +@PublicAPI(stability="beta") +@pd.api.extensions.register_extension_dtype +class TensorDtype(pd.api.extensions.ExtensionDtype): + """ + Pandas extension type for a column of homogeneous-typed tensors. + + This extension supports tensors in which the elements have different shapes. + However, each tensor element must be non-ragged, i.e. each tensor element must have + a well-defined, non-ragged shape. + + See: + https://github.com/pandas-dev/pandas/blob/master/pandas/core/dtypes/base.py + for up-to-date interface documentation and the subclassing contract. The + docstrings of the below properties and methods were copied from the base + ExtensionDtype. + + Examples: + >>> # Create a DataFrame with a list of ndarrays as a column. + >>> import pandas as pd + >>> import numpy as np + >>> import ray + >>> df = pd.DataFrame({ + ... "one": [1, 2, 3], + ... "two": list(np.arange(24).reshape((3, 2, 2, 2)))}) + >>> # Note the opaque np.object dtype for this column. + >>> df.dtypes # doctest: +SKIP + one int64 + two object + dtype: object + >>> # Cast column to our TensorDtype extension type. 
+ >>> from ray.data.extensions import TensorDtype + >>> df["two"] = df["two"].astype(TensorDtype(np.int64, (3, 2, 2, 2))) + >>> # Note that the column dtype is now TensorDtype instead of + >>> # np.object. + >>> df.dtypes # doctest: +SKIP + one int64 + two TensorDtype(shape=(3, 2, 2, 2), dtype=int64) + dtype: object + >>> # Pandas is now aware of this tensor column, and we can do the + >>> # typical DataFrame operations on this column. + >>> col = 2 * (df["two"] + 10) + >>> # The ndarrays underlying the tensor column will be manipulated, + >>> # but the column itself will continue to be a Pandas type. + >>> type(col) # doctest: +SKIP + pandas.core.series.Series + >>> col # doctest: +SKIP + 0 [[[ 2 4] + [ 6 8]] + [[10 12] + [14 16]]] + 1 [[[18 20] + [22 24]] + [[26 28] + [30 32]]] + 2 [[[34 36] + [38 40]] + [[42 44] + [46 48]]] + Name: two, dtype: TensorDtype(shape=(3, 2, 2, 2), dtype=int64) + >>> # Once you do an aggregation on that column that returns a single + >>> # row's value, you get back our TensorArrayElement type. + >>> tensor = col.mean() + >>> type(tensor) # doctest: +SKIP + ray.data.extensions.tensor_extension.TensorArrayElement + >>> tensor # doctest: +SKIP + array([[[18., 20.], + [22., 24.]], + [[26., 28.], + [30., 32.]]]) + >>> # This is a light wrapper around a NumPy ndarray, and can easily + >>> # be converted to an ndarray. + >>> type(tensor.to_numpy()) # doctest: +SKIP + numpy.ndarray + >>> # In addition to doing Pandas operations on the tensor column, + >>> # you can now put the DataFrame into a Dataset. + >>> ds = ray.data.from_pandas(df) # doctest: +SKIP + >>> # Internally, this column is represented the corresponding + >>> # Arrow tensor extension type. + >>> ds.schema() # doctest: +SKIP + one: int64 + two: extension> + >>> # You can write the dataset to Parquet. + >>> ds.write_parquet("/some/path") # doctest: +SKIP + >>> # And you can read it back. 
+ >>> read_ds = ray.data.read_parquet("/some/path") # doctest: +SKIP + >>> read_ds.schema() # doctest: +SKIP + one: int64 + two: extension> + >>> read_df = ray.get(read_ds.to_pandas_refs())[0] # doctest: +SKIP + >>> read_df.dtypes # doctest: +SKIP + one int64 + two TensorDtype(shape=(3, 2, 2, 2), dtype=int64) + dtype: object + >>> # The tensor extension type is preserved along the + >>> # Pandas --> Arrow --> Parquet --> Arrow --> Pandas + >>> # conversion chain. + >>> read_df.equals(df) # doctest: +SKIP + True + """ + + # NOTE(Clark): This is apparently required to prevent integer indexing + # errors, but is an undocumented ExtensionDtype attribute. See issue: + # https://github.com/CODAIT/text-extensions-for-pandas/issues/166 + base = None + + def __init__(self, shape: Tuple[Optional[int], ...], dtype: np.dtype): + self._shape = shape + self._dtype = dtype + + @property + def type(self): + """ + The scalar type for the array, e.g. ``int`` + It's expected ``ExtensionArray[item]`` returns an instance + of ``ExtensionDtype.type`` for scalar ``item``, assuming + that value is valid (not NA). NA values do not need to be + instances of `type`. + """ + return TensorArrayElement + + @property + def element_dtype(self): + """ + The dtype of the underlying tensor elements. + """ + return self._dtype + + @property + def element_shape(self): + """ + The shape of the underlying tensor elements. This will be a tuple of Nones if + the corresponding TensorArray for this TensorDtype holds variable-shaped tensor + elements. + """ + return self._shape + + @property + def is_variable_shaped(self): + """ + Whether the corresponding TensorArray for this TensorDtype holds variable-shaped + tensor elements. + """ + return all(dim_size is None for dim_size in self.shape) + + @property + def name(self) -> str: + """ + A string identifying the data type. + Will be used for display in, e.g. 
``Series.dtype`` + """ + return f"numpy.ndarray(shape={self._shape}, dtype={self._dtype})" + + @classmethod + def construct_from_string(cls, string: str): + r""" + Construct this type from a string. + + This is useful mainly for data types that accept parameters. + For example, a period dtype accepts a frequency parameter that + can be set as ``period[H]`` (where H means hourly frequency). + + By default, in the abstract class, just the name of the type is + expected. But subclasses can overwrite this method to accept + parameters. + + Parameters + ---------- + string : str + The name of the type, for example ``category``. + + Returns + ------- + ExtensionDtype + Instance of the dtype. + + Raises + ------ + TypeError + If a class cannot be constructed from this 'string'. + + Examples + -------- + For extension dtypes with arguments the following may be an + adequate implementation. + + >>> import re + >>> @classmethod + ... def construct_from_string(cls, string): + ... pattern = re.compile(r"^my_type\[(?P.+)\]$") + ... match = pattern.match(string) + ... if match: + ... return cls(**match.groupdict()) + ... else: + ... raise TypeError( + ... f"Cannot construct a '{cls.__name__}' from '{string}'" + ... ) + """ + import ast + import re + + if not isinstance(string, str): + raise TypeError( + f"'construct_from_string' expects a string, got {type(string)}" + ) + # Upstream code uses exceptions as part of its normal control flow and + # will pass this method bogus class names. + regex = ( + r"^(TensorDtype|numpy.ndarray)" + r"\(shape=(\((?:(?:\d+|None),?\s?)*\)), dtype=(\w+)\)$" + ) + m = re.search(regex, string) + err_msg = ( + f"Cannot construct a '{cls.__name__}' from '{string}'; expected a string " + "like 'TensorDtype(shape=(1, 2, 3), dtype=int64)'." 
+ ) + if m is None: + raise TypeError(err_msg) + groups = m.groups() + if len(groups) != 3: + raise TypeError(err_msg) + _, shape, dtype = groups + shape = ast.literal_eval(shape) + dtype = np.dtype(dtype) + return cls(shape, dtype) + + @classmethod + def construct_array_type(cls): + """ + Return the array type associated with this dtype. + + Returns + ------- + type + """ + return TensorArray + + def __from_arrow__(self, array: Union[pa.Array, pa.ChunkedArray]): + """ + Convert a pyarrow (chunked) array to a TensorArray. + + This and TensorArray.__arrow_array__ make up the + Pandas extension type + array <--> Arrow extension type + array + interoperability protocol. See + https://pandas.pydata.org/pandas-docs/stable/development/extending.html#compatibility-with-apache-arrow + for more information. + """ + if isinstance(array, pa.ChunkedArray): + if array.num_chunks > 1: + # TODO(Clark): Remove concat and construct from list with + # shape. + values = np.concatenate( + [chunk.to_numpy() for chunk in array.iterchunks()] + ) + else: + values = array.chunk(0).to_numpy() + else: + values = array.to_numpy() + + return TensorArray(values) + + def __str__(self) -> str: + return self.name + + def __repr__(self) -> str: + return str(self) + + @property + def _is_boolean(self): + """ + Whether this extension array should be considered boolean. + + By default, ExtensionArrays are assumed to be non-numeric. + Setting this to True will affect the behavior of several places, + e.g. + + * is_bool + * boolean indexing + + Returns + ------- + bool + """ + # This is needed to support returning a TensorArray from .isnan(). + from pandas.core.dtypes.common import is_bool_dtype + + return is_bool_dtype(self._dtype) + + +class _TensorOpsMixin(pd.api.extensions.ExtensionScalarOpsMixin): + """ + Mixin for TensorArray operator support, applying operations on the + underlying ndarrays. 
+ """ + + @classmethod + def _create_method(cls, op, coerce_to_dtype=True, result_dtype=None): + """ + Add support for binary operators by unwrapping, applying, and + rewrapping. + """ + + # NOTE(Clark): This overrides, but coerce_to_dtype, result_dtype might + # not be needed + + def _binop(self, other): + lvalues = self._tensor + + if isinstance(other, (ABCDataFrame, ABCSeries, ABCIndex)): + # Rely on Pandas to unbox and dispatch to us. + return NotImplemented + + # divmod returns a tuple + if op_name in ["__divmod__", "__rdivmod__"]: + # TODO(Clark): Add support for divmod and rdivmod. + # div, mod = result + raise NotImplementedError + + if isinstance(other, (TensorArray, TensorArrayElement)): + rvalues = other._tensor + else: + rvalues = other + + result = op(lvalues, rvalues) + + # Force a TensorArray if rvalue is not a scalar. + if isinstance(self, TensorArrayElement) and ( + not isinstance(other, TensorArrayElement) or not np.isscalar(other) + ): + result_wrapped = TensorArray(result) + else: + result_wrapped = cls(result) + + return result_wrapped + + op_name = f"__{op.__name__}__" + return set_function_name(_binop, op_name, cls) + + @classmethod + def _create_logical_method(cls, op): + return cls._create_method(op) + + +class _TensorScalarCastMixin: + """ + Mixin for casting scalar tensors to a particular numeric type. + """ + + def _scalarfunc(self, func: Callable[[Any], Any]): + return func(self._tensor) + + def __complex__(self): + return self._scalarfunc(complex) + + def __float__(self): + return self._scalarfunc(float) + + def __int__(self): + return self._scalarfunc(int) + + def __hex__(self): + return self._scalarfunc(hex) + + def __oct__(self): + return self._scalarfunc(oct) + + +@PublicAPI(stability="beta") +class TensorArrayElement(_TensorOpsMixin, _TensorScalarCastMixin): + """ + Single element of a TensorArray, wrapping an underlying ndarray. 
+ """ + + def __init__(self, values: np.ndarray): + """ + Construct a TensorArrayElement from a NumPy ndarray. + + Args: + values: ndarray that underlies this TensorArray element. + """ + self._tensor = values + + def __repr__(self): + return self._tensor.__repr__() + + def __str__(self): + return self._tensor.__str__() + + @property + def numpy_dtype(self): + """ + Get the dtype of the tensor. + :return: The numpy dtype of the backing ndarray + """ + return self._tensor.dtype + + @property + def numpy_ndim(self): + """ + Get the number of tensor dimensions. + :return: integer for the number of dimensions + """ + return self._tensor.ndim + + @property + def numpy_shape(self): + """ + Get the shape of the tensor. + :return: A tuple of integers for the numpy shape of the backing ndarray + """ + return self._tensor.shape + + @property + def numpy_size(self): + """ + Get the size of the tensor. + :return: integer for the number of elements in the tensor + """ + return self._tensor.size + + def to_numpy(self): + """ + Return the values of this element as a NumPy ndarray. + """ + return np.asarray(self._tensor) + + def __array__(self, dtype: np.dtype = None, **kwargs) -> np.ndarray: + return np.asarray(self._tensor, dtype=dtype, **kwargs) + + +@PublicAPI(stability="beta") +class TensorArray( + pd.api.extensions.ExtensionArray, + _TensorOpsMixin, + _TensorScalarCastMixin, +): + """ + Pandas `ExtensionArray` representing a tensor column, i.e. a column + consisting of ndarrays as elements. + + This extension supports tensors in which the elements have different shapes. + However, each tensor element must be non-ragged, i.e. each tensor element must have + a well-defined, non-ragged shape. + + Examples: + >>> # Create a DataFrame with a list of ndarrays as a column. + >>> import pandas as pd + >>> import numpy as np + >>> import ray + >>> from ray.data.extensions import TensorArray + >>> df = pd.DataFrame({ + ... "one": [1, 2, 3], + ... 
"two": TensorArray(np.arange(24).reshape((3, 2, 2, 2)))}) + >>> # Note that the column dtype is TensorDtype. + >>> df.dtypes # doctest: +SKIP + one int64 + two TensorDtype(shape=(3, 2, 2, 2), dtype=int64) + dtype: object + >>> # Pandas is aware of this tensor column, and we can do the + >>> # typical DataFrame operations on this column. + >>> col = 2 * (df["two"] + 10) + >>> # The ndarrays underlying the tensor column will be manipulated, + >>> # but the column itself will continue to be a Pandas type. + >>> type(col) # doctest: +SKIP + pandas.core.series.Series + >>> col # doctest: +SKIP + 0 [[[ 2 4] + [ 6 8]] + [[10 12] + [14 16]]] + 1 [[[18 20] + [22 24]] + [[26 28] + [30 32]]] + 2 [[[34 36] + [38 40]] + [[42 44] + [46 48]]] + Name: two, dtype: TensorDtype(shape=(3, 2, 2, 2), dtype=int64) + >>> # Once you do an aggregation on that column that returns a single + >>> # row's value, you get back our TensorArrayElement type. + >>> tensor = col.mean() # doctest: +SKIP + >>> type(tensor) # doctest: +SKIP + ray.data.extensions.tensor_extension.TensorArrayElement + >>> tensor # doctest: +SKIP + array([[[18., 20.], + [22., 24.]], + [[26., 28.], + [30., 32.]]]) + >>> # This is a light wrapper around a NumPy ndarray, and can easily + >>> # be converted to an ndarray. + >>> type(tensor.to_numpy()) # doctest: +SKIP + numpy.ndarray + >>> # In addition to doing Pandas operations on the tensor column, + >>> # you can now put the DataFrame into a Dataset. + >>> ds = ray.data.from_pandas(df) # doctest: +SKIP + >>> # Internally, this column is represented the corresponding + >>> # Arrow tensor extension type. + >>> ds.schema() # doctest: +SKIP + one: int64 + two: extension> + >>> # You can write the dataset to Parquet. + >>> ds.write_parquet("/some/path") # doctest: +SKIP + >>> # And you can read it back. 
+ >>> read_ds = ray.data.read_parquet("/some/path") # doctest: +SKIP + >>> read_ds.schema() # doctest: +SKIP + one: int64 + two: extension> + + >>> read_df = ray.get(read_ds.to_pandas_refs())[0] # doctest: +SKIP + >>> read_df.dtypes # doctest: +SKIP + one int64 + two TensorDtype(shape=(3, 2, 2, 2), dtype=int64) + dtype: object + >>> # The tensor extension type is preserved along the + >>> # Pandas --> Arrow --> Parquet --> Arrow --> Pandas + >>> # conversion chain. + >>> read_df.equals(df) # doctest: +SKIP + True + """ + + SUPPORTED_REDUCERS = { + "sum": np.sum, + "all": np.all, + "any": np.any, + "min": np.min, + "max": np.max, + "mean": np.mean, + "median": np.median, + "prod": np.prod, + "std": np.std, + "var": np.var, + } + + # See https://github.com/pandas-dev/pandas/blob/master/pandas/core/arrays/base.py + # for interface documentation and the subclassing contract. + def __init__( + self, + values: Union[ + np.ndarray, + ABCSeries, + Sequence[Union[np.ndarray, TensorArrayElement]], + TensorArrayElement, + Any, + ], + ): + """ + Args: + values: A NumPy ndarray or sequence of NumPy ndarrays of equal + shape. + """ + # Try to convert some well-known objects to ndarrays before handing off to + # ndarray handling logic. + if isinstance(values, ABCSeries): + values = _create_possibly_ragged_ndarray(values) + elif isinstance(values, Sequence): + values = [ + np.asarray(v) if isinstance(v, TensorArrayElement) else v + for v in values + ] + values = _create_possibly_ragged_ndarray(values) + elif isinstance(values, TensorArrayElement): + values = np.array([np.asarray(values)], copy=False) + + if isinstance(values, np.ndarray): + if values.dtype.type is np.object_: + if len(values) == 0: + # Tensor is empty, pass through to create empty TensorArray. 
+ pass + elif all( + isinstance(v, (np.ndarray, TensorArrayElement, Sequence)) + and not isinstance(v, str) + for v in values + ): + values = [np.asarray(v) for v in values] + # Try to convert ndarrays of ndarrays/TensorArrayElements with an + # opaque object type to a properly typed ndarray of ndarrays. + values = _create_possibly_ragged_ndarray(values) + else: + raise TypeError( + "Expected a well-typed ndarray or an object-typed ndarray of " + "ndarray pointers, but got an object-typed ndarray whose " + f"subndarrays are of type {type(values[0])}." + ) + elif isinstance(values, TensorArray): + raise TypeError("Use the copy() method to create a copy of a TensorArray.") + else: + raise TypeError( + "Expected a numpy.ndarray or sequence of numpy.ndarray, " + f"but received {values} of type {type(values).__name__} instead." + ) + assert isinstance(values, np.ndarray) + self._tensor = values + self._is_variable_shaped = None + + @classmethod + def _from_sequence( + cls, scalars, *, dtype: Optional[Dtype] = None, copy: bool = False + ): + """ + Construct a new ExtensionArray from a sequence of scalars. + + Parameters + ---------- + scalars : Sequence + Each element will be an instance of the scalar type for this + array, ``cls.dtype.type`` or be converted into this type in this + method. + dtype : dtype, optional + Construct for this particular dtype. This should be a Dtype + compatible with the ExtensionArray. + copy : bool, default False + If True, copy the underlying data. + + Returns + ------- + ExtensionArray + """ + if copy and isinstance(scalars, np.ndarray): + scalars = scalars.copy() + elif isinstance(scalars, TensorArray): + scalars = scalars._tensor.copy() if copy else scalars._tensor + return TensorArray(scalars) + + @classmethod + def _from_factorized( + cls, values: np.ndarray, original: pd.api.extensions.ExtensionArray + ): + """ + Reconstruct an ExtensionArray after factorization. 
+ + Parameters + ---------- + values : ndarray + An integer ndarray with the factorized values. + original : ExtensionArray + The original ExtensionArray that factorize was called on. + + See Also + -------- + factorize : Top-level factorize method that dispatches here. + ExtensionArray.factorize : Encode the extension array as an enumerated + type. + """ + raise NotImplementedError + + def __getitem__( + self, item: Union[int, slice, np.ndarray] + ) -> Union["TensorArray", "TensorArrayElement"]: + """ + Select a subset of self. + + Parameters + ---------- + item : int, slice, or ndarray + * int: The position in 'self' to get. + * slice: A slice object, where 'start', 'stop', and 'step' are + integers or None + * ndarray: A 1-d boolean NumPy ndarray the same length as 'self' + + Returns + ------- + item : scalar or ExtensionArray + + Notes + ----- + For scalar ``item``, return a scalar value suitable for the array's + type. This should be an instance of ``self.dtype.type``. + For slice ``key``, return an instance of ``ExtensionArray``, even + if the slice is length 0 or 1. + For a boolean mask, return an instance of ``ExtensionArray``, filtered + to the values where ``item`` is True. + """ + # Return scalar if single value is selected, a TensorArrayElement for + # single array element, or TensorArray for slice. 
        # Scalar integer position: return a plain Python scalar for 0-d
        # values, otherwise box the subtensor in a TensorArrayElement.
        if isinstance(item, int):
            value = self._tensor[item]
            if np.isscalar(value):
                return value
            else:
                return TensorArrayElement(value)
        else:
            # BEGIN workaround for Pandas issue #42430
            if isinstance(item, tuple) and len(item) > 1 and item[0] == Ellipsis:
                if len(item) > 2:
                    # Hopefully this case is not possible, but can't be sure
                    raise ValueError(
                        "Workaround Pandas issue #42430 not "
                        "implemented for tuple length > 2"
                    )
                item = item[1]
            # END workaround for issue #42430
            if isinstance(item, TensorArray):
                item = np.asarray(item)
            # Normalize the indexer (boolean mask, integer array, slice) per
            # the pandas extension-array indexing contract before applying it
            # to the backing ndarray.
            item = check_array_indexer(self, item)
            return TensorArray(self._tensor[item])

    def __len__(self) -> int:
        """
        Length of this array (number of tensor elements).

        Returns
        -------
        length : int
        """
        return len(self._tensor)

    @property
    def dtype(self) -> pd.api.extensions.ExtensionDtype:
        """
        An instance of 'ExtensionDtype'.

        For variable-shaped tensors the per-element shape is reported as all
        ``None`` dims, using the first element's dtype/ndim as representative.
        """
        if self.is_variable_shaped:
            # A tensor is only considered variable-shaped if it's non-empty, so no
            # non-empty check is needed here.
            dtype = self._tensor[0].dtype
            shape = (None,) * self._tensor[0].ndim
        else:
            dtype = self.numpy_dtype
            # Drop the leading "row" axis; element shape excludes it.
            shape = self.numpy_shape[1:]
        return TensorDtype(shape, dtype)

    @property
    def is_variable_shaped(self):
        """
        Whether this TensorArray holds variable-shaped tensor elements.

        The result is computed lazily on first access and cached in
        ``self._is_variable_shaped``.
        """
        if self._is_variable_shaped is None:
            self._is_variable_shaped = _is_ndarray_variable_shaped_tensor(self._tensor)
        return self._is_variable_shaped

    @property
    def nbytes(self) -> int:
        """
        The number of bytes needed to store this object in memory.

        Note: this reports only the backing ndarray's buffer size.
        """
        return self._tensor.nbytes

    def isna(self) -> "TensorArray":
        """
        A 1-D array indicating if each value is missing.

        Returns
        -------
        na_values : Union[np.ndarray, ExtensionArray]
            In most cases, this should return a NumPy ndarray. For
            exceptional cases like ``SparseArray``, where returning
            an ndarray would be expensive, an ExtensionArray may be
            returned.
+ + Notes + ----- + If returning an ExtensionArray, then + + * ``na_values._is_boolean`` should be True + * `na_values` should implement :func:`ExtensionArray._reduce` + * ``na_values.any`` and ``na_values.all`` should be implemented + """ + if self._tensor.dtype.type is np.object_: + # Avoid comparing with __eq__ because the elements of the tensor + # may do something funny with that operation. + return np.array( + [self._tensor[i] is None for i in range(len(self))], dtype=bool + ) + elif self._tensor.dtype.type is np.str_: + return np.all(self._tensor == "", axis=tuple(range(1, self._tensor.ndim))) + else: + return np.all( + np.isnan(self._tensor), axis=tuple(range(1, self._tensor.ndim)) + ) + + def take( + self, indices: Sequence[int], allow_fill: bool = False, fill_value: Any = None + ) -> "TensorArray": + """ + Take elements from an array. + + Parameters + ---------- + indices : sequence of int + Indices to be taken. + allow_fill : bool, default False + How to handle negative values in `indices`. + + * False: negative values in `indices` indicate positional indices + from the right (the default). This is similar to + :func:`numpy.take`. + + * True: negative values in `indices` indicate + missing values. These values are set to `fill_value`. Any other + other negative values raise a ``ValueError``. + + fill_value : any, optional + Fill value to use for NA-indices when `allow_fill` is True. + This may be ``None``, in which case the default NA value for + the type, ``self.dtype.na_value``, is used. + + For many ExtensionArrays, there will be two representations of + `fill_value`: a user-facing "boxed" scalar, and a low-level + physical NA value. `fill_value` should be the user-facing version, + and the implementation should handle translating that to the + physical version for processing the take if necessary. + + Returns + ------- + ExtensionArray + + Raises + ------ + IndexError + When the indices are out of bounds for the array. 
+ ValueError + When `indices` contains negative values other than ``-1`` + and `allow_fill` is True. + + See Also + -------- + numpy.take : Take elements from an array along an axis. + api.extensions.take : Take elements from an array. + + Notes + ----- + ExtensionArray.take is called by ``Series.__getitem__``, ``.loc``, + ``iloc``, when `indices` is a sequence of values. Additionally, + it's called by :meth:`Series.reindex`, or any other method + that causes realignment, with a `fill_value`. + + Examples + -------- + Here's an example implementation, which relies on casting the + extension array to object dtype. This uses the helper method + :func:`pandas.api.extensions.take`. + + .. code-block:: python + + def take(self, indices, allow_fill=False, fill_value=None): + from pandas.core.algorithms import take + + # If the ExtensionArray is backed by an ndarray, then + # just pass that here instead of coercing to object. + data = self.astype(object) + + if allow_fill and fill_value is None: + fill_value = self.dtype.na_value + + # fill value should always be translated from the scalar + # type for the array, to the physical storage type for + # the data, before passing to take. + + result = take(data, indices, fill_value=fill_value, + allow_fill=allow_fill) + return self._from_sequence(result, dtype=self.dtype) + """ + if allow_fill: + # With allow_fill being True, negative values in `indices` indicate + # missing values and should be set to `fill_value`. + indices = np.asarray(indices, dtype=np.intp) + validate_indices(indices, len(self._tensor)) + + # Check if there are missing indices to fill, otherwise we can + # delegate to NumPy ndarray .take(). + has_missing = np.any(indices < 0) + if has_missing: + if fill_value is None: + fill_value = np.nan + + # Create an array populated with fill value. + values = np.full((len(indices),) + self._tensor.shape[1:], fill_value) + + # Put tensors at the given positive indices into array. 
                # Scatter the real tensors into the fill-initialized buffer at
                # the positions whose indices were non-negative.
                is_nonneg = indices >= 0
                np.put(values, np.where(is_nonneg)[0], self._tensor[indices[is_nonneg]])

                return TensorArray(values)

        # Delegate take to NumPy array.
        values = self._tensor.take(indices, axis=0)

        return TensorArray(values)

    def copy(self) -> "TensorArray":
        """
        Return a copy of the array.

        Returns
        -------
        ExtensionArray
        """
        # TODO(Clark): Copy cached properties.
        return TensorArray(self._tensor.copy())

    @classmethod
    def _concat_same_type(cls, to_concat: Sequence["TensorArray"]) -> "TensorArray":
        """
        Concatenate multiple array of this dtype.

        If any input is variable-shaped, or the inputs disagree on element
        shape, the result is built as an object-dtype ndarray of per-element
        ndarrays; otherwise a contiguous ``np.concatenate`` is used.

        Parameters
        ----------
        to_concat : sequence of this type

        Returns
        -------
        ExtensionArray
        """
        # Detect whether a single contiguous ndarray can represent the result.
        should_flatten = False
        shape = None
        for a in to_concat:
            if shape is None:
                shape = a.dtype.element_shape
            if a.is_variable_shaped or a.dtype.element_shape != shape:
                should_flatten = True
                break
        if should_flatten:
            # Mixed/variable shapes: fall back to a ragged (object) ndarray.
            concated = TensorArray(
                np.array([e for a in to_concat for e in a._tensor], dtype=object)
            )
        else:
            concated = TensorArray(np.concatenate([a._tensor for a in to_concat]))
        return concated

    def __setitem__(self, key: Union[int, np.ndarray], value: Any) -> None:
        """
        Set one or more values inplace.

        This method is not required to satisfy the pandas extension array
        interface.

        Parameters
        ----------
        key : int, ndarray, or slice
            When called from, e.g. ``Series.__setitem__``, ``key`` will be
            one of

            * scalar int
            * ndarray of integers.
            * boolean ndarray
            * slice object

        value : ExtensionDtype.type, Sequence[ExtensionDtype.type], or object
            value or values to be set of ``key``.
+ + Returns + ------- + None + """ + key = check_array_indexer(self, key) + if isinstance(value, TensorArrayElement) or np.isscalar(value): + value = np.asarray(value) + if isinstance(value, list): + value = [ + np.asarray(v) if isinstance(v, TensorArrayElement) else v for v in value + ] + if isinstance(value, ABCSeries) and isinstance(value.dtype, TensorDtype): + value = value.values + if value is None or isinstance(value, Sequence) and len(value) == 0: + self._tensor[key] = np.full_like(self._tensor[key], np.nan) + elif isinstance(key, (int, slice, np.ndarray)): + self._tensor[key] = value + else: + raise NotImplementedError( + f"__setitem__ with key type '{type(key)}' not implemented" + ) + + def __contains__(self, item) -> bool: + """ + Return for `item in self`. + """ + if isinstance(item, TensorArrayElement): + np_item = np.asarray(item) + if np_item.size == 1 and np.isnan(np_item).all(): + return self.isna().any() + return super().__contains__(item) + + def __repr__(self): + return self._tensor.__repr__() + + def __str__(self): + return self._tensor.__str__() + + def _values_for_factorize(self) -> Tuple[np.ndarray, Any]: + # TODO(Clark): return self._tensor, np.nan + raise NotImplementedError + + def _reduce(self, name: str, skipna: bool = True, **kwargs): + """ + Return a scalar result of performing the reduction operation. + + Parameters + ---------- + name : str + Name of the function, supported values are: + { any, all, min, max, sum, mean, median, prod, + std, var, sem, kurt, skew }. + skipna : bool, default True + If True, skip NaN values. + **kwargs + Additional keyword arguments passed to the reduction function. + Currently, `ddof` is the only supported kwarg. 
+ + Returns + ------- + scalar + + Raises + ------ + TypeError : subclass does not define reductions + """ + supported_kwargs = ["ddof"] + reducer_kwargs = {} + for kw in supported_kwargs: + try: + reducer_kwargs[kw] = kwargs[kw] + except KeyError: + pass + try: + return TensorArrayElement( + self.SUPPORTED_REDUCERS[name](self._tensor, axis=0, **reducer_kwargs) + ) + except KeyError: + raise NotImplementedError(f"'{name}' aggregate not implemented.") from None + + def __array__(self, dtype: np.dtype = None, **kwargs) -> np.ndarray: + return np.asarray(self._tensor, dtype=dtype, **kwargs) + + def __array_ufunc__(self, ufunc: Callable, method: str, *inputs, **kwargs): + """ + Supports NumPy ufuncs without requiring sloppy coercion to an + ndarray. + """ + out = kwargs.get("out", ()) + for x in inputs + out: + if not isinstance(x, (TensorArray, np.ndarray, numbers.Number)): + return NotImplemented + + # Defer to the implementation of the ufunc on unwrapped values. + inputs = tuple(x._tensor if isinstance(x, TensorArray) else x for x in inputs) + if out: + kwargs["out"] = tuple( + x._tensor if isinstance(x, TensorArray) else x for x in out + ) + result = getattr(ufunc, method)(*inputs, **kwargs) + + if type(result) is tuple: + # Multiple return values. + return tuple(type(self)(x) for x in result) + elif method == "at": + # No return value. + return None + else: + # One return value. + return type(self)(result) + + def to_numpy( + self, + dtype: np.dtype = None, + copy: bool = False, + na_value: Any = pd.api.extensions.no_default, + ): + """ + Convert to a NumPy ndarray. + + .. versionadded:: 1.0.0 + + This is similar to :meth:`numpy.asarray`, but may provide additional + control over how the conversion is done. + + Parameters + ---------- + dtype : str or numpy.dtype, optional + The dtype to pass to :meth:`numpy.asarray`. + copy : bool, default False + Whether to ensure that the returned value is a not a view on + another array. 
Note that ``copy=False`` does not *ensure* that + ``to_numpy()`` is no-copy. Rather, ``copy=True`` ensure that + a copy is made, even if not strictly necessary. + na_value : Any, optional + The value to use for missing values. The default value depends + on `dtype` and the type of the array. + + Returns + ------- + numpy.ndarray + """ + if dtype is not None: + dtype = pd.api.types.pandas_dtype(dtype) + if copy: + values = np.array(self._tensor, dtype=dtype, copy=True) + else: + values = self._tensor.astype(dtype) + elif copy: + values = self._tensor.copy() + else: + values = self._tensor + return values + + @property + def numpy_dtype(self): + """ + Get the dtype of the tensor. + :return: The numpy dtype of the backing ndarray + """ + return self._tensor.dtype + + @property + def numpy_ndim(self): + """ + Get the number of tensor dimensions. + :return: integer for the number of dimensions + """ + return self._tensor.ndim + + @property + def numpy_shape(self): + """ + Get the shape of the tensor. + :return: A tuple of integers for the numpy shape of the backing ndarray + """ + return self._tensor.shape + + @property + def numpy_size(self): + """ + Get the size of the tensor. + :return: integer for the number of elements in the tensor + """ + return self._tensor.size + + def astype(self, dtype, copy=True): + """ + Cast to a NumPy array with 'dtype'. + + Parameters + ---------- + dtype : str or dtype + Typecode or data-type to which the array is cast. + copy : bool, default True + Whether to copy the data, even if not necessary. If False, + a copy is made only if the old dtype does not match the + new dtype. + + Returns + ------- + array : ndarray + NumPy ndarray with 'dtype' for its dtype. 
+ """ + dtype = pd.api.types.pandas_dtype(dtype) + + if isinstance(dtype, TensorDtype): + values = TensorArray(self._tensor.copy()) if copy else self + elif not ( + pd.api.types.is_object_dtype(dtype) and pd.api.types.is_string_dtype(dtype) + ): + values = np.array([str(t) for t in self._tensor]) + if isinstance(dtype, pd.StringDtype): + return dtype.construct_array_type()._from_sequence(values, copy=False) + else: + return values + elif pd.api.types.is_object_dtype(dtype): + # Interpret astype(object) as "cast to an array of numpy arrays" + values = np.empty(len(self), dtype=object) + for i in range(len(self)): + values[i] = self._tensor[i] + else: + values = self._tensor.astype(dtype, copy=copy) + return values + + def any(self, axis=None, out=None, keepdims=False): + """ + Test whether any array element along a given axis evaluates to True. + + See numpy.any() documentation for more information + https://numpy.org/doc/stable/reference/generated/numpy.any.html#numpy.any + + :param axis: Axis or axes along which a logical OR reduction is + performed. + :param out: Alternate output array in which to place the result. + :param keepdims: If this is set to True, the axes which are reduced are + left in the result as dimensions with size one. + :return: single boolean unless axis is not None else TensorArray + """ + result = self._tensor.any(axis=axis, out=out, keepdims=keepdims) + return result if axis is None else TensorArray(result) + + def all(self, axis=None, out=None, keepdims=False): + """ + Test whether all array elements along a given axis evaluate to True. + + :param axis: Axis or axes along which a logical AND reduction is + performed. + :param out: Alternate output array in which to place the result. + :param keepdims: If this is set to True, the axes which are reduced are + left in the result as dimensions with size one. 
        :return: single boolean unless axis is not None else TensorArray
        """
        result = self._tensor.all(axis=axis, out=out, keepdims=keepdims)
        # Axis-less reduction yields a scalar; otherwise re-wrap as TensorArray.
        return result if axis is None else TensorArray(result)

    def __arrow_array__(self, type=None):
        """
        Convert this TensorArray to an ArrowTensorArray extension array.

        This and TensorDtype.__from_arrow__ make up the
        Pandas extension type + array <--> Arrow extension type + array
        interoperability protocol. See
        https://pandas.pydata.org/pandas-docs/stable/development/extending.html#compatibility-with-apache-arrow
        for more information.
        """
        # Imported lazily here — presumably to avoid a circular import between
        # the pandas and arrow tensor-extension modules; confirm before moving.
        from ray.air.util.tensor_extensions.arrow import (
            ArrowTensorArray,
            ArrowVariableShapedTensorArray,
        )

        if self.is_variable_shaped:
            return ArrowVariableShapedTensorArray.from_numpy(self._tensor)
        else:
            return ArrowTensorArray.from_numpy(self._tensor)

    @property
    def _is_boolean(self):
        """
        Whether this extension array should be considered boolean.

        By default, ExtensionArrays are assumed to be non-numeric.
        Setting this to True will affect the behavior of several places,
        e.g.

        * is_bool
        * boolean indexing

        Returns
        -------
        bool
        """
        # This is needed to support returning a TensorArray from .isnan().
        return self.dtype._is_boolean()


# Add operators from the mixin to the TensorArrayElement and TensorArray
# classes.
TensorArrayElement._add_arithmetic_ops()
TensorArrayElement._add_comparison_ops()
TensorArrayElement._add_logical_ops()
TensorArray._add_arithmetic_ops()
TensorArray._add_comparison_ops()
TensorArray._add_logical_ops()


@PublicAPI(stability="beta")
def column_needs_tensor_extension(s: pd.Series) -> bool:
    """Return whether the provided pandas Series column needs a tensor extension
    representation. This tensor extension representation provides more efficient slicing
    and interop with ML frameworks.

    Args:
        s: The pandas Series column that may need to be represented using the tensor
            extension.
+ + Returns: + Whether the provided Series needs a tensor extension representation. + """ + # NOTE: This is an O(1) check. + return ( + s.dtype.type is np.object_ and not s.empty and isinstance(s.iloc[0], np.ndarray) + ) diff --git a/.venv/lib/python3.11/site-packages/ray/air/util/torch_dist.py b/.venv/lib/python3.11/site-packages/ray/air/util/torch_dist.py new file mode 100644 index 0000000000000000000000000000000000000000..6a73164977102dd74c7731ce4a1b0d5584679b59 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/air/util/torch_dist.py @@ -0,0 +1,191 @@ +"""This file is modeled after ray/python/ray/train/torch/config.py + +The logics are duplicated right now to allow maximum flexibility for +setting up PyTorch DDP process groups outside the context of Ray Train. +Eventually, these use cases should be consolidated. +""" + +import os +from abc import ABC +from collections import defaultdict +from datetime import timedelta +from typing import Callable, List, T + +import torch +import torch.distributed as dist + +import ray +from ray.actor import ActorHandle +from ray.air._internal.torch_utils import get_devices +from ray.train._internal.utils import get_address_and_port + + +class TorchDistributedWorker(ABC): + """Defines the interfaces required by the init_torch_dist_process_group(). + + This is modeled after RayTrainerWorker, which allows arbitrary functions + to be executed on a remote DDP worker. + """ + + def execute(self, func: Callable[..., T], *args, **kwargs) -> T: + """Executes the input function and returns the output. + + Args: + func: The function to execute. + args, kwargs: The arguments to pass into func. 
        """
        return func(*args, **kwargs)


def _init_torch_distributed(
    init_method: str,
    backend: str,
    rank: int,
    world_size: int,
    local_rank: int,
    local_world_size: int,
    master_addr: str,
    master_port: str,
    gpu_ids: List[int],
    **init_process_group_kwargs,
):
    """Initialize the torch distributed backend on this worker.

    Runs on each worker process. Sets the rendezvous environment variables
    (or builds a TCP URL), joins the process group, and exports the standard
    torchrun-style env vars (RANK, LOCAL_RANK, WORLD_SIZE, LOCAL_WORLD_SIZE).

    Args:
        init_method: Either "env" (env:// rendezvous via MASTER_ADDR/PORT)
            or "tcp" (explicit tcp:// URL). Any other value raises ValueError.
        backend: torch.distributed backend name, e.g. "gloo" or "nccl".
        rank: Global rank of this worker.
        world_size: Total number of workers in the group.
        local_rank: Rank of this worker among workers on the same node.
        local_world_size: Number of workers on this node.
        master_addr: Address of the rank-0 rendezvous endpoint.
        master_port: Port of the rank-0 rendezvous endpoint.
        gpu_ids: GPU ids visible to workers on this node (used to set
            CUDA_VISIBLE_DEVICES for the "nccl" backend).
        **init_process_group_kwargs: Extra kwargs forwarded to
            ``torch.distributed.init_process_group``; ``timeout`` defaults to
            1800 seconds if not given.
    """
    if init_method == "env":
        os.environ["MASTER_ADDR"] = str(master_addr)
        os.environ["MASTER_PORT"] = str(master_port)
        url = "env://"
    elif init_method == "tcp":
        url = f"tcp://{master_addr}:{master_port}"
    else:
        raise ValueError(
            f"The provided init_method ("
            f"{init_method}) is not supported. Must "
            f"be either 'env' or 'tcp'."
        )

    if backend == "nccl":
        # Same as in Ray Train
        os.environ["NCCL_ASYNC_ERROR_HANDLING"] = "1"
        # All workers on a same node should share the same set of
        # visible GPUs. Otherwise they can't talk among themselves.
        os.environ["CUDA_VISIBLE_DEVICES"] = ",".join(str(gid) for gid in gpu_ids)

    # Explicit args take precedence over caller-supplied kwargs of the same name.
    init_process_group_kwargs.update(
        dict(
            backend=backend,
            init_method=url,
            rank=rank,
            world_size=world_size,
        )
    )
    init_process_group_kwargs.setdefault("timeout", timedelta(seconds=1800))

    dist.init_process_group(**init_process_group_kwargs)

    # Export torchrun-compatible env vars for downstream libraries that read them.
    os.environ["RANK"] = str(rank)
    os.environ["LOCAL_RANK"] = str(local_rank)
    os.environ["WORLD_SIZE"] = str(world_size)
    os.environ["LOCAL_WORLD_SIZE"] = str(local_world_size)


def _get_node_and_gpu_ids():
    """Returns the node_id and gpu_ids for this worker."""
    node_id = ray.get_runtime_context().get_node_id()
    gpu_ids = ray.get_gpu_ids()
    return node_id, gpu_ids


def init_torch_dist_process_group(
    workers: List[ActorHandle],
    backend: str = "gloo",
    init_method: str = "env",
    **init_process_group_kwargs,
) -> List[int]:
    """Initialize a torch distributed process group.

    Note: this util assumes that the order of the workers passed in
    are their global ranks.

    Args:
        workers: A list of TorchDistributedWorker actors.
+ backend: The torch distributed backend to use, + possible choices are "gloo" or "nccl". + init_method: The initialization method to use, + possible choices are "env" or "tcp". + init_process_group_kwargs: Additional kwargs to pass to the call to + :meth:`torch.distributed.init_process_group`. + + Returns: + Local ranks on their respective nodes for the list of workers. + """ + if not dist.is_available(): + raise RuntimeError("Distributed torch is not available.") + + # Build a map from node_id to workers on that node. + node_and_gpu_ids = ray.get( + [w.execute.remote(_get_node_and_gpu_ids) for w in workers] + ) + # All the workers on a specific node. + node_to_workers = defaultdict(list) + # All the gpu ids visible to all the workers on a specific node. + node_to_gpu_ids = defaultdict(set) + for i, (node_id, gpu_ids) in enumerate(node_and_gpu_ids): + node_to_workers[node_id].append(i) + # Force list. + if not isinstance(gpu_ids, list): + gpu_ids = [gpu_ids] + # It is possible for a worker to have access to multiple GPUs. + for gpu_id in gpu_ids: + node_to_gpu_ids[node_id].add(gpu_id) + + # Assume the first worker is the master. + master_addr, master_port = ray.get(workers[0].execute.remote(get_address_and_port)) + + setup_futures = [] + world_size = len(workers) + local_ranks = [] + for rank, worker in enumerate(workers): + node_id = node_and_gpu_ids[rank][0] + local_rank = node_to_workers[node_id].index(rank) + local_world_size = len(node_to_workers[node_id]) + setup_futures.append( + worker.execute.remote( + _init_torch_distributed, + init_method=init_method, + backend=backend, + rank=rank, + world_size=world_size, + local_rank=local_rank, + local_world_size=local_world_size, + master_addr=master_addr, + master_port=master_port, + # list(set) will sort the gpu ids, so VISIBLE_CUDA_DEVICES + # is always sorted. 
+ gpu_ids=list(node_to_gpu_ids[node_id]), + **init_process_group_kwargs, + ) + ) + local_ranks.append(local_rank) + + # Wait for all workers to join the process group. + ray.get(setup_futures) + + return local_ranks + + +def _shutdown_torch_distributed(): + """Shutdown torch distributed backend""" + dist.destroy_process_group() + + if not torch.cuda.is_available(): + return + + # Clean up cuda memory. + devices = get_devices() + for device in devices: + with torch.cuda.device(device): + torch.cuda.empty_cache() + + +def shutdown_torch_dist_process_group(workers: List[ActorHandle]): + ray.get([w.execute.remote(_shutdown_torch_distributed) for w in workers]) diff --git a/.venv/lib/python3.11/site-packages/ray/air/util/transform_pyarrow.py b/.venv/lib/python3.11/site-packages/ray/air/util/transform_pyarrow.py new file mode 100644 index 0000000000000000000000000000000000000000..ba14e7d3a15208cc3053532bfe9da80139c940b9 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/air/util/transform_pyarrow.py @@ -0,0 +1,39 @@ +try: + import pyarrow +except ImportError: + pyarrow = None + + +def _is_column_extension_type(ca: "pyarrow.ChunkedArray") -> bool: + """Whether the provided Arrow Table column is an extension array, using an Arrow + extension type. + """ + return isinstance(ca.type, pyarrow.ExtensionType) + + +def _concatenate_extension_column(ca: "pyarrow.ChunkedArray") -> "pyarrow.Array": + """Concatenate chunks of an extension column into a contiguous array. + + This concatenation is required for creating copies and for .take() to work on + extension arrays. + See https://issues.apache.org/jira/browse/ARROW-16503. + """ + from ray.air.util.tensor_extensions.arrow import ( + ArrowTensorArray, + get_arrow_extension_tensor_types, + ) + + if not _is_column_extension_type(ca): + raise ValueError("Chunked array isn't an extension array: {ca}") + + tensor_extension_types = get_arrow_extension_tensor_types() + + if ca.num_chunks == 0: + # Create empty storage array. 
+ storage = pyarrow.array([], type=ca.type.storage_type) + elif isinstance(ca.type, tensor_extension_types): + return ArrowTensorArray._concat_same_type(ca.chunks) + else: + storage = pyarrow.concat_arrays([c.storage for c in ca.chunks]) + + return ca.type.__arrow_ext_class__().from_storage(ca.type, storage) diff --git a/.venv/lib/python3.11/site-packages/ray/serve/_private/__pycache__/deployment_state.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/serve/_private/__pycache__/deployment_state.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..317f015085953ab2c0791c68e40585a8a5938840 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/serve/_private/__pycache__/deployment_state.cpython-311.pyc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fed1e2f11cb7a8c80b117de5ff1af60d479c6ccb4594d860948a52f971a6ec45 +size 125314 diff --git a/.venv/lib/python3.11/site-packages/ray/serve/_private/benchmarks/__init__.py b/.venv/lib/python3.11/site-packages/ray/serve/_private/benchmarks/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/.venv/lib/python3.11/site-packages/ray/serve/_private/benchmarks/__pycache__/__init__.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/serve/_private/benchmarks/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..35aa493383a3c39d83e43fb503b363d34f14abac Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/serve/_private/benchmarks/__pycache__/__init__.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/serve/_private/benchmarks/__pycache__/common.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/serve/_private/benchmarks/__pycache__/common.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b0e75a4f9ab9c170277f660af02a5e16ef7932bd Binary files /dev/null and 
b/.venv/lib/python3.11/site-packages/ray/serve/_private/benchmarks/__pycache__/common.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/serve/_private/benchmarks/__pycache__/handle_noop_latency.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/serve/_private/benchmarks/__pycache__/handle_noop_latency.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c6ecadc4700476d3bcf9b142d4fa806d50681e69 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/serve/_private/benchmarks/__pycache__/handle_noop_latency.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/serve/_private/benchmarks/__pycache__/handle_throughput.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/serve/_private/benchmarks/__pycache__/handle_throughput.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..47d667246861e631243a7a531e97e4a76d2ec529 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/serve/_private/benchmarks/__pycache__/handle_throughput.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/serve/_private/benchmarks/__pycache__/http_noop_latency.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/serve/_private/benchmarks/__pycache__/http_noop_latency.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..fea9ca501e8b71f9db418d2f73ff62ffda397888 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/serve/_private/benchmarks/__pycache__/http_noop_latency.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/serve/_private/benchmarks/__pycache__/microbenchmark.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/serve/_private/benchmarks/__pycache__/microbenchmark.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..15681bcc5742379a123a1b7ca2bb53e18ca1d6b1 Binary files /dev/null and 
b/.venv/lib/python3.11/site-packages/ray/serve/_private/benchmarks/__pycache__/microbenchmark.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/serve/_private/benchmarks/__pycache__/proxy_benchmark.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/serve/_private/benchmarks/__pycache__/proxy_benchmark.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..09cdad088159fe45affaada9c27463c2203a3a6c Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/serve/_private/benchmarks/__pycache__/proxy_benchmark.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/serve/_private/benchmarks/common.py b/.venv/lib/python3.11/site-packages/ray/serve/_private/benchmarks/common.py new file mode 100644 index 0000000000000000000000000000000000000000..f5daad3d493f112563e2c129aabbe54de1eed54d --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/serve/_private/benchmarks/common.py @@ -0,0 +1,276 @@ +import asyncio +import inspect +import logging +import random +import string +import time +from functools import partial +from typing import Any, Callable, Coroutine, List, Optional, Tuple + +import aiohttp +import aiohttp.client_exceptions +import grpc +import numpy as np +import pandas as pd +from starlette.responses import StreamingResponse +from tqdm import tqdm + +from ray import serve +from ray.serve.generated import serve_pb2, serve_pb2_grpc +from ray.serve.handle import DeploymentHandle + + +async def run_latency_benchmark( + f: Callable, num_requests: int, *, num_warmup_requests: int = 100 +) -> pd.Series: + if inspect.iscoroutinefunction(f): + to_call = f + else: + + async def to_call(): + f() + + latencies = [] + for i in tqdm(range(num_requests + num_warmup_requests)): + start = time.perf_counter() + await to_call() + end = time.perf_counter() + + # Don't include warm-up requests. 
+ if i >= num_warmup_requests: + latencies.append(1000 * (end - start)) + + return pd.Series(latencies) + + +async def run_throughput_benchmark( + fn: Callable[[], List[float]], + multiplier: int = 1, + num_trials: int = 10, + trial_runtime: float = 1, +) -> Tuple[float, float, pd.Series]: + """Benchmarks throughput of a function. + + Args: + fn: The function to benchmark. If this returns anything, it must + return a list of latencies. + multiplier: The number of requests or tokens (or whatever unit + is appropriate for this throughput benchmark) that is + completed in one call to `fn`. + num_trials: The number of trials to run. + trial_runtime: How long each trial should run for. During the + duration of one trial, `fn` will be repeatedly called. + + Returns (mean, stddev, latencies). + """ + # Warmup + start = time.time() + while time.time() - start < 0.1: + await fn() + + # Benchmark + stats = [] + latencies = [] + for _ in tqdm(range(num_trials)): + start = time.perf_counter() + count = 0 + while time.perf_counter() - start < trial_runtime: + res = await fn() + if res: + latencies.extend(res) + + count += 1 + end = time.perf_counter() + stats.append(multiplier * count / (end - start)) + + return round(np.mean(stats), 2), round(np.std(stats), 2), pd.Series(latencies) + + +async def do_single_http_batch( + *, + batch_size: int = 100, + url: str = "http://localhost:8000", + stream: bool = False, +) -> List[float]: + """Sends a batch of http requests and returns e2e latencies.""" + + # By default, aiohttp limits the number of client connections to 100. + # We need to use TCPConnector to configure the limit if batch size + # is greater than 100. 
+ connector = aiohttp.TCPConnector(limit=batch_size) + async with aiohttp.ClientSession( + connector=connector, raise_for_status=True + ) as session: + + async def do_query(): + start = time.perf_counter() + try: + if stream: + async with session.get(url) as r: + async for chunk, _ in r.content.iter_chunks(): + pass + else: + await session.get(url) + except aiohttp.client_exceptions.ClientConnectionError: + pass + + end = time.perf_counter() + return 1000 * (end - start) + + return await asyncio.gather(*[do_query() for _ in range(batch_size)]) + + +async def do_single_grpc_batch( + *, batch_size: int = 100, target: str = "localhost:9000" +): + channel = grpc.aio.insecure_channel(target) + stub = serve_pb2_grpc.RayServeBenchmarkServiceStub(channel) + payload = serve_pb2.StringData(data="") + + async def do_query(): + start = time.perf_counter() + + await stub.grpc_call(payload) + + end = time.perf_counter() + return 1000 * (end - start) + + return await asyncio.gather(*[do_query() for _ in range(batch_size)]) + + +async def collect_profile_events(coro: Coroutine): + """Collects profiling events using Viztracer""" + + from viztracer import VizTracer + + tracer = VizTracer() + tracer.start() + + await coro + + tracer.stop() + tracer.save() + + +def generate_payload(size: int = 100, chars=string.ascii_uppercase + string.digits): + return "".join(random.choice(chars) for _ in range(size)) + + +class Blackhole: + def sink(self, o): + pass + + +@serve.deployment +class Noop: + def __init__(self): + logging.getLogger("ray.serve").setLevel(logging.WARNING) + + def __call__(self, *args, **kwargs): + return b"" + + +@serve.deployment +class Streamer: + def __init__(self, tokens_per_request: int, inter_token_delay_ms: int = 10): + logging.getLogger("ray.serve").setLevel(logging.WARNING) + self._tokens_per_request = tokens_per_request + self._inter_token_delay_s = inter_token_delay_ms / 1000 + + async def stream(self): + for _ in range(self._tokens_per_request): + await 
asyncio.sleep(self._inter_token_delay_s) + yield b"hi" + + async def __call__(self): + return StreamingResponse(self.stream()) + + +@serve.deployment +class IntermediateRouter: + def __init__(self, handle: DeploymentHandle): + logging.getLogger("ray.serve").setLevel(logging.WARNING) + self._handle = handle.options(stream=True) + + async def stream(self): + async for token in self._handle.stream.remote(): + yield token + + def __call__(self): + return StreamingResponse(self.stream()) + + +@serve.deployment +class Benchmarker: + def __init__( + self, + handle: DeploymentHandle, + stream: bool = False, + ): + logging.getLogger("ray.serve").setLevel(logging.WARNING) + self._handle = handle.options(stream=stream) + self._stream = stream + + async def do_single_request(self, payload: Any = None) -> float: + """Completes a single unary request. Returns e2e latency in ms.""" + start = time.perf_counter() + + if payload is None: + await self._handle.remote() + else: + await self._handle.remote(payload) + + end = time.perf_counter() + return 1000 * (end - start) + + async def _do_single_stream(self) -> float: + """Consumes a single streaming request. 
Returns e2e latency in ms.""" + start = time.perf_counter() + + async for r in self._handle.stream.remote(): + pass + + end = time.perf_counter() + return 1000 * (end - start) + + async def _do_single_batch(self, batch_size: int) -> List[float]: + if self._stream: + return await asyncio.gather( + *[self._do_single_stream() for _ in range(batch_size)] + ) + else: + return await asyncio.gather( + *[self.do_single_request() for _ in range(batch_size)] + ) + + async def run_latency_benchmark( + self, *, num_requests: int, payload: Any = None + ) -> pd.Series: + async def f(): + await self.do_single_request(payload) + + return await run_latency_benchmark(f, num_requests=num_requests) + + async def run_throughput_benchmark( + self, + *, + batch_size: int, + num_trials: int, + trial_runtime: float, + tokens_per_request: Optional[float] = None, + ) -> Tuple[float, float]: + if self._stream: + assert tokens_per_request + multiplier = tokens_per_request * batch_size + else: + multiplier = batch_size + + return await run_throughput_benchmark( + fn=partial( + self._do_single_batch, + batch_size=batch_size, + ), + multiplier=multiplier, + num_trials=num_trials, + trial_runtime=trial_runtime, + ) diff --git a/.venv/lib/python3.11/site-packages/ray/serve/_private/benchmarks/handle_noop_latency.py b/.venv/lib/python3.11/site-packages/ray/serve/_private/benchmarks/handle_noop_latency.py new file mode 100644 index 0000000000000000000000000000000000000000..313e467c6ff0e38f3da21c3dcb40e8b700f9cddf --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/serve/_private/benchmarks/handle_noop_latency.py @@ -0,0 +1,34 @@ +import time + +import click +import pandas as pd + +from ray import serve +from ray.serve._private.benchmarks.common import Benchmarker, Noop +from ray.serve.handle import DeploymentHandle + + +@click.command(help="Benchmark no-op DeploymentHandle latency.") +@click.option("--num-replicas", type=int, default=1) +@click.option("--num-requests", type=int, default=100) 
+def main(num_replicas: int, num_requests: int): + h: DeploymentHandle = serve.run( + Benchmarker.bind(Noop.options(num_replicas=num_replicas).bind()) + ) + + latencies: pd.Series = h.run_latency_benchmark.remote( + num_requests, + ).result() + + # Let the logs flush to avoid interwoven output. + time.sleep(1) + + print( + "Latency (ms) for noop DeploymentHandle requests " + f"(num_replicas={num_replicas},num_requests={num_requests}):" + ) + print(latencies.describe(percentiles=[0.5, 0.9, 0.95, 0.99])) + + +if __name__ == "__main__": + main() diff --git a/.venv/lib/python3.11/site-packages/ray/serve/_private/benchmarks/handle_throughput.py b/.venv/lib/python3.11/site-packages/ray/serve/_private/benchmarks/handle_throughput.py new file mode 100644 index 0000000000000000000000000000000000000000..08aa3c215224745cae5bdf7432798dae933b182e --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/serve/_private/benchmarks/handle_throughput.py @@ -0,0 +1,62 @@ +import click + +from ray import serve +from ray.serve._private.benchmarks.common import Benchmarker, Hello +from ray.serve.handle import DeploymentHandle + + +@click.command(help="Benchmark deployment handle throughput.") +@click.option( + "--batch-size", + type=int, + default=100, + help="Number of requests to send to downstream deployment in each trial.", +) +@click.option( + "--num-replicas", + type=int, + default=1, + help="Number of replicas in the downstream deployment.", +) +@click.option( + "--num-trials", + type=int, + default=5, + help="Number of trials of the benchmark to run.", +) +@click.option( + "--trial-runtime", + type=int, + default=1, + help="Duration to run each trial of the benchmark for (seconds).", +) +def main( + batch_size: int, + num_replicas: int, + num_trials: int, + trial_runtime: float, +): + app = Benchmarker.bind( + Hello.options( + num_replicas=num_replicas, ray_actor_options={"num_cpus": 0} + ).bind(), + ) + h: DeploymentHandle = serve.run(app) + + mean, stddev = 
h.run_throughput_benchmark.remote( + batch_size=batch_size, + num_trials=num_trials, + trial_runtime=trial_runtime, + ).result() + + print( + "DeploymentHandle throughput {}: {} +- {} requests/s".format( + f"(num_replicas={num_replicas}, batch_size={batch_size})", + mean, + stddev, + ) + ) + + +if __name__ == "__main__": + main() diff --git a/.venv/lib/python3.11/site-packages/ray/serve/_private/benchmarks/http_noop_latency.py b/.venv/lib/python3.11/site-packages/ray/serve/_private/benchmarks/http_noop_latency.py new file mode 100644 index 0000000000000000000000000000000000000000..ab85f5f882da6ace9d771eb2185154fedd90ffd0 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/serve/_private/benchmarks/http_noop_latency.py @@ -0,0 +1,32 @@ +import asyncio + +import click +import pandas as pd +import requests + +from ray import serve +from ray.serve._private.benchmarks.common import Noop, run_latency_benchmark + + +@click.command(help="Benchmark no-op HTTP latency.") +@click.option("--num-replicas", type=int, default=1) +@click.option("--num-requests", type=int, default=100) +def main(num_replicas: int, num_requests: int): + serve.run(Noop.options(num_replicas=num_replicas).bind()) + + latencies: pd.Series = asyncio.new_event_loop().run_until_complete( + run_latency_benchmark( + lambda: requests.get("http://localhost:8000"), + num_requests=num_requests, + ) + ) + + print( + "Latency (ms) for noop HTTP requests " + f"(num_replicas={num_replicas},num_requests={num_requests}):" + ) + print(latencies.describe(percentiles=[0.5, 0.9, 0.95, 0.99])) + + +if __name__ == "__main__": + main() diff --git a/.venv/lib/python3.11/site-packages/ray/serve/_private/benchmarks/microbenchmark.py b/.venv/lib/python3.11/site-packages/ray/serve/_private/benchmarks/microbenchmark.py new file mode 100644 index 0000000000000000000000000000000000000000..176298d0f26dd979bc93366e24605a73bd3c85c5 --- /dev/null +++ 
b/.venv/lib/python3.11/site-packages/ray/serve/_private/benchmarks/microbenchmark.py @@ -0,0 +1,182 @@ +# Runs several scenarios with varying max batch size, max concurrent queries, +# number of replicas, and with intermediate serve handles (to simulate ensemble +# models) either on or off. + +import asyncio +import logging +from pprint import pprint +from typing import Dict, Union + +import aiohttp +from starlette.requests import Request + +import ray +from ray import serve +from ray.serve._private.benchmarks.common import run_throughput_benchmark +from ray.serve.handle import DeploymentHandle + +NUM_CLIENTS = 8 +CALLS_PER_BATCH = 100 + + +async def fetch(session, data): + async with session.get("http://localhost:8000/", data=data) as response: + response = await response.text() + assert response == "ok", response + + +@ray.remote +class Client: + def ready(self): + return "ok" + + async def do_queries(self, num, data): + async with aiohttp.ClientSession() as session: + for _ in range(num): + await fetch(session, data) + + +def build_app( + intermediate_handles: bool, + num_replicas: int, + max_batch_size: int, + max_ongoing_requests: int, +): + @serve.deployment(max_ongoing_requests=1000) + class Upstream: + def __init__(self, handle: DeploymentHandle): + self._handle = handle + + # Turn off access log. + logging.getLogger("ray.serve").setLevel(logging.WARNING) + + async def __call__(self, req: Request): + return await self._handle.remote(await req.body()) + + @serve.deployment( + num_replicas=num_replicas, + max_ongoing_requests=max_ongoing_requests, + ) + class Downstream: + def __init__(self): + # Turn off access log. 
+ logging.getLogger("ray.serve").setLevel(logging.WARNING) + + @serve.batch(max_batch_size=max_batch_size) + async def batch(self, reqs): + return [b"ok"] * len(reqs) + + async def __call__(self, req: Union[bytes, Request]): + if max_batch_size > 1: + return await self.batch(req) + else: + return b"ok" + + if intermediate_handles: + return Upstream.bind(Downstream.bind()) + else: + return Downstream.bind() + + +async def trial( + intermediate_handles: bool, + num_replicas: int, + max_batch_size: int, + max_ongoing_requests: int, + data_size: str, +) -> Dict[str, float]: + results = {} + + trial_key_base = ( + f"replica:{num_replicas}/batch_size:{max_batch_size}/" + f"concurrent_queries:{max_ongoing_requests}/" + f"data_size:{data_size}/intermediate_handle:{intermediate_handles}" + ) + + print( + f"intermediate_handles={intermediate_handles}," + f"num_replicas={num_replicas}," + f"max_batch_size={max_batch_size}," + f"max_ongoing_requests={max_ongoing_requests}," + f"data_size={data_size}" + ) + + app = build_app( + intermediate_handles, num_replicas, max_batch_size, max_ongoing_requests + ) + serve.run(app) + + if data_size == "small": + data = None + elif data_size == "large": + data = b"a" * 1024 * 1024 + else: + raise ValueError("data_size should be 'small' or 'large'.") + + async with aiohttp.ClientSession() as session: + + async def single_client(): + for _ in range(CALLS_PER_BATCH): + await fetch(session, data) + + single_client_avg_tps, single_client_std_tps = await run_throughput_benchmark( + single_client, + multiplier=CALLS_PER_BATCH, + ) + print( + "\t{} {} +- {} requests/s".format( + "single client {} data".format(data_size), + single_client_avg_tps, + single_client_std_tps, + ) + ) + key = f"num_client:1/{trial_key_base}" + results[key] = single_client_avg_tps + + clients = [Client.remote() for _ in range(NUM_CLIENTS)] + ray.get([client.ready.remote() for client in clients]) + + async def many_clients(): + ray.get([a.do_queries.remote(CALLS_PER_BATCH, 
data) for a in clients]) + + multi_client_avg_tps, _ = await run_throughput_benchmark( + many_clients, + multiplier=CALLS_PER_BATCH * len(clients), + ) + + results[f"num_client:{len(clients)}/{trial_key_base}"] = multi_client_avg_tps + return results + + +async def main(): + results = {} + for intermediate_handles in [False, True]: + for num_replicas in [1, 8]: + for max_batch_size, max_ongoing_requests in [ + (1, 1), + (1, 10000), + (10000, 10000), + ]: + # TODO(edoakes): large data causes broken pipe errors. + for data_size in ["small"]: + results.update( + await trial( + intermediate_handles, + num_replicas, + max_batch_size, + max_ongoing_requests, + data_size, + ) + ) + + print("Results from all conditions:") + pprint(results) + return results + + +if __name__ == "__main__": + ray.init() + serve.start() + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + loop.run_until_complete(main()) diff --git a/.venv/lib/python3.11/site-packages/ray/serve/_private/benchmarks/proxy_benchmark.py b/.venv/lib/python3.11/site-packages/ray/serve/_private/benchmarks/proxy_benchmark.py new file mode 100644 index 0000000000000000000000000000000000000000..347c06854a5c014fff256e159cd426ddb6523813 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/serve/_private/benchmarks/proxy_benchmark.py @@ -0,0 +1,294 @@ +# Runs some request ping to compare HTTP and gRPC performances in TPS and latency. +# Note: this takes around 1 hour to run. 
+ +import asyncio +import json +import logging +import time +from random import random +from typing import Callable, Dict + +import aiohttp +import numpy as np +import pandas as pd +from grpc import aio +from starlette.requests import Request + +import ray +from ray import serve +from ray.serve._private.common import RequestProtocol +from ray.serve.config import gRPCOptions +from ray.serve.generated import serve_pb2, serve_pb2_grpc +from ray.serve.handle import DeploymentHandle + +CALLS_PER_BATCH = 100 +DELTA = 10**-7 + + +async def get_query_tps(name: str, fn: Callable, multiplier: int = CALLS_PER_BATCH): + """Get query TPS. + + Run the function for 0.5 seconds 10 times to calculate how many requests can + be completed. And use those stats to calculate the mean and std of TPS. + """ + # warmup + start = time.time() + while time.time() - start < 0.1: + await fn() + # real run + stats = [] + for _ in range(10): + count = 0 + start = time.time() + while time.time() - start < 0.5: + await fn() + count += 1 + end = time.time() + stats.append(multiplier * count / (end - start)) + tps_mean = round(np.mean(stats), 2) + tps_std = round(np.std(stats), 2) + print(f"\t{name} {tps_mean} +- {tps_std} requests/s") + + return tps_mean, tps_std + + +async def get_query_latencies(name: str, fn: Callable): + """Get query latencies. + + Take all the latencies from the function and calculate the mean and std. 
+ """ + many_client_results = np.asarray(await fn()) + many_client_results.flatten() + latency_ms_mean = round(np.mean(many_client_results) * 1000, 2) + latency_ms_std = round(np.std(many_client_results) * 1000, 2) + print(f"\t{name} {latency_ms_mean} +- {latency_ms_std} ms") + + return latency_ms_mean, latency_ms_std + + +async def fetch_http(session, data): + data_json = {"nums": data} + response = await session.get("http://localhost:8000/", json=data_json) + response_text = await response.read() + float(response_text.decode()) + + +async def fetch_grpc(stub, data): + result = await stub.grpc_call(serve_pb2.RawData(nums=data)) + result.output + + +@ray.remote +class HTTPClient: + def ready(self): + return "ok" + + async def do_queries(self, num, data): + async with aiohttp.ClientSession() as session: + for _ in range(num): + await fetch_http(session, data) + + async def time_queries(self, num, data): + stats = [] + async with aiohttp.ClientSession() as session: + for _ in range(num): + start = time.time() + await fetch_http(session, data) + end = time.time() + stats.append(end - start) + + return stats + + +@ray.remote +class gRPCClient: + def __init__(self): + channel = aio.insecure_channel("localhost:9000") + self.stub = serve_pb2_grpc.RayServeBenchmarkServiceStub(channel) + + def ready(self): + return "ok" + + async def do_queries(self, num, data): + for _ in range(num): + await fetch_grpc(self.stub, data) + + async def time_queries(self, num, data): + stats = [] + for _ in range(num): + start = time.time() + await fetch_grpc(self.stub, data) + end = time.time() + stats.append(end - start) + return stats + + +def build_app( + num_replicas: int, + max_ongoing_requests: int, + data_size: int, +): + @serve.deployment(max_ongoing_requests=1000) + class DataPreprocessing: + def __init__(self, handle: DeploymentHandle): + self._handle = handle + + # Turn off access log. 
+ logging.getLogger("ray.serve").setLevel(logging.WARNING) + + def normalize(self, raw: np.ndarray) -> np.ndarray: + return (raw - np.min(raw)) / (np.max(raw) - np.min(raw) + DELTA) + + async def __call__(self, req: Request): + """HTTP entrypoint. + + It parses the request, normalize the data, and send to model for inference. + """ + body = json.loads(await req.body()) + raw = np.asarray(body["nums"]) + processed = self.normalize(raw) + return await self._handle.remote(processed) + + async def grpc_call(self, raq_data): + """gRPC entrypoint. + + It parses the request, normalize the data, and send to model for inference. + """ + raw = np.asarray(raq_data.nums) + processed = self.normalize(raw) + output = await self._handle.remote(processed) + return serve_pb2.ModelOutput(output=output) + + async def call_with_string(self, raq_data): + """gRPC entrypoint.""" + return serve_pb2.ModelOutput(output=0) + + @serve.deployment( + num_replicas=num_replicas, + max_ongoing_requests=max_ongoing_requests, + ) + class ModelInference: + def __init__(self): + # Turn off access log. + logging.getLogger("ray.serve").setLevel(logging.WARNING) + self._model = np.random.randn(data_size, data_size) + + async def __call__(self, processed: np.ndarray) -> float: + # Run a dot product with a random matrix to simulate a model inference. + model_output = np.dot(processed, self._model) + return sum(model_output) + + return DataPreprocessing.bind(ModelInference.bind()) + + +async def trial( + num_replicas: int, + max_ongoing_requests: int, + data_size: int, + num_clients: int, + proxy: RequestProtocol, +) -> Dict[str, float]: + # Generate input data as array of random floats. + data = [random() for _ in range(data_size)] + + # Build and deploy the app. + app = build_app( + num_replicas=num_replicas, + max_ongoing_requests=max_ongoing_requests, + data_size=data_size, + ) + serve.run(app) + + # Start clients. 
+ if proxy == RequestProtocol.GRPC: + clients = [gRPCClient.remote() for _ in range(num_clients)] + elif proxy == RequestProtocol.HTTP: + clients = [HTTPClient.remote() for _ in range(num_clients)] + ray.get([client.ready.remote() for client in clients]) + + async def client_time_queries(): + return ray.get([a.time_queries.remote(CALLS_PER_BATCH, data) for a in clients]) + + async def client_do_queries(): + ray.get([a.do_queries.remote(CALLS_PER_BATCH, data) for a in clients]) + + trial_key_base = ( + f"proxy:{proxy}/" + f"num_client:{num_clients}/" + f"replica:{num_replicas}/" + f"concurrent_queries:{max_ongoing_requests}/" + f"data_size:{data_size}" + ) + tps_mean, tps_sdt = await get_query_tps( + trial_key_base, + client_do_queries, + ) + latency_ms_mean, latency_ms_std = await get_query_latencies( + trial_key_base, + client_time_queries, + ) + + results = { + "proxy": proxy.value, + "num_client": num_clients, + "replica": num_replicas, + "concurrent_queries": max_ongoing_requests, + "data_size": data_size, + "tps_mean": tps_mean, + "tps_sdt": tps_sdt, + "latency_ms_mean": latency_ms_mean, + "latency_ms_std": latency_ms_std, + } + + return results + + +async def main(): + start_time = time.time() + results = [] + for num_replicas in [1, 8]: + for max_ongoing_requests in [1, 10_000]: + for data_size in [1, 100, 10_000]: + for num_clients in [1, 8]: + for proxy in [RequestProtocol.GRPC, RequestProtocol.HTTP]: + results.append( + await trial( + num_replicas=num_replicas, + max_ongoing_requests=max_ongoing_requests, + data_size=data_size, + num_clients=num_clients, + proxy=proxy, + ) + ) + + print(f"Total time: {time.time() - start_time}s") + print("results", results) + + df = pd.DataFrame.from_dict(results) + df = df.sort_values( + by=["proxy", "num_client", "replica", "concurrent_queries", "data_size"] + ) + print("Results from all conditions:") + # Print the results in with tab separated so we can copy into google sheets. 
+ for i in range(len(df.index)): + row = list(df.iloc[i]) + print("\t".join(map(str, row))) + + +if __name__ == "__main__": + ray.init() + + grpc_port = 9000 + grpc_servicer_functions = [ + "ray.serve.generated.serve_pb2_grpc." + "add_RayServeBenchmarkServiceServicer_to_server", + ] + serve.start( + grpc_options=gRPCOptions( + port=grpc_port, + grpc_servicer_functions=grpc_servicer_functions, + ) + ) + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + loop.run_until_complete(main()) diff --git a/.venv/lib/python3.11/site-packages/ray/serve/_private/benchmarks/serialization/__init__.py b/.venv/lib/python3.11/site-packages/ray/serve/_private/benchmarks/serialization/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/.venv/lib/python3.11/site-packages/ray/serve/_private/benchmarks/serialization/__pycache__/__init__.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/serve/_private/benchmarks/serialization/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..94d833d6b877ffe000934dab371a4a64973315b5 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/serve/_private/benchmarks/serialization/__pycache__/__init__.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/serve/_private/benchmarks/serialization/__pycache__/common.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/serve/_private/benchmarks/serialization/__pycache__/common.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..afea4238149e9cf7e09e2feb2ce8bae97ecf5a0b Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/serve/_private/benchmarks/serialization/__pycache__/common.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/serve/_private/benchmarks/serialization/__pycache__/serialization_benchmark.cpython-311.pyc 
b/.venv/lib/python3.11/site-packages/ray/serve/_private/benchmarks/serialization/__pycache__/serialization_benchmark.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3d65b8cd1fdbe34327cdd1252ac395ffa24059f5 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/serve/_private/benchmarks/serialization/__pycache__/serialization_benchmark.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/serve/_private/benchmarks/serialization/common.py b/.venv/lib/python3.11/site-packages/ray/serve/_private/benchmarks/serialization/common.py new file mode 100644 index 0000000000000000000000000000000000000000..41d535ceaf365044226661c9244312f14d2b260a --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/serve/_private/benchmarks/serialization/common.py @@ -0,0 +1,29 @@ +from dataclasses import dataclass +from typing import List, Optional + +from pydantic import BaseModel + +# +# NOTE: PLEASE READ CAREFULLY BEFORE CHANGING +# +# Payloads in this module are purposefully extracted from benchmark file to force +# Ray's cloudpickle behavior when it does NOT serialize the class definition itself +# along with its payload (instead relying on it being imported) +# + + +class PayloadPydantic(BaseModel): + text: Optional[str] = None + floats: Optional[List[float]] = None + ints: Optional[List[int]] = None + ts: Optional[float] = None + reason: Optional[str] = None + + +@dataclass +class PayloadDataclass: + text: Optional[str] = None + floats: Optional[List[float]] = None + ints: Optional[List[int]] = None + ts: Optional[float] = None + reason: Optional[str] = None diff --git a/.venv/lib/python3.11/site-packages/ray/serve/_private/benchmarks/serialization/serialization_benchmark.py b/.venv/lib/python3.11/site-packages/ray/serve/_private/benchmarks/serialization/serialization_benchmark.py new file mode 100644 index 0000000000000000000000000000000000000000..12e600758c2213b9c000c2cdc041433f2dff3323 --- /dev/null +++ 
b/.venv/lib/python3.11/site-packages/ray/serve/_private/benchmarks/serialization/serialization_benchmark.py @@ -0,0 +1,163 @@ +import asyncio +import enum +import pickle +import time +from typing import Any, Callable + +import click +import msgpack + +from ray._private.serialization import SerializationContext +from ray.cloudpickle import cloudpickle_fast +from ray.serve._private.benchmarks.common import ( + collect_profile_events, + run_latency_benchmark, +) +from ray.serve._private.benchmarks.serialization.common import ( + PayloadDataclass, + PayloadPydantic, +) + + +class PayloadType(enum.Enum): + PYDANTIC = "pydantic" + DATACLASS = "dataclass" + + +class SerializerType(enum.Enum): + RAY = "ray" + PICKLE = "pickle" + CLOUDPICKLE = "cloudpickle" + MSGPACK = "msgpack" + + +_PERCENTILES = [0.5, 0.99] + + +sc = SerializationContext(None) + + +def _create_model(cls): + return cls( + text="Test output", + floats=[float(f) for f in range(1, 100)], + ints=list(range(1, 100)), + ts=time.time(), + reason="Success!", + ) + + +def _blackhole(o): + """Placeholder to be used in the benchmark to make sure runtime + doesn't optimize out unused results""" + pass + + +async def run_serializer_benchmark( + model, serializer: Callable[[Any], bytes], iterations: int +): + def _serde_loop(): + bs = serializer(model) + _blackhole(bs) + + pd = await run_latency_benchmark(_serde_loop, iterations) + + print("Latencies (ms):\n", pd.describe(percentiles=_PERCENTILES)) + + +@click.command(help="Benchmark serialization latency") +@click.option( + "--trials", + type=int, + default=1000, + help="Total number of trials to run in a single benchmark run", +) +@click.option( + "--batch-size", + type=int, + default=10, + help="Controls how many objects are contained in a serialized batch", +) +@click.option( + "--payload-type", + type=PayloadType, + help="Target type of the payload to be benchmarked (supported: pydantic, " + "dataclass)", +) +@click.option( + "--serializer", + type=SerializerType, 
+ help="Target type of the serializer to be benchmarked (supported: ray, pickle, " + "cloudpickle, msgpack)", +) +@click.option( + "--profile-events", + type=bool, + default=False, +) +def main( + trials: int, + batch_size: int, + payload_type: PayloadType, + serializer: SerializerType, + profile_events: bool, +): + if serializer == SerializerType.RAY: + + def _serialize(obj): + so = sc.serialize(obj) + bs = so.to_bytes() + return bs + + elif serializer == SerializerType.CLOUDPICKLE: + + def _serialize(obj): + bs = cloudpickle_fast.dumps(obj) + return bs + + elif serializer == SerializerType.PICKLE: + + def _serialize(obj): + bs = pickle.dumps(obj) + return bs + + elif serializer == SerializerType.MSGPACK: + + def _dumps(obj): + bs = msgpack.dumps(obj.__dict__) + # print(f"Bytes ({len(bs)}): ", bs) + return bs + + def _loads(bs): + dict = msgpack.loads(bs) + return PayloadPydantic(**dict) + + sc._register_cloudpickle_serializer(PayloadPydantic, _dumps, _loads) + + def _serialize(obj): + so = sc.serialize(obj) + bs = so.to_bytes() + return bs + + else: + raise NotImplementedError(serializer) + + if payload_type == PayloadType.PYDANTIC: + model = _create_model(PayloadPydantic) + elif payload_type == PayloadType.DATACLASS: + model = _create_model(PayloadDataclass) + else: + raise NotImplementedError(f"Not supported ({payload_type})") + + payload = [model.copy(deep=True) for _ in range(batch_size)] + + routine = run_serializer_benchmark(payload, _serialize, trials) + + if profile_events: + routine = collect_profile_events(routine) + + asyncio.run(routine) + + +if __name__ == "__main__": + main() diff --git a/.venv/lib/python3.11/site-packages/ray/serve/_private/benchmarks/streaming/__init__.py b/.venv/lib/python3.11/site-packages/ray/serve/_private/benchmarks/streaming/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git 
a/.venv/lib/python3.11/site-packages/ray/serve/_private/benchmarks/streaming/__pycache__/__init__.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/serve/_private/benchmarks/streaming/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a2191abf1f34d7d1f8b6afec777b783c6485694d Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/serve/_private/benchmarks/streaming/__pycache__/__init__.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/serve/_private/benchmarks/streaming/__pycache__/common.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/serve/_private/benchmarks/streaming/__pycache__/common.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5e1b6c314b133e83dae9ca726ce36987c5ff85b9 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/serve/_private/benchmarks/streaming/__pycache__/common.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/serve/_private/benchmarks/streaming/__pycache__/streaming_core_throughput.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/serve/_private/benchmarks/streaming/__pycache__/streaming_core_throughput.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3f4c91ee18c6c2fb02d522c030dd265baf8db7b5 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/serve/_private/benchmarks/streaming/__pycache__/streaming_core_throughput.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/serve/_private/benchmarks/streaming/__pycache__/streaming_grpc_throughput.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/serve/_private/benchmarks/streaming/__pycache__/streaming_grpc_throughput.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..323a4bbe2e2c03a35b3c1d3b0e46d0a340486e56 Binary files /dev/null and 
b/.venv/lib/python3.11/site-packages/ray/serve/_private/benchmarks/streaming/__pycache__/streaming_grpc_throughput.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/serve/_private/benchmarks/streaming/__pycache__/streaming_handle_throughput.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/serve/_private/benchmarks/streaming/__pycache__/streaming_handle_throughput.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c24abe389722ff125cce781c2067914075717f24 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/serve/_private/benchmarks/streaming/__pycache__/streaming_handle_throughput.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/serve/_private/benchmarks/streaming/__pycache__/streaming_http_throughput.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/serve/_private/benchmarks/streaming/__pycache__/streaming_http_throughput.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..09168628f4992c4488d632f8f02e326d4677f1b4 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/serve/_private/benchmarks/streaming/__pycache__/streaming_http_throughput.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/serve/_private/benchmarks/streaming/_grpc/__init__.py b/.venv/lib/python3.11/site-packages/ray/serve/_private/benchmarks/streaming/_grpc/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/.venv/lib/python3.11/site-packages/ray/serve/_private/benchmarks/streaming/_grpc/__pycache__/__init__.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/serve/_private/benchmarks/streaming/_grpc/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..fc16748feeba490c425f8ab216b06507f726ce5c Binary files /dev/null and 
b/.venv/lib/python3.11/site-packages/ray/serve/_private/benchmarks/streaming/_grpc/__pycache__/__init__.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/serve/_private/benchmarks/streaming/_grpc/__pycache__/grpc_server.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/serve/_private/benchmarks/streaming/_grpc/__pycache__/grpc_server.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b1e86c296f1f62eadaabaf6322112eacf8e94343 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/serve/_private/benchmarks/streaming/_grpc/__pycache__/grpc_server.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/serve/_private/benchmarks/streaming/_grpc/__pycache__/test_server_pb2_grpc.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/serve/_private/benchmarks/streaming/_grpc/__pycache__/test_server_pb2_grpc.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ea98148ce2edf6dc25f3f948f4b257eec1574d45 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/serve/_private/benchmarks/streaming/_grpc/__pycache__/test_server_pb2_grpc.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/serve/_private/benchmarks/streaming/_grpc/grpc_server.py b/.venv/lib/python3.11/site-packages/ray/serve/_private/benchmarks/streaming/_grpc/grpc_server.py new file mode 100644 index 0000000000000000000000000000000000000000..40f183216c79c545e623b1e3158c3de9138c439a --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/serve/_private/benchmarks/streaming/_grpc/grpc_server.py @@ -0,0 +1,60 @@ +import grpc + +from ray.serve._private.benchmarks.streaming._grpc import ( + test_server_pb2, + test_server_pb2_grpc, +) + + +async def _async_list(async_iterator): + items = [] + async for item in async_iterator: + items.append(item) + + return items + + +class TestGRPCServer(test_server_pb2_grpc.GRPCTestServerServicer): + def __init__(self, 
tokens_per_request): + self._tokens_per_request = tokens_per_request + + async def Unary(self, request, context): + if request.request_data == "error": + await context.abort( + code=grpc.StatusCode.INTERNAL, + details="unary rpc error", + ) + + return test_server_pb2.Response(response_data="OK") + + async def ClientStreaming(self, request_iterator, context): + data = await _async_list(request_iterator) + + if data and data[0].request_data == "error": + await context.abort( + code=grpc.StatusCode.INTERNAL, + details="client streaming rpc error", + ) + + return test_server_pb2.Response(response_data="OK") + + async def ServerStreaming(self, request, context): + if request.request_data == "error": + await context.abort( + code=grpc.StatusCode.INTERNAL, + details="OK", + ) + + for i in range(self._tokens_per_request): + yield test_server_pb2.Response(response_data="OK") + + async def BidiStreaming(self, request_iterator, context): + data = await _async_list(request_iterator) + if data and data[0].request_data == "error": + await context.abort( + code=grpc.StatusCode.INTERNAL, + details="bidi-streaming rpc error", + ) + + for i in range(self._tokens_per_request): + yield test_server_pb2.Response(response_data="OK") diff --git a/.venv/lib/python3.11/site-packages/ray/serve/_private/benchmarks/streaming/_grpc/test_server_pb2_grpc.py b/.venv/lib/python3.11/site-packages/ray/serve/_private/benchmarks/streaming/_grpc/test_server_pb2_grpc.py new file mode 100644 index 0000000000000000000000000000000000000000..96d5ebfbfd531cdb730b3f5e77b38c8982a09d57 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/serve/_private/benchmarks/streaming/_grpc/test_server_pb2_grpc.py @@ -0,0 +1,216 @@ +# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT! 
+"""Client and server classes corresponding to protobuf-defined services.""" +import grpc + +from ray.serve._private.benchmarks.streaming._grpc import ( + test_server_pb2 as backend_dot_server_dot_common_dot_clients_dot_grpc_dot_proto_dot_test__server__pb2, +) + + +class GRPCTestServerStub(object): + """Missing associated documentation comment in .proto file.""" + + def __init__(self, channel): + """Constructor. + + Args: + channel: A grpc.Channel. + """ + self.Unary = channel.unary_unary( + "/GRPCTestServer/Unary", + request_serializer=backend_dot_server_dot_common_dot_clients_dot_grpc_dot_proto_dot_test__server__pb2.Request.SerializeToString, + response_deserializer=backend_dot_server_dot_common_dot_clients_dot_grpc_dot_proto_dot_test__server__pb2.Response.FromString, + ) + self.ClientStreaming = channel.stream_unary( + "/GRPCTestServer/ClientStreaming", + request_serializer=backend_dot_server_dot_common_dot_clients_dot_grpc_dot_proto_dot_test__server__pb2.Request.SerializeToString, + response_deserializer=backend_dot_server_dot_common_dot_clients_dot_grpc_dot_proto_dot_test__server__pb2.Response.FromString, + ) + self.ServerStreaming = channel.unary_stream( + "/GRPCTestServer/ServerStreaming", + request_serializer=backend_dot_server_dot_common_dot_clients_dot_grpc_dot_proto_dot_test__server__pb2.Request.SerializeToString, + response_deserializer=backend_dot_server_dot_common_dot_clients_dot_grpc_dot_proto_dot_test__server__pb2.Response.FromString, + ) + self.BidiStreaming = channel.stream_stream( + "/GRPCTestServer/BidiStreaming", + request_serializer=backend_dot_server_dot_common_dot_clients_dot_grpc_dot_proto_dot_test__server__pb2.Request.SerializeToString, + response_deserializer=backend_dot_server_dot_common_dot_clients_dot_grpc_dot_proto_dot_test__server__pb2.Response.FromString, + ) + + +class GRPCTestServerServicer(object): + """Missing associated documentation comment in .proto file.""" + + def Unary(self, request, context): + """Missing associated 
documentation comment in .proto file.""" + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details("Method not implemented!") + raise NotImplementedError("Method not implemented!") + + def ClientStreaming(self, request_iterator, context): + """Missing associated documentation comment in .proto file.""" + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details("Method not implemented!") + raise NotImplementedError("Method not implemented!") + + def ServerStreaming(self, request, context): + """Missing associated documentation comment in .proto file.""" + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details("Method not implemented!") + raise NotImplementedError("Method not implemented!") + + def BidiStreaming(self, request_iterator, context): + """Missing associated documentation comment in .proto file.""" + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details("Method not implemented!") + raise NotImplementedError("Method not implemented!") + + +def add_GRPCTestServerServicer_to_server(servicer, server): + rpc_method_handlers = { + "Unary": grpc.unary_unary_rpc_method_handler( + servicer.Unary, + request_deserializer=backend_dot_server_dot_common_dot_clients_dot_grpc_dot_proto_dot_test__server__pb2.Request.FromString, + response_serializer=backend_dot_server_dot_common_dot_clients_dot_grpc_dot_proto_dot_test__server__pb2.Response.SerializeToString, + ), + "ClientStreaming": grpc.stream_unary_rpc_method_handler( + servicer.ClientStreaming, + request_deserializer=backend_dot_server_dot_common_dot_clients_dot_grpc_dot_proto_dot_test__server__pb2.Request.FromString, + response_serializer=backend_dot_server_dot_common_dot_clients_dot_grpc_dot_proto_dot_test__server__pb2.Response.SerializeToString, + ), + "ServerStreaming": grpc.unary_stream_rpc_method_handler( + servicer.ServerStreaming, + request_deserializer=backend_dot_server_dot_common_dot_clients_dot_grpc_dot_proto_dot_test__server__pb2.Request.FromString, 
+ response_serializer=backend_dot_server_dot_common_dot_clients_dot_grpc_dot_proto_dot_test__server__pb2.Response.SerializeToString, + ), + "BidiStreaming": grpc.stream_stream_rpc_method_handler( + servicer.BidiStreaming, + request_deserializer=backend_dot_server_dot_common_dot_clients_dot_grpc_dot_proto_dot_test__server__pb2.Request.FromString, + response_serializer=backend_dot_server_dot_common_dot_clients_dot_grpc_dot_proto_dot_test__server__pb2.Response.SerializeToString, + ), + } + generic_handler = grpc.method_handlers_generic_handler( + "GRPCTestServer", rpc_method_handlers + ) + server.add_generic_rpc_handlers((generic_handler,)) + + +# This class is part of an EXPERIMENTAL API. +class GRPCTestServer(object): + """Missing associated documentation comment in .proto file.""" + + @staticmethod + def Unary( + request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None, + ): + return grpc.experimental.unary_unary( + request, + target, + "/GRPCTestServer/Unary", + backend_dot_server_dot_common_dot_clients_dot_grpc_dot_proto_dot_test__server__pb2.Request.SerializeToString, + backend_dot_server_dot_common_dot_clients_dot_grpc_dot_proto_dot_test__server__pb2.Response.FromString, + options, + channel_credentials, + insecure, + call_credentials, + compression, + wait_for_ready, + timeout, + metadata, + ) + + @staticmethod + def ClientStreaming( + request_iterator, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None, + ): + return grpc.experimental.stream_unary( + request_iterator, + target, + "/GRPCTestServer/ClientStreaming", + backend_dot_server_dot_common_dot_clients_dot_grpc_dot_proto_dot_test__server__pb2.Request.SerializeToString, + 
backend_dot_server_dot_common_dot_clients_dot_grpc_dot_proto_dot_test__server__pb2.Response.FromString, + options, + channel_credentials, + insecure, + call_credentials, + compression, + wait_for_ready, + timeout, + metadata, + ) + + @staticmethod + def ServerStreaming( + request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None, + ): + return grpc.experimental.unary_stream( + request, + target, + "/GRPCTestServer/ServerStreaming", + backend_dot_server_dot_common_dot_clients_dot_grpc_dot_proto_dot_test__server__pb2.Request.SerializeToString, + backend_dot_server_dot_common_dot_clients_dot_grpc_dot_proto_dot_test__server__pb2.Response.FromString, + options, + channel_credentials, + insecure, + call_credentials, + compression, + wait_for_ready, + timeout, + metadata, + ) + + @staticmethod + def BidiStreaming( + request_iterator, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None, + ): + return grpc.experimental.stream_stream( + request_iterator, + target, + "/GRPCTestServer/BidiStreaming", + backend_dot_server_dot_common_dot_clients_dot_grpc_dot_proto_dot_test__server__pb2.Request.SerializeToString, + backend_dot_server_dot_common_dot_clients_dot_grpc_dot_proto_dot_test__server__pb2.Response.FromString, + options, + channel_credentials, + insecure, + call_credentials, + compression, + wait_for_ready, + timeout, + metadata, + ) diff --git a/.venv/lib/python3.11/site-packages/ray/serve/_private/benchmarks/streaming/common.py b/.venv/lib/python3.11/site-packages/ray/serve/_private/benchmarks/streaming/common.py new file mode 100644 index 0000000000000000000000000000000000000000..f5378565f0f089568c530b43167b770ed7db4315 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/serve/_private/benchmarks/streaming/common.py @@ -0,0 
+1,123 @@ +import abc +import asyncio +import enum +import logging +import time +from typing import Tuple, Union + +import numpy as np + +from ray.actor import ActorHandle +from ray.runtime_env import RuntimeEnv +from ray.serve._private.benchmarks.common import Blackhole, run_throughput_benchmark +from ray.serve._private.benchmarks.serialization.common import PayloadPydantic +from ray.serve.handle import DeploymentHandle + +GRPC_DEBUG_RUNTIME_ENV = RuntimeEnv( + env_vars={"GRPC_TRACE": "http", "GRPC_VERBOSITY": "debug"}, +) + + +class IOMode(enum.Enum): + SYNC = "SYNC" + ASYNC = "ASYNC" + + +class Endpoint: + def __init__(self, tokens_per_request: int): + self._tokens_per_request = tokens_per_request + # Switch off logging to minimize its impact + logging.getLogger("ray").setLevel(logging.WARNING) + logging.getLogger("ray.serve").setLevel(logging.WARNING) + + def stream(self): + payload = PayloadPydantic( + text="Test output", + floats=[float(f) for f in range(1, 100)], + ints=list(range(1, 100)), + ts=time.time(), + reason="Success!", + ) + + for i in range(self._tokens_per_request): + yield payload + + async def aio_stream(self): + payload = PayloadPydantic( + text="Test output", + floats=[float(f) for f in range(1, 100)], + ints=list(range(1, 100)), + ts=time.time(), + reason="Success!", + ) + + for i in range(self._tokens_per_request): + yield payload + + +class Caller(Blackhole): + def __init__( + self, + downstream: Union[ActorHandle, DeploymentHandle], + *, + mode: IOMode, + tokens_per_request: int, + batch_size: int, + num_trials: int, + trial_runtime: float, + ): + self._h = downstream + self._mode = mode + self._tokens_per_request = tokens_per_request + self._batch_size = batch_size + self._num_trials = num_trials + self._trial_runtime = trial_runtime + self._durations = [] + + # Switch off logging to minimize its impact + logging.getLogger("ray").setLevel(logging.WARNING) + logging.getLogger("ray.serve").setLevel(logging.WARNING) + + def 
_get_remote_method(self): + if self._mode == IOMode.SYNC: + return self._h.stream + elif self._mode == IOMode.ASYNC: + return self._h.aio_stream + else: + raise NotImplementedError(f"Streaming mode not supported ({self._mode})") + + @abc.abstractmethod + async def _consume_single_stream(self): + pass + + async def _do_single_batch(self): + durations = await asyncio.gather( + *[ + self._execute(self._consume_single_stream) + for _ in range(self._batch_size) + ] + ) + + self._durations.extend(durations) + + async def _execute(self, fn): + start = time.monotonic() + await fn() + dur_s = time.monotonic() - start + return dur_s * 1000 # ms + + async def run_benchmark(self) -> Tuple[float, float]: + coro = run_throughput_benchmark( + fn=self._do_single_batch, + multiplier=self._batch_size * self._tokens_per_request, + num_trials=self._num_trials, + trial_runtime=self._trial_runtime, + ) + # total_runtime = await collect_profile_events(coro) + total_runtime = await coro + + p50, p75, p99 = np.percentile(self._durations, [50, 75, 99]) + + print(f"Individual request quantiles:\n\tP50={p50}\n\tP75={p75}\n\tP99={p99}") + + return total_runtime diff --git a/.venv/lib/python3.11/site-packages/ray/serve/_private/benchmarks/streaming/streaming_core_throughput.py b/.venv/lib/python3.11/site-packages/ray/serve/_private/benchmarks/streaming/streaming_core_throughput.py new file mode 100644 index 0000000000000000000000000000000000000000..4447d9996caab67918c8760964e6d5e8ee9de0c1 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/serve/_private/benchmarks/streaming/streaming_core_throughput.py @@ -0,0 +1,95 @@ +import click + +import ray +from ray.serve._private.benchmarks.streaming.common import Caller, Endpoint, IOMode + + +# @ray.remote(runtime_env=GRPC_DEBUG_RUNTIME_ENV) +@ray.remote +class EndpointActor(Endpoint): + pass + + +# @ray.remote(runtime_env=GRPC_DEBUG_RUNTIME_ENV) +@ray.remote +class CallerActor(Caller): + async def _consume_single_stream(self): + method = 
self._get_remote_method() + async for ref in method.options(num_returns="streaming").remote(): + r = ray.get(ref) + + # self.sink(str(r, 'utf-8')) + self.sink(r) + + +@click.command(help="Benchmark streaming deployment handle throughput.") +@click.option( + "--tokens-per-request", + type=int, + default=1000, + help="Number of tokens (per request) to stream from downstream deployment", +) +@click.option( + "--batch-size", + type=int, + default=10, + help="Number of requests to send to downstream deployment in each batch.", +) +@click.option( + "--num-replicas", + type=int, + default=1, + help="Number of replicas in the downstream deployment.", +) +@click.option( + "--num-trials", + type=int, + default=5, + help="Number of trials of the benchmark to run.", +) +@click.option( + "--trial-runtime", + type=int, + default=5, + help="Duration to run each trial of the benchmark for (seconds).", +) +@click.option( + "--io-mode", + type=str, + default="async", + help="Controls mode of the streaming generation (either 'sync' or 'async')", +) +def main( + tokens_per_request: int, + batch_size: int, + num_replicas: int, + num_trials: int, + trial_runtime: float, + io_mode: str, +): + h = CallerActor.remote( + EndpointActor.remote( + tokens_per_request=tokens_per_request, + ), + mode=IOMode(io_mode.upper()), + tokens_per_request=tokens_per_request, + batch_size=batch_size, + num_trials=num_trials, + trial_runtime=trial_runtime, + ) + + mean, stddev = ray.get(h.run_benchmark.remote()) + print( + "Core Actors streaming throughput ({}) {}: {} +- {} tokens/s".format( + io_mode.upper(), + f"(num_replicas={num_replicas}, " + f"tokens_per_request={tokens_per_request}, " + f"batch_size={batch_size})", + mean, + stddev, + ) + ) + + +if __name__ == "__main__": + main() diff --git a/.venv/lib/python3.11/site-packages/ray/serve/_private/benchmarks/streaming/streaming_grpc_throughput.py b/.venv/lib/python3.11/site-packages/ray/serve/_private/benchmarks/streaming/streaming_grpc_throughput.py 
new file mode 100644 index 0000000000000000000000000000000000000000..5d6e264437cd939ed6139d2a7f126c090a825f8f --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/serve/_private/benchmarks/streaming/streaming_grpc_throughput.py @@ -0,0 +1,218 @@ +import asyncio +import logging +import time +from concurrent import futures +from tempfile import TemporaryDirectory + +import click +import grpc + +import ray +from ray.serve._private.benchmarks.streaming._grpc import ( + test_server_pb2, + test_server_pb2_grpc, +) +from ray.serve._private.benchmarks.streaming._grpc.grpc_server import TestGRPCServer +from ray.serve._private.benchmarks.streaming.common import Caller, IOMode + + +# @ray.remote(runtime_env=GRPC_DEBUG_RUNTIME_ENV) +@ray.remote +class EndpointActor: + async def __init__(self, tokens_per_request, socket_type, tempdir): + # Switch off logging to minimize its impact + logging.getLogger("ray").setLevel(logging.WARNING) + logging.getLogger("ray.serve").setLevel(logging.WARNING) + + self.server = await self.start_server(tokens_per_request, socket_type, tempdir) + + print("gRPC server started!") + + @staticmethod + async def start_server(tokens_per_request, socket_type, tempdir): + server = grpc.aio.server(futures.ThreadPoolExecutor(max_workers=1)) + + addr, server_creds, _ = _gen_addr_creds(socket_type, tempdir) + + server.add_secure_port(addr, server_creds) + + await server.start() + + test_server_pb2_grpc.add_GRPCTestServerServicer_to_server( + TestGRPCServer(tokens_per_request), server + ) + + return server + + +# @ray.remote(runtime_env=GRPC_DEBUG_RUNTIME_ENV) +@ray.remote +class GrpcCallerActor(Caller): + def __init__( + self, + tempdir, + socket_type, + *, + mode: IOMode, + tokens_per_request: int, + batch_size: int, + num_trials: int, + trial_runtime: float, + ): + super().__init__( + self.create_downstream(socket_type, tempdir), + mode=mode, + tokens_per_request=tokens_per_request, + batch_size=batch_size, + num_trials=num_trials, + 
trial_runtime=trial_runtime, + ) + + @staticmethod + def create_downstream(socket_type, tempdir): + addr, _, channel_creds = _gen_addr_creds(socket_type, tempdir) + + channel = grpc.aio.secure_channel( + addr, credentials=channel_creds, interceptors=[] + ) + + return test_server_pb2_grpc.GRPCTestServerStub(channel) + + async def _consume_single_stream(self): + try: + async for r in self._h.ServerStreaming(test_server_pb2.Request()): + self.sink(r) + except Exception as e: + print(str(e)) + + +def _gen_addr_creds(socket_type, tempdir): + if socket_type == "uds": + addr = f"unix://{tempdir}/server.sock" + server_creds = grpc.local_server_credentials(grpc.LocalConnectionType.UDS) + channel_creds = grpc.local_channel_credentials(grpc.LocalConnectionType.UDS) + elif socket_type == "local_tcp": + addr = "127.0.0.1:5432" + server_creds = grpc.local_server_credentials(grpc.LocalConnectionType.LOCAL_TCP) + channel_creds = grpc.local_channel_credentials( + grpc.LocalConnectionType.LOCAL_TCP + ) + else: + raise NotImplementedError(f"Not supported socket type ({socket_type})") + + return addr, server_creds, channel_creds + + +async def run_grpc_benchmark( + batch_size, + io_mode, + socket_type, + num_replicas, + num_trials, + tokens_per_request, + trial_runtime, +): + with TemporaryDirectory() as tempdir: + _ = EndpointActor.remote( + tokens_per_request=tokens_per_request, + socket_type=socket_type, + tempdir=tempdir, + ) + + ca = GrpcCallerActor.remote( + tempdir, + socket_type, + mode=IOMode(io_mode.upper()), + tokens_per_request=tokens_per_request, + batch_size=batch_size, + num_trials=num_trials, + trial_runtime=trial_runtime, + ) + + # TODO make starting server a method (to make synchronization explicit) + time.sleep(5) + + mean, stddev = await ca.run_benchmark.remote() + + print( + "gRPC streaming throughput ({}) {}: {} +- {} tokens/s".format( + io_mode.upper(), + f"(num_replicas={num_replicas}, " + f"tokens_per_request={tokens_per_request}, " + 
f"batch_size={batch_size})", + mean, + stddev, + ) + ) + + +@click.command(help="Benchmark streaming deployment handle throughput.") +@click.option( + "--tokens-per-request", + type=int, + default=1000, + help="Number of tokens (per request) to stream from downstream deployment", +) +@click.option( + "--batch-size", + type=int, + default=10, + help="Number of requests to send to downstream deployment in each batch.", +) +@click.option( + "--num-replicas", + type=int, + default=1, + help="Number of replicas in the downstream deployment.", +) +@click.option( + "--num-trials", + type=int, + default=5, + help="Number of trials of the benchmark to run.", +) +@click.option( + "--trial-runtime", + type=int, + default=5, + help="Duration to run each trial of the benchmark for (seconds).", +) +@click.option( + "--io-mode", + type=str, + default="async", + help="Controls mode of the streaming generation (either 'sync' or 'async')", +) +@click.option( + "--socket-type", + type=str, + default="local_tcp", + help="Controls type of socket used as underlying transport (either 'uds' or " + "'local_tcp')", +) +def main( + tokens_per_request: int, + batch_size: int, + num_replicas: int, + num_trials: int, + trial_runtime: float, + io_mode: str, + socket_type: grpc.LocalConnectionType, +): + """Reference benchmark for vanilla Python (w/ C-based core) gRPC implementation""" + + asyncio.run( + run_grpc_benchmark( + batch_size, + io_mode, + socket_type, + num_replicas, + num_trials, + tokens_per_request, + trial_runtime, + ) + ) + + +if __name__ == "__main__": + main() diff --git a/.venv/lib/python3.11/site-packages/ray/serve/_private/benchmarks/streaming/streaming_handle_throughput.py b/.venv/lib/python3.11/site-packages/ray/serve/_private/benchmarks/streaming/streaming_handle_throughput.py new file mode 100644 index 0000000000000000000000000000000000000000..77ed697053b3bdb5437aa28ee7aff24bf5a1e317 --- /dev/null +++ 
b/.venv/lib/python3.11/site-packages/ray/serve/_private/benchmarks/streaming/streaming_handle_throughput.py @@ -0,0 +1,94 @@ +import click + +from ray import serve +from ray.serve._private.benchmarks.streaming.common import Caller, Endpoint, IOMode + + +@serve.deployment(ray_actor_options={"num_cpus": 0}) +class EndpointDeployment(Endpoint): + pass + + +@serve.deployment +class CallerDeployment(Caller): + async def _consume_single_stream(self): + method = self._get_remote_method().options( + stream=True, + ) + + async for r in method.remote(): + # Blackhole the response + # self.sink(str(r, 'utf-8')) + self.sink(r) + + +@click.command(help="Benchmark streaming deployment handle throughput.") +@click.option( + "--tokens-per-request", + type=int, + default=1000, + help="Number of requests to send to downstream deployment in each trial.", +) +@click.option( + "--batch-size", + type=int, + default=10, + help="Number of requests to send to downstream deployment in each trial.", +) +@click.option( + "--num-replicas", + type=int, + default=1, + help="Number of replicas in the downstream deployment.", +) +@click.option( + "--num-trials", + type=int, + default=5, + help="Number of trials of the benchmark to run.", +) +@click.option( + "--trial-runtime", + type=int, + default=1, + help="Duration to run each trial of the benchmark for (seconds).", +) +@click.option( + "--io-mode", + type=str, + default="async", + help="Controls mode of the streaming generation (either 'sync' or 'async')", +) +def main( + tokens_per_request: int, + batch_size: int, + num_replicas: int, + num_trials: int, + trial_runtime: float, + io_mode: str, +): + app = CallerDeployment.bind( + EndpointDeployment.options(num_replicas=num_replicas).bind(tokens_per_request), + mode=IOMode(io_mode.upper()), + tokens_per_request=tokens_per_request, + batch_size=batch_size, + num_trials=num_trials, + trial_runtime=trial_runtime, + ) + h = serve.run(app) + + mean, stddev = h.run_benchmark.remote().result() + 
print( + "DeploymentHandle streaming throughput ({}) {}: {} +- {} tokens/s".format( + io_mode.upper(), + f"(num_replicas={num_replicas}, " + f"tokens_per_request={tokens_per_request}, " + f"batch_size={batch_size})", + mean, + stddev, + ) + ) + + +if __name__ == "__main__": + main() diff --git a/.venv/lib/python3.11/site-packages/ray/serve/_private/benchmarks/streaming/streaming_http_throughput.py b/.venv/lib/python3.11/site-packages/ray/serve/_private/benchmarks/streaming/streaming_http_throughput.py new file mode 100644 index 0000000000000000000000000000000000000000..b80b5e5b5dac41059362a1b13e762114efcd1717 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/serve/_private/benchmarks/streaming/streaming_http_throughput.py @@ -0,0 +1,140 @@ +import asyncio +import logging +from typing import Tuple + +import aiohttp +import click +from starlette.responses import StreamingResponse + +from ray import serve +from ray.serve._private.benchmarks.common import run_throughput_benchmark +from ray.serve.handle import DeploymentHandle + + +@serve.deployment(ray_actor_options={"num_cpus": 0}) +class Downstream: + def __init__(self, tokens_per_request: int): + logging.getLogger("ray.serve").setLevel(logging.WARNING) + + self._tokens_per_request = tokens_per_request + + async def stream(self): + for i in range(self._tokens_per_request): + yield "hi" + + def __call__(self, *args): + return StreamingResponse(self.stream()) + + +@serve.deployment(ray_actor_options={"num_cpus": 0}) +class Intermediate: + def __init__(self, downstream: DeploymentHandle): + logging.getLogger("ray.serve").setLevel(logging.WARNING) + + self._h = downstream.options(stream=True) + + async def stream(self): + async for token in self._h.stream.remote(): + yield token + + def __call__(self, *args): + return StreamingResponse(self.stream()) + + +async def _consume_single_stream(): + async with aiohttp.ClientSession(raise_for_status=True) as session: + async with session.get("http://localhost:8000") as 
r: + async for line in r.content: + pass + + +async def run_benchmark( + tokens_per_request: int, + batch_size: int, + num_trials: int, + trial_runtime: float, +) -> Tuple[float, float]: + async def _do_single_batch(): + await asyncio.gather(*[_consume_single_stream() for _ in range(batch_size)]) + + return await run_throughput_benchmark( + fn=_do_single_batch, + multiplier=batch_size * tokens_per_request, + num_trials=num_trials, + trial_runtime=trial_runtime, + ) + + +@click.command(help="Benchmark streaming HTTP throughput.") +@click.option( + "--tokens-per-request", + type=int, + default=1000, + help="Number of requests to send to downstream deployment in each trial.", +) +@click.option( + "--batch-size", + type=int, + default=10, + help="Number of requests to send to downstream deployment in each trial.", +) +@click.option( + "--num-replicas", + type=int, + default=1, + help="Number of replicas in the downstream deployment.", +) +@click.option( + "--num-trials", + type=int, + default=5, + help="Number of trials of the benchmark to run.", +) +@click.option( + "--trial-runtime", + type=int, + default=1, + help="Duration to run each trial of the benchmark for (seconds).", +) +@click.option( + "--use-intermediate-deployment", + is_flag=True, + default=False, + help="Whether to run an intermediate deployment proxying the requests.", +) +def main( + tokens_per_request: int, + batch_size: int, + num_replicas: int, + num_trials: int, + trial_runtime: float, + use_intermediate_deployment: bool, +): + app = Downstream.options(num_replicas=num_replicas).bind(tokens_per_request) + if use_intermediate_deployment: + app = Intermediate.bind(app) + + serve.run(app) + + mean, stddev = asyncio.new_event_loop().run_until_complete( + run_benchmark( + tokens_per_request, + batch_size, + num_trials, + trial_runtime, + ) + ) + print( + "HTTP streaming throughput {}: {} +- {} tokens/s".format( + f"(num_replicas={num_replicas}, " + f"tokens_per_request={tokens_per_request}, " + 
f"batch_size={batch_size}, " + f"use_intermediate_deployment={use_intermediate_deployment})", + mean, + stddev, + ) + ) + + +if __name__ == "__main__": + main() diff --git a/.venv/lib/python3.11/site-packages/ray/serve/_private/replica_scheduler/__init__.py b/.venv/lib/python3.11/site-packages/ray/serve/_private/replica_scheduler/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..c98b6452dcb385b889a403e1c3f0cfbe260e4403 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/serve/_private/replica_scheduler/__init__.py @@ -0,0 +1,10 @@ +from ray.serve._private.replica_scheduler.common import PendingRequest # noqa: F401 +from ray.serve._private.replica_scheduler.pow_2_scheduler import ( # noqa: F401 + PowerOfTwoChoicesReplicaScheduler, +) +from ray.serve._private.replica_scheduler.replica_scheduler import ( # noqa: F401 + ReplicaScheduler, +) +from ray.serve._private.replica_scheduler.replica_wrapper import ( # noqa: F401 + RunningReplica, +) diff --git a/.venv/lib/python3.11/site-packages/ray/serve/_private/replica_scheduler/__pycache__/__init__.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/serve/_private/replica_scheduler/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5300dc57dd2741a294cd576c8f235c4b5b97d291 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/serve/_private/replica_scheduler/__pycache__/__init__.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/serve/_private/replica_scheduler/__pycache__/common.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/serve/_private/replica_scheduler/__pycache__/common.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b9a605c35eb3150ac0938bf6f284f555dbebf4f1 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/serve/_private/replica_scheduler/__pycache__/common.cpython-311.pyc differ diff --git 
a/.venv/lib/python3.11/site-packages/ray/serve/_private/replica_scheduler/__pycache__/pow_2_scheduler.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/serve/_private/replica_scheduler/__pycache__/pow_2_scheduler.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..851de5f5ef80005ee8366efb74d25dd3bf405e3f Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/serve/_private/replica_scheduler/__pycache__/pow_2_scheduler.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/serve/_private/replica_scheduler/__pycache__/replica_scheduler.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/serve/_private/replica_scheduler/__pycache__/replica_scheduler.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7459c0acaaf4e7206cccaa2068ec5e26a54360f8 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/serve/_private/replica_scheduler/__pycache__/replica_scheduler.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/serve/_private/replica_scheduler/__pycache__/replica_wrapper.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/serve/_private/replica_scheduler/__pycache__/replica_wrapper.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8d6b34481552591e530a1264d2fdf3034995ed46 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/serve/_private/replica_scheduler/__pycache__/replica_wrapper.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/serve/_private/storage/__init__.py b/.venv/lib/python3.11/site-packages/ray/serve/_private/storage/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/.venv/lib/python3.11/site-packages/ray/serve/_private/storage/__pycache__/__init__.cpython-311.pyc 
b/.venv/lib/python3.11/site-packages/ray/serve/_private/storage/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..effdd8720f5dea93eb41562d62786f2e0653c5c1 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/serve/_private/storage/__pycache__/__init__.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/serve/_private/storage/__pycache__/kv_store.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/serve/_private/storage/__pycache__/kv_store.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..eec40d88d6604465e43a60d6a9a1c6bad7cec61b Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/serve/_private/storage/__pycache__/kv_store.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/serve/_private/storage/__pycache__/kv_store_base.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/serve/_private/storage/__pycache__/kv_store_base.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4e85a84a955161a2a30448f3457c96de643057e9 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/serve/_private/storage/__pycache__/kv_store_base.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/serve/_private/storage/kv_store.py b/.venv/lib/python3.11/site-packages/ray/serve/_private/storage/kv_store.py new file mode 100644 index 0000000000000000000000000000000000000000..32c9db64b331378a6de12c17ecda6f71ef8c193f --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/serve/_private/storage/kv_store.py @@ -0,0 +1,108 @@ +import logging +from typing import Optional + +import ray +import ray.serve._private.constants as serve_constants +from ray._private import ray_constants +from ray._raylet import GcsClient +from ray.serve._private.storage.kv_store_base import KVStoreBase + +logger = logging.getLogger(serve_constants.SERVE_LOGGER_NAME) + + +def 
get_storage_key(namespace: str, storage_key: str) -> str: + """In case we need to access kvstore""" + return "{ns}-{key}".format(ns=namespace, key=storage_key) + + +class KVStoreError(Exception): + def __init__(self, rpc_code): + self.rpc_code = rpc_code + + +class RayInternalKVStore(KVStoreBase): + """Wraps ray's internal_kv with a namespace to avoid collisions. + + Supports string keys and bytes values, caller must handle serialization. + """ + + def __init__( + self, + namespace: Optional[str] = None, + gcs_client: Optional[GcsClient] = None, + ): + if namespace is not None and not isinstance(namespace, str): + raise TypeError("namespace must a string, got: {}.".format(type(namespace))) + if gcs_client is not None: + self.gcs_client = gcs_client + else: + self.gcs_client = GcsClient(address=ray.get_runtime_context().gcs_address) + self.timeout = serve_constants.RAY_SERVE_KV_TIMEOUT_S + self.namespace = namespace or "" + + def get_storage_key(self, key: str) -> str: + return "{ns}-{key}".format(ns=self.namespace, key=key) + + def put(self, key: str, val: bytes) -> bool: + """Put the key-value pair into the store. + + Args: + key (str) + val (bytes) + """ + if not isinstance(key, str): + raise TypeError("key must be a string, got: {}.".format(type(key))) + if not isinstance(val, bytes): + raise TypeError("val must be bytes, got: {}.".format(type(val))) + + try: + return self.gcs_client.internal_kv_put( + self.get_storage_key(key).encode(), + val, + overwrite=True, + namespace=ray_constants.KV_NAMESPACE_SERVE, + timeout=self.timeout, + ) + except ray.exceptions.RpcError as e: + raise KVStoreError(e.rpc_code) + + def get(self, key: str) -> Optional[bytes]: + """Get the value associated with the given key from the store. + + Args: + key (str) + + Returns: + The bytes value. If the key wasn't found, returns None. 
+ """ + if not isinstance(key, str): + raise TypeError("key must be a string, got: {}.".format(type(key))) + + try: + return self.gcs_client.internal_kv_get( + self.get_storage_key(key).encode(), + namespace=ray_constants.KV_NAMESPACE_SERVE, + timeout=self.timeout, + ) + except ray.exceptions.RpcError as e: + raise KVStoreError(e.rpc_code) + + def delete(self, key: str): + """Delete the value associated with the given key from the store. + + Args: + key (str) + """ + + if not isinstance(key, str): + raise TypeError("key must be a string, got: {}.".format(type(key))) + + try: + return self.gcs_client.internal_kv_del( + self.get_storage_key(key).encode(), + False, + namespace=ray_constants.KV_NAMESPACE_SERVE, + timeout=self.timeout, + ) + except ray.exceptions.RpcError as e: + raise KVStoreError(e.rpc_code) diff --git a/.venv/lib/python3.11/site-packages/ray/serve/_private/storage/kv_store_base.py b/.venv/lib/python3.11/site-packages/ray/serve/_private/storage/kv_store_base.py new file mode 100644 index 0000000000000000000000000000000000000000..68216e6547d169963f424c726fd2494a774c0039 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/serve/_private/storage/kv_store_base.py @@ -0,0 +1,56 @@ +import abc +from abc import abstractmethod +from typing import Optional + +from ray.util.annotations import DeveloperAPI + + +@DeveloperAPI +class KVStoreBase(metaclass=abc.ABCMeta): + """Abstract class for KVStore defining APIs needed for ray serve + use cases, currently (8/6/2021) controller state checkpointing. + """ + + @abstractmethod + def get_storage_key(self, key: str) -> str: + """Get internal key for storage. + + Args: + key: User provided key + + Returns: + storage_key: Formatted key for storage, usually by + prepending namespace. + """ + raise NotImplementedError("get_storage_key() has to be implemented") + + @abstractmethod + def put(self, key: str, val: bytes) -> bool: + """Put object into kv store, bytes only. + + Args: + key: Key for object to be stored. 
+ val: Byte value of object. + """ + raise NotImplementedError("put() has to be implemented") + + @abstractmethod + def get(self, key: str) -> Optional[bytes]: + """Get object from storage. + + Args: + key: Key for object to be retrieved. + + Returns: + val: Byte value of object from storage. + """ + raise NotImplementedError("get() has to be implemented") + + @abstractmethod + def delete(self, key: str) -> None: + """Delete an object. + + Args: + key: Key for object to be deleted. + """ + raise NotImplementedError("delete() has to be implemented") diff --git a/.venv/lib/python3.11/site-packages/ray/serve/generated/__pycache__/serve_pb2.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/serve/generated/__pycache__/serve_pb2.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0dc5c34f6f9bef9000814b6408011c56f505ac35 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/serve/generated/__pycache__/serve_pb2.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/serve/generated/__pycache__/serve_pb2_grpc.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/serve/generated/__pycache__/serve_pb2_grpc.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0cc8cf0c60b3a84e7b97cce0affeb46301310999 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/serve/generated/__pycache__/serve_pb2_grpc.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/serve/generated/serve_pb2.py b/.venv/lib/python3.11/site-packages/ray/serve/generated/serve_pb2.py new file mode 100644 index 0000000000000000000000000000000000000000..bd60d0d48d418f36eff54fa1dada4dd088fe7bb1 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/serve/generated/serve_pb2.py @@ -0,0 +1,506 @@ +# -*- coding: utf-8 -*- +# Generated by the protocol buffer compiler. DO NOT EDIT! 
+# source: src/ray/protobuf/serve.proto +"""Generated protocol buffer code.""" +from google.protobuf.internal import enum_type_wrapper +from google.protobuf import descriptor as _descriptor +from google.protobuf import descriptor_pool as _descriptor_pool +from google.protobuf import message as _message +from google.protobuf import reflection as _reflection +from google.protobuf import symbol_database as _symbol_database +# @@protoc_insertion_point(imports) + +_sym_db = _symbol_database.Default() + + + + +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x1csrc/ray/protobuf/serve.proto\x12\tray.serve\"\xcc\x06\n\x11\x41utoscalingConfig\x12!\n\x0cmin_replicas\x18\x01 \x01(\rR\x0bminReplicas\x12!\n\x0cmax_replicas\x18\x02 \x01(\rR\x0bmaxReplicas\x12,\n\x12metrics_interval_s\x18\x03 \x01(\x01R\x10metricsIntervalS\x12+\n\x12look_back_period_s\x18\x04 \x01(\x01R\x0flookBackPeriodS\x12)\n\x10smoothing_factor\x18\x05 \x01(\x01R\x0fsmoothingFactor\x12*\n\x11\x64ownscale_delay_s\x18\x06 \x01(\x01R\x0f\x64ownscaleDelayS\x12&\n\x0fupscale_delay_s\x18\x07 \x01(\x01R\rupscaleDelayS\x12.\n\x10initial_replicas\x18\x08 \x01(\rH\x00R\x0finitialReplicas\x88\x01\x01\x12=\n\x18upscale_smoothing_factor\x18\t \x01(\x01H\x01R\x16upscaleSmoothingFactor\x88\x01\x01\x12\x41\n\x1a\x64ownscale_smoothing_factor\x18\n \x01(\x01H\x02R\x18\x64ownscaleSmoothingFactor\x88\x01\x01\x12\x33\n\x16_serialized_policy_def\x18\x0b \x01(\x0cR\x13SerializedPolicyDef\x12\x17\n\x07_policy\x18\x0c \x01(\tR\x06Policy\x12\x36\n\x17target_ongoing_requests\x18\r \x01(\x01R\x15targetOngoingRequests\x12.\n\x10upscaling_factor\x18\x0e \x01(\x01H\x03R\x0fupscalingFactor\x88\x01\x01\x12\x32\n\x12\x64ownscaling_factor\x18\x0f \x01(\x01H\x04R\x11\x64ownscalingFactor\x88\x01\x01\x42\x13\n\x11_initial_replicasB\x1b\n\x19_upscale_smoothing_factorB\x1d\n\x1b_downscale_smoothing_factorB\x13\n\x11_upscaling_factorB\x15\n\x13_downscaling_factor\"\xa8\x01\n\rLoggingConfig\x12\x33\n\x08\x65ncoding\x18\x01 
\x01(\x0e\x32\x17.ray.serve.EncodingTypeR\x08\x65ncoding\x12\x1b\n\tlog_level\x18\x02 \x01(\tR\x08logLevel\x12\x19\n\x08logs_dir\x18\x03 \x01(\tR\x07logsDir\x12*\n\x11\x65nable_access_log\x18\x04 \x01(\x08R\x0f\x65nableAccessLog\"\x86\x06\n\x10\x44\x65ploymentConfig\x12!\n\x0cnum_replicas\x18\x01 \x01(\x05R\x0bnumReplicas\x12\x30\n\x14max_ongoing_requests\x18\x02 \x01(\x05R\x12maxOngoingRequests\x12.\n\x13max_queued_requests\x18\x03 \x01(\x05R\x11maxQueuedRequests\x12\x1f\n\x0buser_config\x18\x04 \x01(\x0cR\nuserConfig\x12@\n\x1dgraceful_shutdown_wait_loop_s\x18\x05 \x01(\x01R\x19gracefulShutdownWaitLoopS\x12=\n\x1bgraceful_shutdown_timeout_s\x18\x06 \x01(\x01R\x18gracefulShutdownTimeoutS\x12\x31\n\x15health_check_period_s\x18\x07 \x01(\x01R\x12healthCheckPeriodS\x12\x33\n\x16health_check_timeout_s\x18\x08 \x01(\x01R\x13healthCheckTimeoutS\x12*\n\x11is_cross_language\x18\t \x01(\x08R\x0fisCrossLanguage\x12N\n\x13\x64\x65ployment_language\x18\n \x01(\x0e\x32\x1d.ray.serve.DeploymentLanguageR\x12\x64\x65ploymentLanguage\x12K\n\x12\x61utoscaling_config\x18\x0b \x01(\x0b\x32\x1c.ray.serve.AutoscalingConfigR\x11\x61utoscalingConfig\x12\x18\n\x07version\x18\x0c \x01(\tR\x07version\x12?\n\x1cuser_configured_option_names\x18\r \x03(\tR\x19userConfiguredOptionNames\x12?\n\x0elogging_config\x18\x0e \x01(\x0b\x32\x18.ray.serve.LoggingConfigR\rloggingConfig\"\xc8\x02\n\x0fRequestMetadata\x12\x1d\n\nrequest_id\x18\x01 \x01(\tR\trequestId\x12.\n\x13internal_request_id\x18\x02 \x01(\tR\x11internalRequestId\x12\x1f\n\x0b\x63\x61ll_method\x18\x03 \x01(\tR\ncallMethod\x12\x41\n\x07\x63ontext\x18\x04 \x03(\x0b\x32\'.ray.serve.RequestMetadata.ContextEntryR\x07\x63ontext\x12\x30\n\x14multiplexed_model_id\x18\x05 \x01(\tR\x12multiplexedModelId\x12\x14\n\x05route\x18\x06 \x01(\tR\x05route\x1a:\n\x0c\x43ontextEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01\"$\n\x0eRequestWrapper\x12\x12\n\x04\x62ody\x18\x01 
\x01(\x0cR\x04\x62ody\"Y\n\rUpdatedObject\x12\'\n\x0fobject_snapshot\x18\x01 \x01(\x0cR\x0eobjectSnapshot\x12\x1f\n\x0bsnapshot_id\x18\x02 \x01(\x05R\nsnapshotId\"\xbb\x01\n\x0fLongPollRequest\x12\x62\n\x14keys_to_snapshot_ids\x18\x01 \x03(\x0b\x32\x31.ray.serve.LongPollRequest.KeysToSnapshotIdsEntryR\x11keysToSnapshotIds\x1a\x44\n\x16KeysToSnapshotIdsEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\x05R\x05value:\x02\x38\x01\"\xc5\x01\n\x0eLongPollResult\x12V\n\x0fupdated_objects\x18\x01 \x03(\x0b\x32-.ray.serve.LongPollResult.UpdatedObjectsEntryR\x0eupdatedObjects\x1a[\n\x13UpdatedObjectsEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12.\n\x05value\x18\x02 \x01(\x0b\x32\x18.ray.serve.UpdatedObjectR\x05value:\x02\x38\x01\"\xc1\x01\n\x0c\x45ndpointInfo\x12#\n\rendpoint_name\x18\x01 \x01(\tR\x0c\x65ndpointName\x12\x14\n\x05route\x18\x02 \x01(\tR\x05route\x12;\n\x06\x63onfig\x18\x03 \x03(\x0b\x32#.ray.serve.EndpointInfo.ConfigEntryR\x06\x63onfig\x1a\x39\n\x0b\x43onfigEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01\"\xa9\x01\n\x0b\x45ndpointSet\x12\x43\n\tendpoints\x18\x01 \x03(\x0b\x32%.ray.serve.EndpointSet.EndpointsEntryR\tendpoints\x1aU\n\x0e\x45ndpointsEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12-\n\x05value\x18\x02 \x01(\x0b\x32\x17.ray.serve.EndpointInfoR\x05value:\x02\x38\x01\"%\n\rActorNameList\x12\x14\n\x05names\x18\x01 \x03(\tR\x05names\"^\n\x14\x44\x65ploymentTargetInfo\x12#\n\rreplica_names\x18\x01 \x03(\tR\x0creplicaNames\x12!\n\x0cis_available\x18\x02 \x01(\x08R\x0bisAvailable\"\xd1\x02\n\x11\x44\x65ploymentVersion\x12!\n\x0c\x63ode_version\x18\x01 \x01(\tR\x0b\x63odeVersion\x12H\n\x11\x64\x65ployment_config\x18\x02 \x01(\x0b\x32\x1b.ray.serve.DeploymentConfigR\x10\x64\x65ploymentConfig\x12*\n\x11ray_actor_options\x18\x03 \x01(\tR\x0frayActorOptions\x12\x36\n\x17placement_group_bundles\x18\x04 
\x01(\tR\x15placementGroupBundles\x12\x38\n\x18placement_group_strategy\x18\x05 \x01(\tR\x16placementGroupStrategy\x12\x31\n\x15max_replicas_per_node\x18\x06 \x01(\x05R\x12maxReplicasPerNode\"\xf5\x02\n\rReplicaConfig\x12.\n\x13\x64\x65ployment_def_name\x18\x01 \x01(\tR\x11\x64\x65ploymentDefName\x12%\n\x0e\x64\x65ployment_def\x18\x02 \x01(\x0cR\rdeploymentDef\x12\x1b\n\tinit_args\x18\x03 \x01(\x0cR\x08initArgs\x12\x1f\n\x0binit_kwargs\x18\x04 \x01(\x0cR\ninitKwargs\x12*\n\x11ray_actor_options\x18\x05 \x01(\tR\x0frayActorOptions\x12\x36\n\x17placement_group_bundles\x18\x06 \x01(\tR\x15placementGroupBundles\x12\x38\n\x18placement_group_strategy\x18\x07 \x01(\tR\x16placementGroupStrategy\x12\x31\n\x15max_replicas_per_node\x18\x08 \x01(\x05R\x12maxReplicasPerNode\"\xb5\x03\n\x0e\x44\x65ploymentInfo\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12H\n\x11\x64\x65ployment_config\x18\x02 \x01(\x0b\x32\x1b.ray.serve.DeploymentConfigR\x10\x64\x65ploymentConfig\x12?\n\x0ereplica_config\x18\x03 \x01(\x0b\x32\x18.ray.serve.ReplicaConfigR\rreplicaConfig\x12\"\n\rstart_time_ms\x18\x04 \x01(\x03R\x0bstartTimeMs\x12\x1d\n\nactor_name\x18\x05 \x01(\tR\tactorName\x12\x18\n\x07version\x18\x06 \x01(\tR\x07version\x12\x1e\n\x0b\x65nd_time_ms\x18\x07 \x01(\x03R\tendTimeMs\x12\'\n\x0ftarget_capacity\x18\x08 \x01(\x01R\x0etargetCapacity\x12^\n\x19target_capacity_direction\x18\t \x01(\x0e\x32\".ray.serve.TargetCapacityDirectionR\x17targetCapacityDirection\"k\n\x0f\x44\x65ploymentRoute\x12\x42\n\x0f\x64\x65ployment_info\x18\x01 \x01(\x0b\x32\x19.ray.serve.DeploymentInfoR\x0e\x64\x65ploymentInfo\x12\x14\n\x05route\x18\x02 \x01(\tR\x05route\"^\n\x13\x44\x65ploymentRouteList\x12G\n\x11\x64\x65ployment_routes\x18\x01 \x03(\x0b\x32\x1a.ray.serve.DeploymentRouteR\x10\x64\x65ploymentRoutes\"\xc4\x01\n\x14\x44\x65ploymentStatusInfo\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12\x33\n\x06status\x18\x02 \x01(\x0e\x32\x1b.ray.serve.DeploymentStatusR\x06status\x12\x18\n\x07message\x18\x03 
\x01(\tR\x07message\x12I\n\x0estatus_trigger\x18\x04 \x01(\x0e\x32\".ray.serve.DeploymentStatusTriggerR\rstatusTrigger\"s\n\x18\x44\x65ploymentStatusInfoList\x12W\n\x17\x64\x65ployment_status_infos\x18\x01 \x03(\x0b\x32\x1f.ray.serve.DeploymentStatusInfoR\x15\x64\x65ploymentStatusInfos\"\x9a\x01\n\x15\x41pplicationStatusInfo\x12\x34\n\x06status\x18\x01 \x01(\x0e\x32\x1c.ray.serve.ApplicationStatusR\x06status\x12\x18\n\x07message\x18\x02 \x01(\tR\x07message\x12\x31\n\x14\x64\x65ployment_timestamp\x18\x03 \x01(\x01R\x13\x64\x65ploymentTimestamp\"\xbb\x01\n\x0eStatusOverview\x12?\n\napp_status\x18\x01 \x01(\x0b\x32 .ray.serve.ApplicationStatusInfoR\tappStatus\x12T\n\x13\x64\x65ployment_statuses\x18\x02 \x01(\x0b\x32#.ray.serve.DeploymentStatusInfoListR\x12\x64\x65ploymentStatuses\x12\x12\n\x04name\x18\x03 \x01(\tR\x04name\"\x19\n\x17ListApplicationsRequest\"G\n\x18ListApplicationsResponse\x12+\n\x11\x61pplication_names\x18\x01 \x03(\tR\x10\x61pplicationNames\"\x10\n\x0eHealthzRequest\"+\n\x0fHealthzResponse\x12\x18\n\x07message\x18\x01 \x01(\tR\x07message\"L\n\x12UserDefinedMessage\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12\x10\n\x03\x66oo\x18\x02 \x01(\tR\x03\x66oo\x12\x10\n\x03num\x18\x03 \x01(\x03R\x03num\"H\n\x13UserDefinedResponse\x12\x1a\n\x08greeting\x18\x01 \x01(\tR\x08greeting\x12\x15\n\x06num_x2\x18\x02 \x01(\x03R\x05numX2\"\x15\n\x13UserDefinedMessage2\"2\n\x14UserDefinedResponse2\x12\x1a\n\x08greeting\x18\x01 \x01(\tR\x08greeting\"T\n\x0c\x46ruitAmounts\x12\x16\n\x06orange\x18\x01 \x01(\x03R\x06orange\x12\x14\n\x05\x61pple\x18\x02 \x01(\x03R\x05\x61pple\x12\x16\n\x06\x62\x61nana\x18\x03 \x01(\x03R\x06\x62\x61nana\"\"\n\nFruitCosts\x12\x14\n\x05\x63osts\x18\x01 \x01(\x02R\x05\x63osts\"\x1f\n\tArrayData\x12\x12\n\x04nums\x18\x01 \x03(\x02R\x04nums\" \n\nStringData\x12\x12\n\x04\x64\x61ta\x18\x01 \x01(\tR\x04\x64\x61ta\"%\n\x0bModelOutput\x12\x16\n\x06output\x18\x01 
\x01(\x02R\x06output\"\xb8\x02\n\x0e\x44\x65ploymentArgs\x12\'\n\x0f\x64\x65ployment_name\x18\x01 \x01(\tR\x0e\x64\x65ploymentName\x12+\n\x11\x64\x65ployment_config\x18\x02 \x01(\x0cR\x10\x64\x65ploymentConfig\x12%\n\x0ereplica_config\x18\x03 \x01(\x0cR\rreplicaConfig\x12&\n\x0f\x64\x65ployer_job_id\x18\x04 \x01(\tR\rdeployerJobId\x12&\n\x0croute_prefix\x18\x05 \x01(\tH\x00R\x0broutePrefix\x88\x01\x01\x12\x18\n\x07ingress\x18\x06 \x01(\x08R\x07ingress\x12 \n\tdocs_path\x18\x07 \x01(\tH\x01R\x08\x64ocsPath\x88\x01\x01\x42\x0f\n\r_route_prefixB\x0c\n\n_docs_path*\"\n\x0c\x45ncodingType\x12\x08\n\x04TEXT\x10\x00\x12\x08\n\x04JSON\x10\x01**\n\x12\x44\x65ploymentLanguage\x12\n\n\x06PYTHON\x10\x00\x12\x08\n\x04JAVA\x10\x01*6\n\x17TargetCapacityDirection\x12\t\n\x05UNSET\x10\x00\x12\x06\n\x02UP\x10\x01\x12\x08\n\x04\x44OWN\x10\x02*\xdb\x01\n\x10\x44\x65ploymentStatus\x12\x1e\n\x1a\x44\x45PLOYMENT_STATUS_UPDATING\x10\x00\x12\x1d\n\x19\x44\x45PLOYMENT_STATUS_HEALTHY\x10\x01\x12\x1f\n\x1b\x44\x45PLOYMENT_STATUS_UNHEALTHY\x10\x02\x12#\n\x1f\x44\x45PLOYMENT_STATUS_DEPLOY_FAILED\x10\x03\x12\x1f\n\x1b\x44\x45PLOYMENT_STATUS_UPSCALING\x10\x04\x12!\n\x1d\x44\x45PLOYMENT_STATUS_DOWNSCALING\x10\x05*\xfe\x03\n\x17\x44\x65ploymentStatusTrigger\x12)\n%DEPLOYMENT_STATUS_TRIGGER_UNSPECIFIED\x10\x00\x12\x33\n/DEPLOYMENT_STATUS_TRIGGER_CONFIG_UPDATE_STARTED\x10\x01\x12\x35\n1DEPLOYMENT_STATUS_TRIGGER_CONFIG_UPDATE_COMPLETED\x10\x02\x12/\n+DEPLOYMENT_STATUS_TRIGGER_UPSCALE_COMPLETED\x10\x03\x12\x31\n-DEPLOYMENT_STATUS_TRIGGER_DOWNSCALE_COMPLETED\x10\x04\x12)\n%DEPLOYMENT_STATUS_TRIGGER_AUTOSCALING\x10\x05\x12\x34\n0DEPLOYMENT_STATUS_TRIGGER_REPLICA_STARTUP_FAILED\x10\x06\x12\x31\n-DEPLOYMENT_STATUS_TRIGGER_HEALTH_CHECK_FAILED\x10\x07\x12,\n(DEPLOYMENT_STATUS_TRIGGER_INTERNAL_ERROR\x10\x08\x12&\n\"DEPLOYMENT_STATUS_TRIGGER_DELETING\x10\t*\xe2\x01\n\x11\x41pplicationStatus\x12 \n\x1c\x41PPLICATION_STATUS_DEPLOYING\x10\x00\x12\x1e\n\x1a\x41PPLICATION_STATUS_RUNNING\x10\x01\x12$\n 
APPLICATION_STATUS_DEPLOY_FAILED\x10\x02\x12\x1f\n\x1b\x41PPLICATION_STATUS_DELETING\x10\x03\x12\"\n\x1e\x41PPLICATION_STATUS_NOT_STARTED\x10\x05\x12 \n\x1c\x41PPLICATION_STATUS_UNHEALTHY\x10\x06\x32\xb3\x01\n\x12RayServeAPIService\x12[\n\x10ListApplications\x12\".ray.serve.ListApplicationsRequest\x1a#.ray.serve.ListApplicationsResponse\x12@\n\x07Healthz\x12\x19.ray.serve.HealthzRequest\x1a\x1a.ray.serve.HealthzResponse2\xc3\x02\n\x12UserDefinedService\x12I\n\x08__call__\x12\x1d.ray.serve.UserDefinedMessage\x1a\x1e.ray.serve.UserDefinedResponse\x12H\n\x07Method1\x12\x1d.ray.serve.UserDefinedMessage\x1a\x1e.ray.serve.UserDefinedResponse\x12J\n\x07Method2\x12\x1e.ray.serve.UserDefinedMessage2\x1a\x1f.ray.serve.UserDefinedResponse2\x12L\n\tStreaming\x12\x1d.ray.serve.UserDefinedMessage\x1a\x1e.ray.serve.UserDefinedResponse0\x01\x32L\n\x0c\x46ruitService\x12<\n\nFruitStand\x12\x17.ray.serve.FruitAmounts\x1a\x15.ray.serve.FruitCosts2\x98\x01\n\x18RayServeBenchmarkService\x12\x39\n\tgrpc_call\x12\x14.ray.serve.ArrayData\x1a\x16.ray.serve.ModelOutput\x12\x41\n\x10\x63\x61ll_with_string\x12\x15.ray.serve.StringData\x1a\x16.ray.serve.ModelOutputB\'\n\x16io.ray.serve.generatedB\x0bServeProtosP\x01\x62\x06proto3') + +_ENCODINGTYPE = DESCRIPTOR.enum_types_by_name['EncodingType'] +EncodingType = enum_type_wrapper.EnumTypeWrapper(_ENCODINGTYPE) +_DEPLOYMENTLANGUAGE = DESCRIPTOR.enum_types_by_name['DeploymentLanguage'] +DeploymentLanguage = enum_type_wrapper.EnumTypeWrapper(_DEPLOYMENTLANGUAGE) +_TARGETCAPACITYDIRECTION = DESCRIPTOR.enum_types_by_name['TargetCapacityDirection'] +TargetCapacityDirection = enum_type_wrapper.EnumTypeWrapper(_TARGETCAPACITYDIRECTION) +_DEPLOYMENTSTATUS = DESCRIPTOR.enum_types_by_name['DeploymentStatus'] +DeploymentStatus = enum_type_wrapper.EnumTypeWrapper(_DEPLOYMENTSTATUS) +_DEPLOYMENTSTATUSTRIGGER = DESCRIPTOR.enum_types_by_name['DeploymentStatusTrigger'] +DeploymentStatusTrigger = enum_type_wrapper.EnumTypeWrapper(_DEPLOYMENTSTATUSTRIGGER) 
+_APPLICATIONSTATUS = DESCRIPTOR.enum_types_by_name['ApplicationStatus'] +ApplicationStatus = enum_type_wrapper.EnumTypeWrapper(_APPLICATIONSTATUS) +TEXT = 0 +JSON = 1 +PYTHON = 0 +JAVA = 1 +UNSET = 0 +UP = 1 +DOWN = 2 +DEPLOYMENT_STATUS_UPDATING = 0 +DEPLOYMENT_STATUS_HEALTHY = 1 +DEPLOYMENT_STATUS_UNHEALTHY = 2 +DEPLOYMENT_STATUS_DEPLOY_FAILED = 3 +DEPLOYMENT_STATUS_UPSCALING = 4 +DEPLOYMENT_STATUS_DOWNSCALING = 5 +DEPLOYMENT_STATUS_TRIGGER_UNSPECIFIED = 0 +DEPLOYMENT_STATUS_TRIGGER_CONFIG_UPDATE_STARTED = 1 +DEPLOYMENT_STATUS_TRIGGER_CONFIG_UPDATE_COMPLETED = 2 +DEPLOYMENT_STATUS_TRIGGER_UPSCALE_COMPLETED = 3 +DEPLOYMENT_STATUS_TRIGGER_DOWNSCALE_COMPLETED = 4 +DEPLOYMENT_STATUS_TRIGGER_AUTOSCALING = 5 +DEPLOYMENT_STATUS_TRIGGER_REPLICA_STARTUP_FAILED = 6 +DEPLOYMENT_STATUS_TRIGGER_HEALTH_CHECK_FAILED = 7 +DEPLOYMENT_STATUS_TRIGGER_INTERNAL_ERROR = 8 +DEPLOYMENT_STATUS_TRIGGER_DELETING = 9 +APPLICATION_STATUS_DEPLOYING = 0 +APPLICATION_STATUS_RUNNING = 1 +APPLICATION_STATUS_DEPLOY_FAILED = 2 +APPLICATION_STATUS_DELETING = 3 +APPLICATION_STATUS_NOT_STARTED = 5 +APPLICATION_STATUS_UNHEALTHY = 6 + + +_AUTOSCALINGCONFIG = DESCRIPTOR.message_types_by_name['AutoscalingConfig'] +_LOGGINGCONFIG = DESCRIPTOR.message_types_by_name['LoggingConfig'] +_DEPLOYMENTCONFIG = DESCRIPTOR.message_types_by_name['DeploymentConfig'] +_REQUESTMETADATA = DESCRIPTOR.message_types_by_name['RequestMetadata'] +_REQUESTMETADATA_CONTEXTENTRY = _REQUESTMETADATA.nested_types_by_name['ContextEntry'] +_REQUESTWRAPPER = DESCRIPTOR.message_types_by_name['RequestWrapper'] +_UPDATEDOBJECT = DESCRIPTOR.message_types_by_name['UpdatedObject'] +_LONGPOLLREQUEST = DESCRIPTOR.message_types_by_name['LongPollRequest'] +_LONGPOLLREQUEST_KEYSTOSNAPSHOTIDSENTRY = _LONGPOLLREQUEST.nested_types_by_name['KeysToSnapshotIdsEntry'] +_LONGPOLLRESULT = DESCRIPTOR.message_types_by_name['LongPollResult'] +_LONGPOLLRESULT_UPDATEDOBJECTSENTRY = _LONGPOLLRESULT.nested_types_by_name['UpdatedObjectsEntry'] +_ENDPOINTINFO = 
DESCRIPTOR.message_types_by_name['EndpointInfo'] +_ENDPOINTINFO_CONFIGENTRY = _ENDPOINTINFO.nested_types_by_name['ConfigEntry'] +_ENDPOINTSET = DESCRIPTOR.message_types_by_name['EndpointSet'] +_ENDPOINTSET_ENDPOINTSENTRY = _ENDPOINTSET.nested_types_by_name['EndpointsEntry'] +_ACTORNAMELIST = DESCRIPTOR.message_types_by_name['ActorNameList'] +_DEPLOYMENTTARGETINFO = DESCRIPTOR.message_types_by_name['DeploymentTargetInfo'] +_DEPLOYMENTVERSION = DESCRIPTOR.message_types_by_name['DeploymentVersion'] +_REPLICACONFIG = DESCRIPTOR.message_types_by_name['ReplicaConfig'] +_DEPLOYMENTINFO = DESCRIPTOR.message_types_by_name['DeploymentInfo'] +_DEPLOYMENTROUTE = DESCRIPTOR.message_types_by_name['DeploymentRoute'] +_DEPLOYMENTROUTELIST = DESCRIPTOR.message_types_by_name['DeploymentRouteList'] +_DEPLOYMENTSTATUSINFO = DESCRIPTOR.message_types_by_name['DeploymentStatusInfo'] +_DEPLOYMENTSTATUSINFOLIST = DESCRIPTOR.message_types_by_name['DeploymentStatusInfoList'] +_APPLICATIONSTATUSINFO = DESCRIPTOR.message_types_by_name['ApplicationStatusInfo'] +_STATUSOVERVIEW = DESCRIPTOR.message_types_by_name['StatusOverview'] +_LISTAPPLICATIONSREQUEST = DESCRIPTOR.message_types_by_name['ListApplicationsRequest'] +_LISTAPPLICATIONSRESPONSE = DESCRIPTOR.message_types_by_name['ListApplicationsResponse'] +_HEALTHZREQUEST = DESCRIPTOR.message_types_by_name['HealthzRequest'] +_HEALTHZRESPONSE = DESCRIPTOR.message_types_by_name['HealthzResponse'] +_USERDEFINEDMESSAGE = DESCRIPTOR.message_types_by_name['UserDefinedMessage'] +_USERDEFINEDRESPONSE = DESCRIPTOR.message_types_by_name['UserDefinedResponse'] +_USERDEFINEDMESSAGE2 = DESCRIPTOR.message_types_by_name['UserDefinedMessage2'] +_USERDEFINEDRESPONSE2 = DESCRIPTOR.message_types_by_name['UserDefinedResponse2'] +_FRUITAMOUNTS = DESCRIPTOR.message_types_by_name['FruitAmounts'] +_FRUITCOSTS = DESCRIPTOR.message_types_by_name['FruitCosts'] +_ARRAYDATA = DESCRIPTOR.message_types_by_name['ArrayData'] +_STRINGDATA = 
DESCRIPTOR.message_types_by_name['StringData'] +_MODELOUTPUT = DESCRIPTOR.message_types_by_name['ModelOutput'] +_DEPLOYMENTARGS = DESCRIPTOR.message_types_by_name['DeploymentArgs'] +AutoscalingConfig = _reflection.GeneratedProtocolMessageType('AutoscalingConfig', (_message.Message,), { + 'DESCRIPTOR' : _AUTOSCALINGCONFIG, + '__module__' : 'ray.serve.generated.serve_pb2' + # @@protoc_insertion_point(class_scope:ray.serve.AutoscalingConfig) + }) +_sym_db.RegisterMessage(AutoscalingConfig) + +LoggingConfig = _reflection.GeneratedProtocolMessageType('LoggingConfig', (_message.Message,), { + 'DESCRIPTOR' : _LOGGINGCONFIG, + '__module__' : 'ray.serve.generated.serve_pb2' + # @@protoc_insertion_point(class_scope:ray.serve.LoggingConfig) + }) +_sym_db.RegisterMessage(LoggingConfig) + +DeploymentConfig = _reflection.GeneratedProtocolMessageType('DeploymentConfig', (_message.Message,), { + 'DESCRIPTOR' : _DEPLOYMENTCONFIG, + '__module__' : 'ray.serve.generated.serve_pb2' + # @@protoc_insertion_point(class_scope:ray.serve.DeploymentConfig) + }) +_sym_db.RegisterMessage(DeploymentConfig) + +RequestMetadata = _reflection.GeneratedProtocolMessageType('RequestMetadata', (_message.Message,), { + + 'ContextEntry' : _reflection.GeneratedProtocolMessageType('ContextEntry', (_message.Message,), { + 'DESCRIPTOR' : _REQUESTMETADATA_CONTEXTENTRY, + '__module__' : 'ray.serve.generated.serve_pb2' + # @@protoc_insertion_point(class_scope:ray.serve.RequestMetadata.ContextEntry) + }) + , + 'DESCRIPTOR' : _REQUESTMETADATA, + '__module__' : 'ray.serve.generated.serve_pb2' + # @@protoc_insertion_point(class_scope:ray.serve.RequestMetadata) + }) +_sym_db.RegisterMessage(RequestMetadata) +_sym_db.RegisterMessage(RequestMetadata.ContextEntry) + +RequestWrapper = _reflection.GeneratedProtocolMessageType('RequestWrapper', (_message.Message,), { + 'DESCRIPTOR' : _REQUESTWRAPPER, + '__module__' : 'ray.serve.generated.serve_pb2' + # @@protoc_insertion_point(class_scope:ray.serve.RequestWrapper) + }) 
+_sym_db.RegisterMessage(RequestWrapper) + +UpdatedObject = _reflection.GeneratedProtocolMessageType('UpdatedObject', (_message.Message,), { + 'DESCRIPTOR' : _UPDATEDOBJECT, + '__module__' : 'ray.serve.generated.serve_pb2' + # @@protoc_insertion_point(class_scope:ray.serve.UpdatedObject) + }) +_sym_db.RegisterMessage(UpdatedObject) + +LongPollRequest = _reflection.GeneratedProtocolMessageType('LongPollRequest', (_message.Message,), { + + 'KeysToSnapshotIdsEntry' : _reflection.GeneratedProtocolMessageType('KeysToSnapshotIdsEntry', (_message.Message,), { + 'DESCRIPTOR' : _LONGPOLLREQUEST_KEYSTOSNAPSHOTIDSENTRY, + '__module__' : 'ray.serve.generated.serve_pb2' + # @@protoc_insertion_point(class_scope:ray.serve.LongPollRequest.KeysToSnapshotIdsEntry) + }) + , + 'DESCRIPTOR' : _LONGPOLLREQUEST, + '__module__' : 'ray.serve.generated.serve_pb2' + # @@protoc_insertion_point(class_scope:ray.serve.LongPollRequest) + }) +_sym_db.RegisterMessage(LongPollRequest) +_sym_db.RegisterMessage(LongPollRequest.KeysToSnapshotIdsEntry) + +LongPollResult = _reflection.GeneratedProtocolMessageType('LongPollResult', (_message.Message,), { + + 'UpdatedObjectsEntry' : _reflection.GeneratedProtocolMessageType('UpdatedObjectsEntry', (_message.Message,), { + 'DESCRIPTOR' : _LONGPOLLRESULT_UPDATEDOBJECTSENTRY, + '__module__' : 'ray.serve.generated.serve_pb2' + # @@protoc_insertion_point(class_scope:ray.serve.LongPollResult.UpdatedObjectsEntry) + }) + , + 'DESCRIPTOR' : _LONGPOLLRESULT, + '__module__' : 'ray.serve.generated.serve_pb2' + # @@protoc_insertion_point(class_scope:ray.serve.LongPollResult) + }) +_sym_db.RegisterMessage(LongPollResult) +_sym_db.RegisterMessage(LongPollResult.UpdatedObjectsEntry) + +EndpointInfo = _reflection.GeneratedProtocolMessageType('EndpointInfo', (_message.Message,), { + + 'ConfigEntry' : _reflection.GeneratedProtocolMessageType('ConfigEntry', (_message.Message,), { + 'DESCRIPTOR' : _ENDPOINTINFO_CONFIGENTRY, + '__module__' : 'ray.serve.generated.serve_pb2' + # 
@@protoc_insertion_point(class_scope:ray.serve.EndpointInfo.ConfigEntry) + }) + , + 'DESCRIPTOR' : _ENDPOINTINFO, + '__module__' : 'ray.serve.generated.serve_pb2' + # @@protoc_insertion_point(class_scope:ray.serve.EndpointInfo) + }) +_sym_db.RegisterMessage(EndpointInfo) +_sym_db.RegisterMessage(EndpointInfo.ConfigEntry) + +EndpointSet = _reflection.GeneratedProtocolMessageType('EndpointSet', (_message.Message,), { + + 'EndpointsEntry' : _reflection.GeneratedProtocolMessageType('EndpointsEntry', (_message.Message,), { + 'DESCRIPTOR' : _ENDPOINTSET_ENDPOINTSENTRY, + '__module__' : 'ray.serve.generated.serve_pb2' + # @@protoc_insertion_point(class_scope:ray.serve.EndpointSet.EndpointsEntry) + }) + , + 'DESCRIPTOR' : _ENDPOINTSET, + '__module__' : 'ray.serve.generated.serve_pb2' + # @@protoc_insertion_point(class_scope:ray.serve.EndpointSet) + }) +_sym_db.RegisterMessage(EndpointSet) +_sym_db.RegisterMessage(EndpointSet.EndpointsEntry) + +ActorNameList = _reflection.GeneratedProtocolMessageType('ActorNameList', (_message.Message,), { + 'DESCRIPTOR' : _ACTORNAMELIST, + '__module__' : 'ray.serve.generated.serve_pb2' + # @@protoc_insertion_point(class_scope:ray.serve.ActorNameList) + }) +_sym_db.RegisterMessage(ActorNameList) + +DeploymentTargetInfo = _reflection.GeneratedProtocolMessageType('DeploymentTargetInfo', (_message.Message,), { + 'DESCRIPTOR' : _DEPLOYMENTTARGETINFO, + '__module__' : 'ray.serve.generated.serve_pb2' + # @@protoc_insertion_point(class_scope:ray.serve.DeploymentTargetInfo) + }) +_sym_db.RegisterMessage(DeploymentTargetInfo) + +DeploymentVersion = _reflection.GeneratedProtocolMessageType('DeploymentVersion', (_message.Message,), { + 'DESCRIPTOR' : _DEPLOYMENTVERSION, + '__module__' : 'ray.serve.generated.serve_pb2' + # @@protoc_insertion_point(class_scope:ray.serve.DeploymentVersion) + }) +_sym_db.RegisterMessage(DeploymentVersion) + +ReplicaConfig = _reflection.GeneratedProtocolMessageType('ReplicaConfig', (_message.Message,), { + 'DESCRIPTOR' : 
_REPLICACONFIG, + '__module__' : 'ray.serve.generated.serve_pb2' + # @@protoc_insertion_point(class_scope:ray.serve.ReplicaConfig) + }) +_sym_db.RegisterMessage(ReplicaConfig) + +DeploymentInfo = _reflection.GeneratedProtocolMessageType('DeploymentInfo', (_message.Message,), { + 'DESCRIPTOR' : _DEPLOYMENTINFO, + '__module__' : 'ray.serve.generated.serve_pb2' + # @@protoc_insertion_point(class_scope:ray.serve.DeploymentInfo) + }) +_sym_db.RegisterMessage(DeploymentInfo) + +DeploymentRoute = _reflection.GeneratedProtocolMessageType('DeploymentRoute', (_message.Message,), { + 'DESCRIPTOR' : _DEPLOYMENTROUTE, + '__module__' : 'ray.serve.generated.serve_pb2' + # @@protoc_insertion_point(class_scope:ray.serve.DeploymentRoute) + }) +_sym_db.RegisterMessage(DeploymentRoute) + +DeploymentRouteList = _reflection.GeneratedProtocolMessageType('DeploymentRouteList', (_message.Message,), { + 'DESCRIPTOR' : _DEPLOYMENTROUTELIST, + '__module__' : 'ray.serve.generated.serve_pb2' + # @@protoc_insertion_point(class_scope:ray.serve.DeploymentRouteList) + }) +_sym_db.RegisterMessage(DeploymentRouteList) + +DeploymentStatusInfo = _reflection.GeneratedProtocolMessageType('DeploymentStatusInfo', (_message.Message,), { + 'DESCRIPTOR' : _DEPLOYMENTSTATUSINFO, + '__module__' : 'ray.serve.generated.serve_pb2' + # @@protoc_insertion_point(class_scope:ray.serve.DeploymentStatusInfo) + }) +_sym_db.RegisterMessage(DeploymentStatusInfo) + +DeploymentStatusInfoList = _reflection.GeneratedProtocolMessageType('DeploymentStatusInfoList', (_message.Message,), { + 'DESCRIPTOR' : _DEPLOYMENTSTATUSINFOLIST, + '__module__' : 'ray.serve.generated.serve_pb2' + # @@protoc_insertion_point(class_scope:ray.serve.DeploymentStatusInfoList) + }) +_sym_db.RegisterMessage(DeploymentStatusInfoList) + +ApplicationStatusInfo = _reflection.GeneratedProtocolMessageType('ApplicationStatusInfo', (_message.Message,), { + 'DESCRIPTOR' : _APPLICATIONSTATUSINFO, + '__module__' : 'ray.serve.generated.serve_pb2' + # 
@@protoc_insertion_point(class_scope:ray.serve.ApplicationStatusInfo) + }) +_sym_db.RegisterMessage(ApplicationStatusInfo) + +StatusOverview = _reflection.GeneratedProtocolMessageType('StatusOverview', (_message.Message,), { + 'DESCRIPTOR' : _STATUSOVERVIEW, + '__module__' : 'ray.serve.generated.serve_pb2' + # @@protoc_insertion_point(class_scope:ray.serve.StatusOverview) + }) +_sym_db.RegisterMessage(StatusOverview) + +ListApplicationsRequest = _reflection.GeneratedProtocolMessageType('ListApplicationsRequest', (_message.Message,), { + 'DESCRIPTOR' : _LISTAPPLICATIONSREQUEST, + '__module__' : 'ray.serve.generated.serve_pb2' + # @@protoc_insertion_point(class_scope:ray.serve.ListApplicationsRequest) + }) +_sym_db.RegisterMessage(ListApplicationsRequest) + +ListApplicationsResponse = _reflection.GeneratedProtocolMessageType('ListApplicationsResponse', (_message.Message,), { + 'DESCRIPTOR' : _LISTAPPLICATIONSRESPONSE, + '__module__' : 'ray.serve.generated.serve_pb2' + # @@protoc_insertion_point(class_scope:ray.serve.ListApplicationsResponse) + }) +_sym_db.RegisterMessage(ListApplicationsResponse) + +HealthzRequest = _reflection.GeneratedProtocolMessageType('HealthzRequest', (_message.Message,), { + 'DESCRIPTOR' : _HEALTHZREQUEST, + '__module__' : 'ray.serve.generated.serve_pb2' + # @@protoc_insertion_point(class_scope:ray.serve.HealthzRequest) + }) +_sym_db.RegisterMessage(HealthzRequest) + +HealthzResponse = _reflection.GeneratedProtocolMessageType('HealthzResponse', (_message.Message,), { + 'DESCRIPTOR' : _HEALTHZRESPONSE, + '__module__' : 'ray.serve.generated.serve_pb2' + # @@protoc_insertion_point(class_scope:ray.serve.HealthzResponse) + }) +_sym_db.RegisterMessage(HealthzResponse) + +UserDefinedMessage = _reflection.GeneratedProtocolMessageType('UserDefinedMessage', (_message.Message,), { + 'DESCRIPTOR' : _USERDEFINEDMESSAGE, + '__module__' : 'ray.serve.generated.serve_pb2' + # @@protoc_insertion_point(class_scope:ray.serve.UserDefinedMessage) + }) 
+_sym_db.RegisterMessage(UserDefinedMessage) + +UserDefinedResponse = _reflection.GeneratedProtocolMessageType('UserDefinedResponse', (_message.Message,), { + 'DESCRIPTOR' : _USERDEFINEDRESPONSE, + '__module__' : 'ray.serve.generated.serve_pb2' + # @@protoc_insertion_point(class_scope:ray.serve.UserDefinedResponse) + }) +_sym_db.RegisterMessage(UserDefinedResponse) + +UserDefinedMessage2 = _reflection.GeneratedProtocolMessageType('UserDefinedMessage2', (_message.Message,), { + 'DESCRIPTOR' : _USERDEFINEDMESSAGE2, + '__module__' : 'ray.serve.generated.serve_pb2' + # @@protoc_insertion_point(class_scope:ray.serve.UserDefinedMessage2) + }) +_sym_db.RegisterMessage(UserDefinedMessage2) + +UserDefinedResponse2 = _reflection.GeneratedProtocolMessageType('UserDefinedResponse2', (_message.Message,), { + 'DESCRIPTOR' : _USERDEFINEDRESPONSE2, + '__module__' : 'ray.serve.generated.serve_pb2' + # @@protoc_insertion_point(class_scope:ray.serve.UserDefinedResponse2) + }) +_sym_db.RegisterMessage(UserDefinedResponse2) + +FruitAmounts = _reflection.GeneratedProtocolMessageType('FruitAmounts', (_message.Message,), { + 'DESCRIPTOR' : _FRUITAMOUNTS, + '__module__' : 'ray.serve.generated.serve_pb2' + # @@protoc_insertion_point(class_scope:ray.serve.FruitAmounts) + }) +_sym_db.RegisterMessage(FruitAmounts) + +FruitCosts = _reflection.GeneratedProtocolMessageType('FruitCosts', (_message.Message,), { + 'DESCRIPTOR' : _FRUITCOSTS, + '__module__' : 'ray.serve.generated.serve_pb2' + # @@protoc_insertion_point(class_scope:ray.serve.FruitCosts) + }) +_sym_db.RegisterMessage(FruitCosts) + +ArrayData = _reflection.GeneratedProtocolMessageType('ArrayData', (_message.Message,), { + 'DESCRIPTOR' : _ARRAYDATA, + '__module__' : 'ray.serve.generated.serve_pb2' + # @@protoc_insertion_point(class_scope:ray.serve.ArrayData) + }) +_sym_db.RegisterMessage(ArrayData) + +StringData = _reflection.GeneratedProtocolMessageType('StringData', (_message.Message,), { + 'DESCRIPTOR' : _STRINGDATA, + '__module__' : 
'ray.serve.generated.serve_pb2' + # @@protoc_insertion_point(class_scope:ray.serve.StringData) + }) +_sym_db.RegisterMessage(StringData) + +ModelOutput = _reflection.GeneratedProtocolMessageType('ModelOutput', (_message.Message,), { + 'DESCRIPTOR' : _MODELOUTPUT, + '__module__' : 'ray.serve.generated.serve_pb2' + # @@protoc_insertion_point(class_scope:ray.serve.ModelOutput) + }) +_sym_db.RegisterMessage(ModelOutput) + +DeploymentArgs = _reflection.GeneratedProtocolMessageType('DeploymentArgs', (_message.Message,), { + 'DESCRIPTOR' : _DEPLOYMENTARGS, + '__module__' : 'ray.serve.generated.serve_pb2' + # @@protoc_insertion_point(class_scope:ray.serve.DeploymentArgs) + }) +_sym_db.RegisterMessage(DeploymentArgs) + +_RAYSERVEAPISERVICE = DESCRIPTOR.services_by_name['RayServeAPIService'] +_USERDEFINEDSERVICE = DESCRIPTOR.services_by_name['UserDefinedService'] +_FRUITSERVICE = DESCRIPTOR.services_by_name['FruitService'] +_RAYSERVEBENCHMARKSERVICE = DESCRIPTOR.services_by_name['RayServeBenchmarkService'] +if _descriptor._USE_C_DESCRIPTORS == False: + + DESCRIPTOR._options = None + DESCRIPTOR._serialized_options = b'\n\026io.ray.serve.generatedB\013ServeProtosP\001' + _REQUESTMETADATA_CONTEXTENTRY._options = None + _REQUESTMETADATA_CONTEXTENTRY._serialized_options = b'8\001' + _LONGPOLLREQUEST_KEYSTOSNAPSHOTIDSENTRY._options = None + _LONGPOLLREQUEST_KEYSTOSNAPSHOTIDSENTRY._serialized_options = b'8\001' + _LONGPOLLRESULT_UPDATEDOBJECTSENTRY._options = None + _LONGPOLLRESULT_UPDATEDOBJECTSENTRY._serialized_options = b'8\001' + _ENDPOINTINFO_CONFIGENTRY._options = None + _ENDPOINTINFO_CONFIGENTRY._serialized_options = b'8\001' + _ENDPOINTSET_ENDPOINTSENTRY._options = None + _ENDPOINTSET_ENDPOINTSENTRY._serialized_options = b'8\001' + _ENCODINGTYPE._serialized_start=6148 + _ENCODINGTYPE._serialized_end=6182 + _DEPLOYMENTLANGUAGE._serialized_start=6184 + _DEPLOYMENTLANGUAGE._serialized_end=6226 + _TARGETCAPACITYDIRECTION._serialized_start=6228 + 
_TARGETCAPACITYDIRECTION._serialized_end=6282 + _DEPLOYMENTSTATUS._serialized_start=6285 + _DEPLOYMENTSTATUS._serialized_end=6504 + _DEPLOYMENTSTATUSTRIGGER._serialized_start=6507 + _DEPLOYMENTSTATUSTRIGGER._serialized_end=7017 + _APPLICATIONSTATUS._serialized_start=7020 + _APPLICATIONSTATUS._serialized_end=7246 + _AUTOSCALINGCONFIG._serialized_start=44 + _AUTOSCALINGCONFIG._serialized_end=888 + _LOGGINGCONFIG._serialized_start=891 + _LOGGINGCONFIG._serialized_end=1059 + _DEPLOYMENTCONFIG._serialized_start=1062 + _DEPLOYMENTCONFIG._serialized_end=1836 + _REQUESTMETADATA._serialized_start=1839 + _REQUESTMETADATA._serialized_end=2167 + _REQUESTMETADATA_CONTEXTENTRY._serialized_start=2109 + _REQUESTMETADATA_CONTEXTENTRY._serialized_end=2167 + _REQUESTWRAPPER._serialized_start=2169 + _REQUESTWRAPPER._serialized_end=2205 + _UPDATEDOBJECT._serialized_start=2207 + _UPDATEDOBJECT._serialized_end=2296 + _LONGPOLLREQUEST._serialized_start=2299 + _LONGPOLLREQUEST._serialized_end=2486 + _LONGPOLLREQUEST_KEYSTOSNAPSHOTIDSENTRY._serialized_start=2418 + _LONGPOLLREQUEST_KEYSTOSNAPSHOTIDSENTRY._serialized_end=2486 + _LONGPOLLRESULT._serialized_start=2489 + _LONGPOLLRESULT._serialized_end=2686 + _LONGPOLLRESULT_UPDATEDOBJECTSENTRY._serialized_start=2595 + _LONGPOLLRESULT_UPDATEDOBJECTSENTRY._serialized_end=2686 + _ENDPOINTINFO._serialized_start=2689 + _ENDPOINTINFO._serialized_end=2882 + _ENDPOINTINFO_CONFIGENTRY._serialized_start=2825 + _ENDPOINTINFO_CONFIGENTRY._serialized_end=2882 + _ENDPOINTSET._serialized_start=2885 + _ENDPOINTSET._serialized_end=3054 + _ENDPOINTSET_ENDPOINTSENTRY._serialized_start=2969 + _ENDPOINTSET_ENDPOINTSENTRY._serialized_end=3054 + _ACTORNAMELIST._serialized_start=3056 + _ACTORNAMELIST._serialized_end=3093 + _DEPLOYMENTTARGETINFO._serialized_start=3095 + _DEPLOYMENTTARGETINFO._serialized_end=3189 + _DEPLOYMENTVERSION._serialized_start=3192 + _DEPLOYMENTVERSION._serialized_end=3529 + _REPLICACONFIG._serialized_start=3532 + 
_REPLICACONFIG._serialized_end=3905 + _DEPLOYMENTINFO._serialized_start=3908 + _DEPLOYMENTINFO._serialized_end=4345 + _DEPLOYMENTROUTE._serialized_start=4347 + _DEPLOYMENTROUTE._serialized_end=4454 + _DEPLOYMENTROUTELIST._serialized_start=4456 + _DEPLOYMENTROUTELIST._serialized_end=4550 + _DEPLOYMENTSTATUSINFO._serialized_start=4553 + _DEPLOYMENTSTATUSINFO._serialized_end=4749 + _DEPLOYMENTSTATUSINFOLIST._serialized_start=4751 + _DEPLOYMENTSTATUSINFOLIST._serialized_end=4866 + _APPLICATIONSTATUSINFO._serialized_start=4869 + _APPLICATIONSTATUSINFO._serialized_end=5023 + _STATUSOVERVIEW._serialized_start=5026 + _STATUSOVERVIEW._serialized_end=5213 + _LISTAPPLICATIONSREQUEST._serialized_start=5215 + _LISTAPPLICATIONSREQUEST._serialized_end=5240 + _LISTAPPLICATIONSRESPONSE._serialized_start=5242 + _LISTAPPLICATIONSRESPONSE._serialized_end=5313 + _HEALTHZREQUEST._serialized_start=5315 + _HEALTHZREQUEST._serialized_end=5331 + _HEALTHZRESPONSE._serialized_start=5333 + _HEALTHZRESPONSE._serialized_end=5376 + _USERDEFINEDMESSAGE._serialized_start=5378 + _USERDEFINEDMESSAGE._serialized_end=5454 + _USERDEFINEDRESPONSE._serialized_start=5456 + _USERDEFINEDRESPONSE._serialized_end=5528 + _USERDEFINEDMESSAGE2._serialized_start=5530 + _USERDEFINEDMESSAGE2._serialized_end=5551 + _USERDEFINEDRESPONSE2._serialized_start=5553 + _USERDEFINEDRESPONSE2._serialized_end=5603 + _FRUITAMOUNTS._serialized_start=5605 + _FRUITAMOUNTS._serialized_end=5689 + _FRUITCOSTS._serialized_start=5691 + _FRUITCOSTS._serialized_end=5725 + _ARRAYDATA._serialized_start=5727 + _ARRAYDATA._serialized_end=5758 + _STRINGDATA._serialized_start=5760 + _STRINGDATA._serialized_end=5792 + _MODELOUTPUT._serialized_start=5794 + _MODELOUTPUT._serialized_end=5831 + _DEPLOYMENTARGS._serialized_start=5834 + _DEPLOYMENTARGS._serialized_end=6146 + _RAYSERVEAPISERVICE._serialized_start=7249 + _RAYSERVEAPISERVICE._serialized_end=7428 + _USERDEFINEDSERVICE._serialized_start=7431 + _USERDEFINEDSERVICE._serialized_end=7754 + 
_FRUITSERVICE._serialized_start=7756 + _FRUITSERVICE._serialized_end=7832 + _RAYSERVEBENCHMARKSERVICE._serialized_start=7835 + _RAYSERVEBENCHMARKSERVICE._serialized_end=7987 +# @@protoc_insertion_point(module_scope) diff --git a/.venv/lib/python3.11/site-packages/ray/serve/generated/serve_pb2_grpc.py b/.venv/lib/python3.11/site-packages/ray/serve/generated/serve_pb2_grpc.py new file mode 100644 index 0000000000000000000000000000000000000000..e0a0fbfd5beb60db0b27f1914bb396a55049dfb8 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/serve/generated/serve_pb2_grpc.py @@ -0,0 +1,414 @@ +# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT! +"""Client and server classes corresponding to protobuf-defined services.""" +import grpc + +from . import serve_pb2 as src_dot_ray_dot_protobuf_dot_serve__pb2 + + +class RayServeAPIServiceStub(object): + """Missing associated documentation comment in .proto file.""" + + def __init__(self, channel): + """Constructor. + + Args: + channel: A grpc.Channel. 
+ """ + self.ListApplications = channel.unary_unary( + '/ray.serve.RayServeAPIService/ListApplications', + request_serializer=src_dot_ray_dot_protobuf_dot_serve__pb2.ListApplicationsRequest.SerializeToString, + response_deserializer=src_dot_ray_dot_protobuf_dot_serve__pb2.ListApplicationsResponse.FromString, + ) + self.Healthz = channel.unary_unary( + '/ray.serve.RayServeAPIService/Healthz', + request_serializer=src_dot_ray_dot_protobuf_dot_serve__pb2.HealthzRequest.SerializeToString, + response_deserializer=src_dot_ray_dot_protobuf_dot_serve__pb2.HealthzResponse.FromString, + ) + + +class RayServeAPIServiceServicer(object): + """Missing associated documentation comment in .proto file.""" + + def ListApplications(self, request, context): + """Missing associated documentation comment in .proto file.""" + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + def Healthz(self, request, context): + """Missing associated documentation comment in .proto file.""" + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + +def add_RayServeAPIServiceServicer_to_server(servicer, server): + rpc_method_handlers = { + 'ListApplications': grpc.unary_unary_rpc_method_handler( + servicer.ListApplications, + request_deserializer=src_dot_ray_dot_protobuf_dot_serve__pb2.ListApplicationsRequest.FromString, + response_serializer=src_dot_ray_dot_protobuf_dot_serve__pb2.ListApplicationsResponse.SerializeToString, + ), + 'Healthz': grpc.unary_unary_rpc_method_handler( + servicer.Healthz, + request_deserializer=src_dot_ray_dot_protobuf_dot_serve__pb2.HealthzRequest.FromString, + response_serializer=src_dot_ray_dot_protobuf_dot_serve__pb2.HealthzResponse.SerializeToString, + ), + } + generic_handler = grpc.method_handlers_generic_handler( + 'ray.serve.RayServeAPIService', rpc_method_handlers) 
+ server.add_generic_rpc_handlers((generic_handler,)) + + + # This class is part of an EXPERIMENTAL API. +class RayServeAPIService(object): + """Missing associated documentation comment in .proto file.""" + + @staticmethod + def ListApplications(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary(request, target, '/ray.serve.RayServeAPIService/ListApplications', + src_dot_ray_dot_protobuf_dot_serve__pb2.ListApplicationsRequest.SerializeToString, + src_dot_ray_dot_protobuf_dot_serve__pb2.ListApplicationsResponse.FromString, + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) + + @staticmethod + def Healthz(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary(request, target, '/ray.serve.RayServeAPIService/Healthz', + src_dot_ray_dot_protobuf_dot_serve__pb2.HealthzRequest.SerializeToString, + src_dot_ray_dot_protobuf_dot_serve__pb2.HealthzResponse.FromString, + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) + + +class UserDefinedServiceStub(object): + """Missing associated documentation comment in .proto file.""" + + def __init__(self, channel): + """Constructor. + + Args: + channel: A grpc.Channel. 
+ """ + self.__call__ = channel.unary_unary( + '/ray.serve.UserDefinedService/__call__', + request_serializer=src_dot_ray_dot_protobuf_dot_serve__pb2.UserDefinedMessage.SerializeToString, + response_deserializer=src_dot_ray_dot_protobuf_dot_serve__pb2.UserDefinedResponse.FromString, + ) + self.Method1 = channel.unary_unary( + '/ray.serve.UserDefinedService/Method1', + request_serializer=src_dot_ray_dot_protobuf_dot_serve__pb2.UserDefinedMessage.SerializeToString, + response_deserializer=src_dot_ray_dot_protobuf_dot_serve__pb2.UserDefinedResponse.FromString, + ) + self.Method2 = channel.unary_unary( + '/ray.serve.UserDefinedService/Method2', + request_serializer=src_dot_ray_dot_protobuf_dot_serve__pb2.UserDefinedMessage2.SerializeToString, + response_deserializer=src_dot_ray_dot_protobuf_dot_serve__pb2.UserDefinedResponse2.FromString, + ) + self.Streaming = channel.unary_stream( + '/ray.serve.UserDefinedService/Streaming', + request_serializer=src_dot_ray_dot_protobuf_dot_serve__pb2.UserDefinedMessage.SerializeToString, + response_deserializer=src_dot_ray_dot_protobuf_dot_serve__pb2.UserDefinedResponse.FromString, + ) + + +class UserDefinedServiceServicer(object): + """Missing associated documentation comment in .proto file.""" + + def __call__(self, request, context): + """Missing associated documentation comment in .proto file.""" + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + def Method1(self, request, context): + """Missing associated documentation comment in .proto file.""" + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + def Method2(self, request, context): + """Missing associated documentation comment in .proto file.""" + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise 
NotImplementedError('Method not implemented!') + + def Streaming(self, request, context): + """Missing associated documentation comment in .proto file.""" + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + +def add_UserDefinedServiceServicer_to_server(servicer, server): + rpc_method_handlers = { + '__call__': grpc.unary_unary_rpc_method_handler( + servicer.__call__, + request_deserializer=src_dot_ray_dot_protobuf_dot_serve__pb2.UserDefinedMessage.FromString, + response_serializer=src_dot_ray_dot_protobuf_dot_serve__pb2.UserDefinedResponse.SerializeToString, + ), + 'Method1': grpc.unary_unary_rpc_method_handler( + servicer.Method1, + request_deserializer=src_dot_ray_dot_protobuf_dot_serve__pb2.UserDefinedMessage.FromString, + response_serializer=src_dot_ray_dot_protobuf_dot_serve__pb2.UserDefinedResponse.SerializeToString, + ), + 'Method2': grpc.unary_unary_rpc_method_handler( + servicer.Method2, + request_deserializer=src_dot_ray_dot_protobuf_dot_serve__pb2.UserDefinedMessage2.FromString, + response_serializer=src_dot_ray_dot_protobuf_dot_serve__pb2.UserDefinedResponse2.SerializeToString, + ), + 'Streaming': grpc.unary_stream_rpc_method_handler( + servicer.Streaming, + request_deserializer=src_dot_ray_dot_protobuf_dot_serve__pb2.UserDefinedMessage.FromString, + response_serializer=src_dot_ray_dot_protobuf_dot_serve__pb2.UserDefinedResponse.SerializeToString, + ), + } + generic_handler = grpc.method_handlers_generic_handler( + 'ray.serve.UserDefinedService', rpc_method_handlers) + server.add_generic_rpc_handlers((generic_handler,)) + + + # This class is part of an EXPERIMENTAL API. 
+class UserDefinedService(object): + """Missing associated documentation comment in .proto file.""" + + @staticmethod + def __call__(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary(request, target, '/ray.serve.UserDefinedService/__call__', + src_dot_ray_dot_protobuf_dot_serve__pb2.UserDefinedMessage.SerializeToString, + src_dot_ray_dot_protobuf_dot_serve__pb2.UserDefinedResponse.FromString, + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) + + @staticmethod + def Method1(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary(request, target, '/ray.serve.UserDefinedService/Method1', + src_dot_ray_dot_protobuf_dot_serve__pb2.UserDefinedMessage.SerializeToString, + src_dot_ray_dot_protobuf_dot_serve__pb2.UserDefinedResponse.FromString, + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) + + @staticmethod + def Method2(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary(request, target, '/ray.serve.UserDefinedService/Method2', + src_dot_ray_dot_protobuf_dot_serve__pb2.UserDefinedMessage2.SerializeToString, + src_dot_ray_dot_protobuf_dot_serve__pb2.UserDefinedResponse2.FromString, + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) + + @staticmethod + def Streaming(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + 
wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_stream(request, target, '/ray.serve.UserDefinedService/Streaming', + src_dot_ray_dot_protobuf_dot_serve__pb2.UserDefinedMessage.SerializeToString, + src_dot_ray_dot_protobuf_dot_serve__pb2.UserDefinedResponse.FromString, + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) + + +class FruitServiceStub(object): + """Missing associated documentation comment in .proto file.""" + + def __init__(self, channel): + """Constructor. + + Args: + channel: A grpc.Channel. + """ + self.FruitStand = channel.unary_unary( + '/ray.serve.FruitService/FruitStand', + request_serializer=src_dot_ray_dot_protobuf_dot_serve__pb2.FruitAmounts.SerializeToString, + response_deserializer=src_dot_ray_dot_protobuf_dot_serve__pb2.FruitCosts.FromString, + ) + + +class FruitServiceServicer(object): + """Missing associated documentation comment in .proto file.""" + + def FruitStand(self, request, context): + """Missing associated documentation comment in .proto file.""" + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + +def add_FruitServiceServicer_to_server(servicer, server): + rpc_method_handlers = { + 'FruitStand': grpc.unary_unary_rpc_method_handler( + servicer.FruitStand, + request_deserializer=src_dot_ray_dot_protobuf_dot_serve__pb2.FruitAmounts.FromString, + response_serializer=src_dot_ray_dot_protobuf_dot_serve__pb2.FruitCosts.SerializeToString, + ), + } + generic_handler = grpc.method_handlers_generic_handler( + 'ray.serve.FruitService', rpc_method_handlers) + server.add_generic_rpc_handlers((generic_handler,)) + + + # This class is part of an EXPERIMENTAL API. 
+class FruitService(object): + """Missing associated documentation comment in .proto file.""" + + @staticmethod + def FruitStand(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary(request, target, '/ray.serve.FruitService/FruitStand', + src_dot_ray_dot_protobuf_dot_serve__pb2.FruitAmounts.SerializeToString, + src_dot_ray_dot_protobuf_dot_serve__pb2.FruitCosts.FromString, + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) + + +class RayServeBenchmarkServiceStub(object): + """Missing associated documentation comment in .proto file.""" + + def __init__(self, channel): + """Constructor. + + Args: + channel: A grpc.Channel. + """ + self.grpc_call = channel.unary_unary( + '/ray.serve.RayServeBenchmarkService/grpc_call', + request_serializer=src_dot_ray_dot_protobuf_dot_serve__pb2.ArrayData.SerializeToString, + response_deserializer=src_dot_ray_dot_protobuf_dot_serve__pb2.ModelOutput.FromString, + ) + self.call_with_string = channel.unary_unary( + '/ray.serve.RayServeBenchmarkService/call_with_string', + request_serializer=src_dot_ray_dot_protobuf_dot_serve__pb2.StringData.SerializeToString, + response_deserializer=src_dot_ray_dot_protobuf_dot_serve__pb2.ModelOutput.FromString, + ) + + +class RayServeBenchmarkServiceServicer(object): + """Missing associated documentation comment in .proto file.""" + + def grpc_call(self, request, context): + """Missing associated documentation comment in .proto file.""" + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + def call_with_string(self, request, context): + """Missing associated documentation comment in .proto file.""" + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method 
not implemented!') + raise NotImplementedError('Method not implemented!') + + +def add_RayServeBenchmarkServiceServicer_to_server(servicer, server): + rpc_method_handlers = { + 'grpc_call': grpc.unary_unary_rpc_method_handler( + servicer.grpc_call, + request_deserializer=src_dot_ray_dot_protobuf_dot_serve__pb2.ArrayData.FromString, + response_serializer=src_dot_ray_dot_protobuf_dot_serve__pb2.ModelOutput.SerializeToString, + ), + 'call_with_string': grpc.unary_unary_rpc_method_handler( + servicer.call_with_string, + request_deserializer=src_dot_ray_dot_protobuf_dot_serve__pb2.StringData.FromString, + response_serializer=src_dot_ray_dot_protobuf_dot_serve__pb2.ModelOutput.SerializeToString, + ), + } + generic_handler = grpc.method_handlers_generic_handler( + 'ray.serve.RayServeBenchmarkService', rpc_method_handlers) + server.add_generic_rpc_handlers((generic_handler,)) + + + # This class is part of an EXPERIMENTAL API. +class RayServeBenchmarkService(object): + """Missing associated documentation comment in .proto file.""" + + @staticmethod + def grpc_call(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary(request, target, '/ray.serve.RayServeBenchmarkService/grpc_call', + src_dot_ray_dot_protobuf_dot_serve__pb2.ArrayData.SerializeToString, + src_dot_ray_dot_protobuf_dot_serve__pb2.ModelOutput.FromString, + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) + + @staticmethod + def call_with_string(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary(request, target, '/ray.serve.RayServeBenchmarkService/call_with_string', + 
src_dot_ray_dot_protobuf_dot_serve__pb2.StringData.SerializeToString, + src_dot_ray_dot_protobuf_dot_serve__pb2.ModelOutput.FromString, + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata)