diff --git a/.venv/lib/python3.11/site-packages/openai/_utils/__init__.py b/.venv/lib/python3.11/site-packages/openai/_utils/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..bd01c088dc935b6f8ac159647793438c5369d9df
--- /dev/null
+++ b/.venv/lib/python3.11/site-packages/openai/_utils/__init__.py
@@ -0,0 +1,60 @@
+from ._logs import SensitiveHeadersFilter as SensitiveHeadersFilter
+from ._sync import asyncify as asyncify
+from ._proxy import LazyProxy as LazyProxy
+from ._utils import (
+    flatten as flatten,
+    is_dict as is_dict,
+    is_list as is_list,
+    is_given as is_given,
+    is_tuple as is_tuple,
+    json_safe as json_safe,
+    lru_cache as lru_cache,
+    is_mapping as is_mapping,
+    is_tuple_t as is_tuple_t,
+    parse_date as parse_date,
+    is_iterable as is_iterable,
+    is_sequence as is_sequence,
+    coerce_float as coerce_float,
+    is_mapping_t as is_mapping_t,
+    removeprefix as removeprefix,
+    removesuffix as removesuffix,
+    extract_files as extract_files,
+    is_sequence_t as is_sequence_t,
+    required_args as required_args,
+    coerce_boolean as coerce_boolean,
+    coerce_integer as coerce_integer,
+    file_from_path as file_from_path,
+    parse_datetime as parse_datetime,
+    is_azure_client as is_azure_client,
+    strip_not_given as strip_not_given,
+    deepcopy_minimal as deepcopy_minimal,
+    get_async_library as get_async_library,
+    maybe_coerce_float as maybe_coerce_float,
+    get_required_header as get_required_header,
+    maybe_coerce_boolean as maybe_coerce_boolean,
+    maybe_coerce_integer as maybe_coerce_integer,
+    is_async_azure_client as is_async_azure_client,
+)
+from ._typing import (
+    is_list_type as is_list_type,
+    is_union_type as is_union_type,
+    extract_type_arg as extract_type_arg,
+    is_iterable_type as is_iterable_type,
+    is_required_type as is_required_type,
+    is_annotated_type as is_annotated_type,
+    is_type_alias_type as is_type_alias_type,
+    strip_annotated_type as strip_annotated_type,
+    extract_type_var_from_base as extract_type_var_from_base,
+)
+from ._streams import consume_sync_iterator as consume_sync_iterator, consume_async_iterator as consume_async_iterator
+from ._transform import (
+    PropertyInfo as PropertyInfo,
+    transform as transform,
+    async_transform as async_transform,
+    maybe_transform as maybe_transform,
+    async_maybe_transform as async_maybe_transform,
+)
+from ._reflection import (
+    function_has_argument as function_has_argument,
+    assert_signatures_in_sync as assert_signatures_in_sync,
+)
diff --git a/.venv/lib/python3.11/site-packages/openai/_utils/_proxy.py b/.venv/lib/python3.11/site-packages/openai/_utils/_proxy.py
new file mode 100644
index 0000000000000000000000000000000000000000..ffd883e9dded36a0643196b491bfd197dce6c5e3
--- /dev/null
+++ b/.venv/lib/python3.11/site-packages/openai/_utils/_proxy.py
@@ -0,0 +1,62 @@
+from __future__ import annotations
+
+from abc import ABC, abstractmethod
+from typing import Generic, TypeVar, Iterable, cast
+from typing_extensions import override
+
+T = TypeVar("T")
+
+
+class LazyProxy(Generic[T], ABC):
+    """Implements data methods to pretend that an instance is another instance.
+
+    This includes forwarding attribute access and other methods.
+    """
+
+    # Note: we have to special case proxies that themselves return proxies
+    # to support using a proxy as a catch-all for any random access, e.g. `proxy.foo.bar.baz`
+
+    def __getattr__(self, attr: str) -> object:
+        proxied = self.__get_proxied__()
+        if isinstance(proxied, LazyProxy):
+            return proxied  # pyright: ignore
+        return getattr(proxied, attr)
+
+    @override
+    def __repr__(self) -> str:
+        proxied = self.__get_proxied__()
+        if isinstance(proxied, LazyProxy):
+            return proxied.__class__.__name__
+        return repr(self.__get_proxied__())
+
+    @override
+    def __str__(self) -> str:
+        proxied = self.__get_proxied__()
+        if isinstance(proxied, LazyProxy):
+            return proxied.__class__.__name__
+        return str(proxied)
+
+    @override
+    def __dir__(self) -> Iterable[str]:
+        proxied = self.__get_proxied__()
+        if isinstance(proxied, LazyProxy):
+            return []
+        return proxied.__dir__()
+
+    @property  # type: ignore
+    @override
+    def __class__(self) -> type:  # pyright: ignore
+        proxied = self.__get_proxied__()
+        if issubclass(type(proxied), LazyProxy):
+            return type(proxied)
+        return proxied.__class__
+
+    def __get_proxied__(self) -> T:
+        return self.__load__()
+
+    def __as_proxied__(self) -> T:
+        """Helper method that returns the current proxy, typed as the loaded object"""
+        return cast(T, self)
+
+    @abstractmethod
+    def __load__(self) -> T: ...
diff --git a/.venv/lib/python3.11/site-packages/openai/cli/_api/__init__.py b/.venv/lib/python3.11/site-packages/openai/cli/_api/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..56a0260a6dc333995ff975158452eca83fc66012
--- /dev/null
+++ b/.venv/lib/python3.11/site-packages/openai/cli/_api/__init__.py
@@ -0,0 +1 @@
+from ._main import register_commands as register_commands
diff --git a/.venv/lib/python3.11/site-packages/openai/cli/_api/__pycache__/__init__.cpython-311.pyc b/.venv/lib/python3.11/site-packages/openai/cli/_api/__pycache__/__init__.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..6218fb8067cb795d4eabc33c9b8d55ceef7b6535
Binary files /dev/null and b/.venv/lib/python3.11/site-packages/openai/cli/_api/__pycache__/__init__.cpython-311.pyc differ
diff --git a/.venv/lib/python3.11/site-packages/openai/cli/_api/__pycache__/_main.cpython-311.pyc b/.venv/lib/python3.11/site-packages/openai/cli/_api/__pycache__/_main.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..6746bd5080ed893e234f1da678c9bb24ba847482
Binary files /dev/null and b/.venv/lib/python3.11/site-packages/openai/cli/_api/__pycache__/_main.cpython-311.pyc differ
diff --git a/.venv/lib/python3.11/site-packages/openai/cli/_api/__pycache__/audio.cpython-311.pyc b/.venv/lib/python3.11/site-packages/openai/cli/_api/__pycache__/audio.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..0089ce2d95e4bb1846c415bd429931f94dde5e30
Binary files /dev/null and b/.venv/lib/python3.11/site-packages/openai/cli/_api/__pycache__/audio.cpython-311.pyc differ
diff --git a/.venv/lib/python3.11/site-packages/openai/cli/_api/__pycache__/completions.cpython-311.pyc b/.venv/lib/python3.11/site-packages/openai/cli/_api/__pycache__/completions.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..7052a312dc0796db98002610b41afdf3e5051681
Binary files /dev/null and b/.venv/lib/python3.11/site-packages/openai/cli/_api/__pycache__/completions.cpython-311.pyc differ
diff --git a/.venv/lib/python3.11/site-packages/openai/cli/_api/__pycache__/files.cpython-311.pyc b/.venv/lib/python3.11/site-packages/openai/cli/_api/__pycache__/files.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..78d86cf7a301631d8f25ae886b346eed20cbc72e
Binary files /dev/null and b/.venv/lib/python3.11/site-packages/openai/cli/_api/__pycache__/files.cpython-311.pyc differ
diff --git a/.venv/lib/python3.11/site-packages/openai/cli/_api/__pycache__/image.cpython-311.pyc b/.venv/lib/python3.11/site-packages/openai/cli/_api/__pycache__/image.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..0f5009f9c18403b60f2fab84631a68321a355a62
Binary files /dev/null and b/.venv/lib/python3.11/site-packages/openai/cli/_api/__pycache__/image.cpython-311.pyc differ
diff --git a/.venv/lib/python3.11/site-packages/openai/cli/_api/__pycache__/models.cpython-311.pyc b/.venv/lib/python3.11/site-packages/openai/cli/_api/__pycache__/models.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..c074a9d7b3c5218026d0843ce4771cc753c7668a
Binary files /dev/null and b/.venv/lib/python3.11/site-packages/openai/cli/_api/__pycache__/models.cpython-311.pyc differ
diff --git a/.venv/lib/python3.11/site-packages/openai/cli/_api/chat/__init__.py b/.venv/lib/python3.11/site-packages/openai/cli/_api/chat/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..87d971630aaef250bf91add818cb0db0a70e1410
--- /dev/null
+++ b/.venv/lib/python3.11/site-packages/openai/cli/_api/chat/__init__.py
@@ -0,0 +1,13 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+from argparse import ArgumentParser
+
+from . import completions
+
+if TYPE_CHECKING:
+    from argparse import _SubParsersAction
+
+
+def register(subparser: _SubParsersAction[ArgumentParser]) -> None:
+    completions.register(subparser)
diff --git a/.venv/lib/python3.11/site-packages/openai/cli/_api/chat/completions.py b/.venv/lib/python3.11/site-packages/openai/cli/_api/chat/completions.py
new file mode 100644
index 0000000000000000000000000000000000000000..feedb5ccab817c96baabb0c263f7a8f29b11682a
--- /dev/null
+++ b/.venv/lib/python3.11/site-packages/openai/cli/_api/chat/completions.py
@@ -0,0 +1,160 @@
+from __future__ import annotations
+
+import sys
+from typing import TYPE_CHECKING, List, Optional, cast
+from argparse import ArgumentParser
+from typing_extensions import Literal, NamedTuple
+
+from ..._utils import get_client
+from ..._models import BaseModel
+from ...._streaming import Stream
+from ....types.chat import (
+    ChatCompletionRole,
+    ChatCompletionChunk,
+    CompletionCreateParams,
+)
+from ....types.chat.completion_create_params import (
+    CompletionCreateParamsStreaming,
+    CompletionCreateParamsNonStreaming,
+)
+
+if TYPE_CHECKING:
+    from argparse import _SubParsersAction
+
+
+def register(subparser: _SubParsersAction[ArgumentParser]) -> None:
+    sub = subparser.add_parser("chat.completions.create")
+
+    sub._action_groups.pop()
+    req = sub.add_argument_group("required arguments")
+    opt = sub.add_argument_group("optional arguments")
+
+    req.add_argument(
+        "-g",
+        "--message",
+        action="append",
+        nargs=2,
+        metavar=("ROLE", "CONTENT"),
+        help="A message in `{role} {content}` format. Use this argument multiple times to add multiple messages.",
+        required=True,
+    )
+    req.add_argument(
+        "-m",
+        "--model",
+        help="The model to use.",
+        required=True,
+    )
+
+    opt.add_argument(
+        "-n",
+        "--n",
+        help="How many completions to generate for the conversation.",
+        type=int,
+    )
+    opt.add_argument("-M", "--max-tokens", help="The maximum number of tokens to generate.", type=int)
+    opt.add_argument(
+        "-t",
+        "--temperature",
+        help="""What sampling temperature to use. Higher values means the model will take more risks. Try 0.9 for more creative applications, and 0 (argmax sampling) for ones with a well-defined answer.
+
+Mutually exclusive with `top_p`.""",
+        type=float,
+    )
+    opt.add_argument(
+        "-P",
+        "--top_p",
+        help="""An alternative to sampling with temperature, called nucleus sampling, where the considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10%% probability mass are considered.
+
+            Mutually exclusive with `temperature`.""",
+        type=float,
+    )
+    opt.add_argument(
+        "--stop",
+        help="A stop sequence at which to stop generating tokens for the message.",
+    )
+    opt.add_argument("--stream", help="Stream messages as they're ready.", action="store_true")
+    sub.set_defaults(func=CLIChatCompletion.create, args_model=CLIChatCompletionCreateArgs)
+
+
+class CLIMessage(NamedTuple):
+    role: ChatCompletionRole
+    content: str
+
+
+class CLIChatCompletionCreateArgs(BaseModel):
+    message: List[CLIMessage]
+    model: str
+    n: Optional[int] = None
+    max_tokens: Optional[int] = None
+    temperature: Optional[float] = None
+    top_p: Optional[float] = None
+    stop: Optional[str] = None
+    stream: bool = False
+
+
+class CLIChatCompletion:
+    @staticmethod
+    def create(args: CLIChatCompletionCreateArgs) -> None:
+        params: CompletionCreateParams = {
+            "model": args.model,
+            "messages": [
+                {"role": cast(Literal["user"], message.role), "content": message.content} for message in args.message
+            ],
+            # type checkers are not good at inferring union types so we have to set stream afterwards
+            "stream": False,
+        }
+        if args.temperature is not None:
+            params['temperature'] = args.temperature
+        if args.stop is not None:
+            params['stop'] = args.stop
+        if args.top_p is not None:
+            params['top_p'] = args.top_p
+        if args.n is not None:
+            params['n'] = args.n
+        if args.stream:
+            params["stream"] = args.stream  # type: ignore
+        if args.max_tokens is not None:
+            params["max_tokens"] = args.max_tokens
+
+        if args.stream:
+            return CLIChatCompletion._stream_create(cast(CompletionCreateParamsStreaming, params))
+
+        return CLIChatCompletion._create(cast(CompletionCreateParamsNonStreaming, params))
+
+    @staticmethod
+    def _create(params: CompletionCreateParamsNonStreaming) -> None:
+        completion = get_client().chat.completions.create(**params)
+        should_print_header = len(completion.choices) > 1
+        for choice in completion.choices:
+            if should_print_header:
+                sys.stdout.write("===== Chat Completion {} =====\n".format(choice.index))
+
+            content = choice.message.content if choice.message.content is not None else "None"
+            sys.stdout.write(content)
+
+            if should_print_header or not content.endswith("\n"):
+                sys.stdout.write("\n")
+
+            sys.stdout.flush()
+
+    @staticmethod
+    def _stream_create(params: CompletionCreateParamsStreaming) -> None:
+        # cast is required for mypy
+        stream = cast(  # pyright: ignore[reportUnnecessaryCast]
+            Stream[ChatCompletionChunk], get_client().chat.completions.create(**params)
+        )
+        for chunk in stream:
+            should_print_header = len(chunk.choices) > 1
+            for choice in chunk.choices:
+                if should_print_header:
+                    sys.stdout.write("===== Chat Completion {} =====\n".format(choice.index))
+
+                content = choice.delta.content or ""
+                sys.stdout.write(content)
+
+                if should_print_header:
+                    sys.stdout.write("\n")
+
+                sys.stdout.flush()
+
+        sys.stdout.write("\n")
diff --git a/.venv/lib/python3.11/site-packages/openai/lib/.keep b/.venv/lib/python3.11/site-packages/openai/lib/.keep
new file mode 100644
index 0000000000000000000000000000000000000000..5e2c99fdbefc565576f8c5f37b0b0a66d60f2015
--- /dev/null
+++ b/.venv/lib/python3.11/site-packages/openai/lib/.keep
@@ -0,0 +1,4 @@
+File generated from our OpenAPI spec by Stainless.
+
+This directory can be used to store custom files to expand the SDK.
+It is ignored by Stainless code generation and its content (other than this keep file) won't be touched.
\ No newline at end of file
diff --git a/.venv/lib/python3.11/site-packages/openai/lib/__init__.py b/.venv/lib/python3.11/site-packages/openai/lib/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..5c6cb782c0724c97262c1d6f62c0be76a430d836
--- /dev/null
+++ b/.venv/lib/python3.11/site-packages/openai/lib/__init__.py
@@ -0,0 +1,2 @@
+from ._tools import pydantic_function_tool as pydantic_function_tool
+from ._parsing import ResponseFormatT as ResponseFormatT
diff --git a/.venv/lib/python3.11/site-packages/openai/lib/__pycache__/azure.cpython-311.pyc b/.venv/lib/python3.11/site-packages/openai/lib/__pycache__/azure.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..fbbe547b4db9ee4fc42445aab8965eb4937eb48d
Binary files /dev/null and b/.venv/lib/python3.11/site-packages/openai/lib/__pycache__/azure.cpython-311.pyc differ
diff --git a/.venv/lib/python3.11/site-packages/openai/lib/_old_api.py b/.venv/lib/python3.11/site-packages/openai/lib/_old_api.py
new file mode 100644
index 0000000000000000000000000000000000000000..929c87e80b960c47fba701c5943fb28c06fe6c27
--- /dev/null
+++ b/.venv/lib/python3.11/site-packages/openai/lib/_old_api.py
@@ -0,0 +1,72 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any
+from typing_extensions import override
+
+from .._utils import LazyProxy
+from .._exceptions import OpenAIError
+
+INSTRUCTIONS = """
+
+You tried to access openai.{symbol}, but this is no longer supported in openai>=1.0.0 - see the README at https://github.com/openai/openai-python for the API.
+
+You can run `openai migrate` to automatically upgrade your codebase to use the 1.0.0 interface. 
+
+Alternatively, you can pin your installation to the old version, e.g. `pip install openai==0.28`
+
+A detailed migration guide is available here: https://github.com/openai/openai-python/discussions/742
+"""
+
+
+class APIRemovedInV1(OpenAIError):
+    def __init__(self, *, symbol: str) -> None:
+        super().__init__(INSTRUCTIONS.format(symbol=symbol))
+
+
+class APIRemovedInV1Proxy(LazyProxy[Any]):
+    def __init__(self, *, symbol: str) -> None:
+        super().__init__()
+        self._symbol = symbol
+
+    @override
+    def __load__(self) -> Any:
+        # return the proxy until it is eventually called so that
+        # we don't break people that are just checking the attributes
+        # of a module
+        return self
+
+    def __call__(self, *_args: Any, **_kwargs: Any) -> Any:
+        raise APIRemovedInV1(symbol=self._symbol)
+
+
+SYMBOLS = [
+    "Edit",
+    "File",
+    "Audio",
+    "Image",
+    "Model",
+    "Engine",
+    "Customer",
+    "FineTune",
+    "Embedding",
+    "Completion",
+    "Deployment",
+    "Moderation",
+    "ErrorObject",
+    "FineTuningJob",
+    "ChatCompletion",
+]
+
+# we explicitly tell type checkers that nothing is exported
+# from this file so that when we re-export the old symbols
+# in `openai/__init__.py` they aren't added to the auto-complete
+# suggestions given by editors
+if TYPE_CHECKING:
+    __all__: list[str] = []
+else:
+    __all__ = SYMBOLS
+
+
+__locals = locals()
+for symbol in SYMBOLS:
+    __locals[symbol] = APIRemovedInV1Proxy(symbol=symbol)
diff --git a/.venv/lib/python3.11/site-packages/openai/lib/_parsing/__init__.py b/.venv/lib/python3.11/site-packages/openai/lib/_parsing/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..4d454c3a20fee405ab5de75172979bbec23ecf39
--- /dev/null
+++ b/.venv/lib/python3.11/site-packages/openai/lib/_parsing/__init__.py
@@ -0,0 +1,12 @@
+from ._completions import (
+    ResponseFormatT as ResponseFormatT,
+    has_parseable_input,
+    has_parseable_input as has_parseable_input,
+    maybe_parse_content as maybe_parse_content,
+    validate_input_tools as validate_input_tools,
+    parse_chat_completion as parse_chat_completion,
+    get_input_tool_by_name as get_input_tool_by_name,
+    solve_response_format_t as solve_response_format_t,
+    parse_function_tool_arguments as parse_function_tool_arguments,
+    type_to_response_format_param as type_to_response_format_param,
+)
diff --git a/.venv/lib/python3.11/site-packages/openai/lib/_parsing/__pycache__/__init__.cpython-311.pyc b/.venv/lib/python3.11/site-packages/openai/lib/_parsing/__pycache__/__init__.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..26018346f9edb3709455aa42ee1d272ba7c2149f
Binary files /dev/null and b/.venv/lib/python3.11/site-packages/openai/lib/_parsing/__pycache__/__init__.cpython-311.pyc differ
diff --git a/.venv/lib/python3.11/site-packages/openai/lib/_parsing/__pycache__/_completions.cpython-311.pyc b/.venv/lib/python3.11/site-packages/openai/lib/_parsing/__pycache__/_completions.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..545bad22f11bbcabba1ad899c10c35c68be06b3f
Binary files /dev/null and b/.venv/lib/python3.11/site-packages/openai/lib/_parsing/__pycache__/_completions.cpython-311.pyc differ
diff --git a/.venv/lib/python3.11/site-packages/openai/lib/_parsing/_completions.py b/.venv/lib/python3.11/site-packages/openai/lib/_parsing/_completions.py
new file mode 100644
index 0000000000000000000000000000000000000000..33c4ccb946b23677e94cd33e463e8d14eb1af08f
--- /dev/null
+++ b/.venv/lib/python3.11/site-packages/openai/lib/_parsing/_completions.py
@@ -0,0 +1,264 @@
+from __future__ import annotations
+
+import json
+from typing import TYPE_CHECKING, Any, Iterable, cast
+from typing_extensions import TypeVar, TypeGuard, assert_never
+
+import pydantic
+
+from .._tools import PydanticFunctionTool
+from ..._types import NOT_GIVEN, NotGiven
+from ..._utils import is_dict, is_given
+from ..._compat import PYDANTIC_V2, model_parse_json
+from ..._models import construct_type_unchecked
+from .._pydantic import is_basemodel_type, to_strict_json_schema, is_dataclass_like_type
+from ...types.chat import (
+    ParsedChoice,
+    ChatCompletion,
+    ParsedFunction,
+    ParsedChatCompletion,
+    ChatCompletionMessage,
+    ParsedFunctionToolCall,
+    ChatCompletionToolParam,
+    ParsedChatCompletionMessage,
+    completion_create_params,
+)
+from ..._exceptions import LengthFinishReasonError, ContentFilterFinishReasonError
+from ...types.shared_params import FunctionDefinition
+from ...types.chat.completion_create_params import ResponseFormat as ResponseFormatParam
+from ...types.chat.chat_completion_message_tool_call import Function
+
+ResponseFormatT = TypeVar(
+    "ResponseFormatT",
+    # if it isn't given then we don't do any parsing
+    default=None,
+)
+_default_response_format: None = None
+
+
+def validate_input_tools(
+    tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN,
+) -> None:
+    if not is_given(tools):
+        return
+
+    for tool in tools:
+        if tool["type"] != "function":
+            raise ValueError(
+                f'Currently only `function` tool types support auto-parsing; Received `{tool["type"]}`',
+            )
+
+        strict = tool["function"].get("strict")
+        if strict is not True:
+            raise ValueError(
+                f'`{tool["function"]["name"]}` is not strict. Only `strict` function tools can be auto-parsed'
+            )
+
+
+def parse_chat_completion(
+    *,
+    response_format: type[ResponseFormatT] | completion_create_params.ResponseFormat | NotGiven,
+    input_tools: Iterable[ChatCompletionToolParam] | NotGiven,
+    chat_completion: ChatCompletion | ParsedChatCompletion[object],
+) -> ParsedChatCompletion[ResponseFormatT]:
+    if is_given(input_tools):
+        input_tools = [t for t in input_tools]
+    else:
+        input_tools = []
+
+    choices: list[ParsedChoice[ResponseFormatT]] = []
+    for choice in chat_completion.choices:
+        if choice.finish_reason == "length":
+            raise LengthFinishReasonError(completion=chat_completion)
+
+        if choice.finish_reason == "content_filter":
+            raise ContentFilterFinishReasonError()
+
+        message = choice.message
+
+        tool_calls: list[ParsedFunctionToolCall] = []
+        if message.tool_calls:
+            for tool_call in message.tool_calls:
+                if tool_call.type == "function":
+                    tool_call_dict = tool_call.to_dict()
+                    tool_calls.append(
+                        construct_type_unchecked(
+                            value={
+                                **tool_call_dict,
+                                "function": {
+                                    **cast(Any, tool_call_dict["function"]),
+                                    "parsed_arguments": parse_function_tool_arguments(
+                                        input_tools=input_tools, function=tool_call.function
+                                    ),
+                                },
+                            },
+                            type_=ParsedFunctionToolCall,
+                        )
+                    )
+                elif TYPE_CHECKING:  # type: ignore[unreachable]
+                    assert_never(tool_call)
+                else:
+                    tool_calls.append(tool_call)
+
+        choices.append(
+            construct_type_unchecked(
+                type_=cast(Any, ParsedChoice)[solve_response_format_t(response_format)],
+                value={
+                    **choice.to_dict(),
+                    "message": {
+                        **message.to_dict(),
+                        "parsed": maybe_parse_content(
+                            response_format=response_format,
+                            message=message,
+                        ),
+                        "tool_calls": tool_calls,
+                    },
+                },
+            )
+        )
+
+    return cast(
+        ParsedChatCompletion[ResponseFormatT],
+        construct_type_unchecked(
+            type_=cast(Any, ParsedChatCompletion)[solve_response_format_t(response_format)],
+            value={
+                **chat_completion.to_dict(),
+                "choices": choices,
+            },
+        ),
+    )
+
+
+def get_input_tool_by_name(*, input_tools: list[ChatCompletionToolParam], name: str) -> ChatCompletionToolParam | None:
+    return next((t for t in input_tools if t.get("function", {}).get("name") == name), None)
+
+
+def parse_function_tool_arguments(
+    *, input_tools: list[ChatCompletionToolParam], function: Function | ParsedFunction
+) -> object:
+    input_tool = get_input_tool_by_name(input_tools=input_tools, name=function.name)
+    if not input_tool:
+        return None
+
+    input_fn = cast(object, input_tool.get("function"))
+    if isinstance(input_fn, PydanticFunctionTool):
+        return model_parse_json(input_fn.model, function.arguments)
+
+    input_fn = cast(FunctionDefinition, input_fn)
+
+    if not input_fn.get("strict"):
+        return None
+
+    return json.loads(function.arguments)
+
+
+def maybe_parse_content(
+    *,
+    response_format: type[ResponseFormatT] | ResponseFormatParam | NotGiven,
+    message: ChatCompletionMessage | ParsedChatCompletionMessage[object],
+) -> ResponseFormatT | None:
+    if has_rich_response_format(response_format) and message.content and not message.refusal:
+        return _parse_content(response_format, message.content)
+
+    return None
+
+
+def solve_response_format_t(
+    response_format: type[ResponseFormatT] | ResponseFormatParam | NotGiven,
+) -> type[ResponseFormatT]:
+    """Return the runtime type for the given response format.
+
+    If no response format is given, or if we won't auto-parse the response format
+    then we default to `None`.
+    """
+    if has_rich_response_format(response_format):
+        return response_format
+
+    return cast("type[ResponseFormatT]", _default_response_format)
+
+
+def has_parseable_input(
+    *,
+    response_format: type | ResponseFormatParam | NotGiven,
+    input_tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN,
+) -> bool:
+    if has_rich_response_format(response_format):
+        return True
+
+    for input_tool in input_tools or []:
+        if is_parseable_tool(input_tool):
+            return True
+
+    return False
+
+
+def has_rich_response_format(
+    response_format: type[ResponseFormatT] | ResponseFormatParam | NotGiven,
+) -> TypeGuard[type[ResponseFormatT]]:
+    if not is_given(response_format):
+        return False
+
+    if is_response_format_param(response_format):
+        return False
+
+    return True
+
+
+def is_response_format_param(response_format: object) -> TypeGuard[ResponseFormatParam]:
+    return is_dict(response_format)
+
+
+def is_parseable_tool(input_tool: ChatCompletionToolParam) -> bool:
+    input_fn = cast(object, input_tool.get("function"))
+    if isinstance(input_fn, PydanticFunctionTool):
+        return True
+
+    return cast(FunctionDefinition, input_fn).get("strict") or False
+
+
+def _parse_content(response_format: type[ResponseFormatT], content: str) -> ResponseFormatT:
+    if is_basemodel_type(response_format):
+        return cast(ResponseFormatT, model_parse_json(response_format, content))
+
+    if is_dataclass_like_type(response_format):
+        if not PYDANTIC_V2:
+            raise TypeError(f"Non BaseModel types are only supported with Pydantic v2 - {response_format}")
+
+        return pydantic.TypeAdapter(response_format).validate_json(content)
+
+    raise TypeError(f"Unable to automatically parse response format type {response_format}")
+
+
+def type_to_response_format_param(
+    response_format: type | completion_create_params.ResponseFormat | NotGiven,
+) -> ResponseFormatParam | NotGiven:
+    if not is_given(response_format):
+        return NOT_GIVEN
+
+    if is_response_format_param(response_format):
+        return response_format
+
+    # type checkers don't narrow the negation of a `TypeGuard` as it isn't
+    # a safe default behaviour but we know that at this point the `response_format`
+    # can only be a `type`
+    response_format = cast(type, response_format)
+
+    json_schema_type: type[pydantic.BaseModel] | pydantic.TypeAdapter[Any] | None = None
+
+    if is_basemodel_type(response_format):
+        name = response_format.__name__
+        json_schema_type = response_format
+    elif is_dataclass_like_type(response_format):
+        name = response_format.__name__
+        json_schema_type = pydantic.TypeAdapter(response_format)
+    else:
+        raise TypeError(f"Unsupported response_format type - {response_format}")
+
+    return {
+        "type": "json_schema",
+        "json_schema": {
+            "schema": to_strict_json_schema(json_schema_type),
+            "name": name,
+            "strict": True,
+        },
+    }
diff --git a/.venv/lib/python3.11/site-packages/openai/lib/_pydantic.py b/.venv/lib/python3.11/site-packages/openai/lib/_pydantic.py
new file mode 100644
index 0000000000000000000000000000000000000000..c2d73e5fc6abc45487256738c6342bf4a377310e
--- /dev/null
+++ b/.venv/lib/python3.11/site-packages/openai/lib/_pydantic.py
@@ -0,0 +1,155 @@
+from __future__ import annotations
+
+import inspect
+from typing import Any, TypeVar
+from typing_extensions import TypeGuard
+
+import pydantic
+
+from .._types import NOT_GIVEN
+from .._utils import is_dict as _is_dict, is_list
+from .._compat import PYDANTIC_V2, model_json_schema
+
+_T = TypeVar("_T")
+
+
+def to_strict_json_schema(model: type[pydantic.BaseModel] | pydantic.TypeAdapter[Any]) -> dict[str, Any]:
+    if inspect.isclass(model) and is_basemodel_type(model):
+        schema = model_json_schema(model)
+    elif PYDANTIC_V2 and isinstance(model, pydantic.TypeAdapter):
+        schema = model.json_schema()
+    else:
+        raise TypeError(f"Non BaseModel types are only supported with Pydantic v2 - {model}")
+
+    return _ensure_strict_json_schema(schema, path=(), root=schema)
+
+
+def _ensure_strict_json_schema(
+    json_schema: object,
+    *,
+    path: tuple[str, ...],
+    root: dict[str, object],
+) -> dict[str, Any]:
+    """Mutates the given JSON schema to ensure it conforms to the `strict` standard
+    that the API expects.
+    """
+    if not is_dict(json_schema):
+        raise TypeError(f"Expected {json_schema} to be a dictionary; path={path}")
+
+    defs = json_schema.get("$defs")
+    if is_dict(defs):
+        for def_name, def_schema in defs.items():
+            _ensure_strict_json_schema(def_schema, path=(*path, "$defs", def_name), root=root)
+
+    definitions = json_schema.get("definitions")
+    if is_dict(definitions):
+        for definition_name, definition_schema in definitions.items():
+            _ensure_strict_json_schema(definition_schema, path=(*path, "definitions", definition_name), root=root)
+
+    typ = json_schema.get("type")
+    if typ == "object" and "additionalProperties" not in json_schema:
+        json_schema["additionalProperties"] = False
+
+    # object types
+    # { 'type': 'object', 'properties': { 'a':  {...} } }
+    properties = json_schema.get("properties")
+    if is_dict(properties):
+        json_schema["required"] = [prop for prop in properties.keys()]
+        json_schema["properties"] = {
+            key: _ensure_strict_json_schema(prop_schema, path=(*path, "properties", key), root=root)
+            for key, prop_schema in properties.items()
+        }
+
+    # arrays
+    # { 'type': 'array', 'items': {...} }
+    items = json_schema.get("items")
+    if is_dict(items):
+        json_schema["items"] = _ensure_strict_json_schema(items, path=(*path, "items"), root=root)
+
+    # unions
+    any_of = json_schema.get("anyOf")
+    if is_list(any_of):
+        json_schema["anyOf"] = [
+            _ensure_strict_json_schema(variant, path=(*path, "anyOf", str(i)), root=root)
+            for i, variant in enumerate(any_of)
+        ]
+
+    # intersections
+    all_of = json_schema.get("allOf")
+    if is_list(all_of):
+        if len(all_of) == 1:
+            json_schema.update(_ensure_strict_json_schema(all_of[0], path=(*path, "allOf", "0"), root=root))
+            json_schema.pop("allOf")
+        else:
+            json_schema["allOf"] = [
+                _ensure_strict_json_schema(entry, path=(*path, "allOf", str(i)), root=root)
+                for i, entry in enumerate(all_of)
+            ]
+
+    # strip `None` defaults as there's no meaningful distinction here
+    # the schema will still be `nullable` and the model will default
+    # to using `None` anyway
+    if json_schema.get("default", NOT_GIVEN) is None:
+        json_schema.pop("default")
+
+    # we can't use `$ref`s if there are also other properties defined, e.g.
+    # `{"$ref": "...", "description": "my description"}`
+    #
+    # so we unravel the ref
+    # `{"type": "string", "description": "my description"}`
+    ref = json_schema.get("$ref")
+    if ref and has_more_than_n_keys(json_schema, 1):
+        assert isinstance(ref, str), f"Received non-string $ref - {ref}"
+
+        resolved = resolve_ref(root=root, ref=ref)
+        if not is_dict(resolved):
+            raise ValueError(f"Expected `$ref: {ref}` to resolved to a dictionary but got {resolved}")
+
+        # properties from the json schema take priority over the ones on the `$ref`
+        json_schema.update({**resolved, **json_schema})
+        json_schema.pop("$ref")
+        # Since the schema expanded from `$ref` might not have `additionalProperties: false` applied,
+        # we call `_ensure_strict_json_schema` again to fix the inlined schema and ensure it's valid.
+        return _ensure_strict_json_schema(json_schema, path=path, root=root)
+
+    return json_schema
+
+
+def resolve_ref(*, root: dict[str, object], ref: str) -> object:
+    if not ref.startswith("#/"):
+        raise ValueError(f"Unexpected $ref format {ref!r}; Does not start with #/")
+
+    path = ref[2:].split("/")
+    resolved = root
+    for key in path:
+        value = resolved[key]
+        assert is_dict(value), f"encountered non-dictionary entry while resolving {ref} - {resolved}"
+        resolved = value
+
+    return resolved
+
+
+def is_basemodel_type(typ: type) -> TypeGuard[type[pydantic.BaseModel]]:
+    if not inspect.isclass(typ):
+        return False
+    return issubclass(typ, pydantic.BaseModel)
+
+
+def is_dataclass_like_type(typ: type) -> bool:
+    """Returns True if the given type likely used `@pydantic.dataclass`"""
+    return hasattr(typ, "__pydantic_config__")
+
+
+def is_dict(obj: object) -> TypeGuard[dict[str, object]]:
+    # just pretend that we know there are only `str` keys
+    # as that check is not worth the performance cost
+    return _is_dict(obj)
+
+
+def has_more_than_n_keys(obj: dict[str, object], n: int) -> bool:
+    i = 0
+    for _ in obj.keys():
+        i += 1
+        if i > n:
+            return True
+    return False
diff --git a/.venv/lib/python3.11/site-packages/openai/lib/_tools.py b/.venv/lib/python3.11/site-packages/openai/lib/_tools.py
new file mode 100644
index 0000000000000000000000000000000000000000..8478ed676c58511bff47d75f1e2d686e834de94f
--- /dev/null
+++ b/.venv/lib/python3.11/site-packages/openai/lib/_tools.py
@@ -0,0 +1,54 @@
+from __future__ import annotations
+
+from typing import Any, Dict, cast
+
+import pydantic
+
+from ._pydantic import to_strict_json_schema
+from ..types.chat import ChatCompletionToolParam
+from ..types.shared_params import FunctionDefinition
+
+
+class PydanticFunctionTool(Dict[str, Any]):
+    """Dictionary wrapper so we can pass the given base model
+    throughout the entire request stack without having to special
+    case it.
+    """
+
+    model: type[pydantic.BaseModel]
+
+    def __init__(self, defn: FunctionDefinition, model: type[pydantic.BaseModel]) -> None:
+        super().__init__(defn)
+        self.model = model
+
+    def cast(self) -> FunctionDefinition:
+        return cast(FunctionDefinition, self)
+
+
+def pydantic_function_tool(
+    model: type[pydantic.BaseModel],
+    *,
+    name: str | None = None,  # inferred from class name by default
+    description: str | None = None,  # inferred from class docstring by default
+) -> ChatCompletionToolParam:
+    if description is None:
+        # note: we intentionally don't use `.getdoc()` to avoid
+        # including pydantic's docstrings
+        description = model.__doc__
+
+    function = PydanticFunctionTool(
+        {
+            "name": name or model.__name__,
+            "strict": True,
+            "parameters": to_strict_json_schema(model),
+        },
+        model,
+    ).cast()
+
+    if description is not None:
+        function["description"] = description
+
+    return {
+        "type": "function",
+        "function": function,
+    }
diff --git a/.venv/lib/python3.11/site-packages/openai/lib/_validators.py b/.venv/lib/python3.11/site-packages/openai/lib/_validators.py
new file mode 100644
index 0000000000000000000000000000000000000000..cf24cd2294ef60756ada8515a65a9dcf9afbc4ae
--- /dev/null
+++ b/.venv/lib/python3.11/site-packages/openai/lib/_validators.py
@@ -0,0 +1,809 @@
+# pyright: basic
+from __future__ import annotations
+
+import os
+import sys
+from typing import Any, TypeVar, Callable, Optional, NamedTuple
+from typing_extensions import TypeAlias
+
+from .._extras import pandas as pd
+
+
+class Remediation(NamedTuple):
+    name: str
+    immediate_msg: Optional[str] = None
+    necessary_msg: Optional[str] = None
+    necessary_fn: Optional[Callable[[Any], Any]] = None
+    optional_msg: Optional[str] = None
+    optional_fn: Optional[Callable[[Any], Any]] = None
+    error_msg: Optional[str] = None
+
+
+OptionalDataFrameT = TypeVar("OptionalDataFrameT", bound="Optional[pd.DataFrame]")
+
+
+def num_examples_validator(df: pd.DataFrame) -> Remediation:
+    """
+    This validator will only print out the number of examples and recommend to the user to increase the number of examples if less than 100.
+    """
+    MIN_EXAMPLES = 100
+    optional_suggestion = (
+        ""
+        if len(df) >= MIN_EXAMPLES
+        else ". In general, we recommend having at least a few hundred examples. We've found that performance tends to linearly increase for every doubling of the number of examples"
+    )
+    immediate_msg = f"\n- Your file contains {len(df)} prompt-completion pairs{optional_suggestion}"
+    return Remediation(name="num_examples", immediate_msg=immediate_msg)
+
+
+def necessary_column_validator(df: pd.DataFrame, necessary_column: str) -> Remediation:
+    """
+    This validator will ensure that the necessary column is present in the dataframe.
+    """
+
+    def lower_case_column(df: pd.DataFrame, column: Any) -> pd.DataFrame:
+        cols = [c for c in df.columns if str(c).lower() == column]
+        df.rename(columns={cols[0]: column.lower()}, inplace=True)
+        return df
+
+    immediate_msg = None
+    necessary_fn = None
+    necessary_msg = None
+    error_msg = None
+
+    if necessary_column not in df.columns:
+        if necessary_column in [str(c).lower() for c in df.columns]:
+
+            def lower_case_column_creator(df: pd.DataFrame) -> pd.DataFrame:
+                return lower_case_column(df, necessary_column)
+
+            necessary_fn = lower_case_column_creator
+            immediate_msg = f"\n- The `{necessary_column}` column/key should be lowercase"
+            necessary_msg = f"Lower case column name to `{necessary_column}`"
+        else:
+            error_msg = f"`{necessary_column}` column/key is missing. Please make sure you name your columns/keys appropriately, then retry"
+
+    return Remediation(
+        name="necessary_column",
+        immediate_msg=immediate_msg,
+        necessary_msg=necessary_msg,
+        necessary_fn=necessary_fn,
+        error_msg=error_msg,
+    )
+
+
+def additional_column_validator(df: pd.DataFrame, fields: list[str] = ["prompt", "completion"]) -> Remediation:
+    """
+    This validator will remove additional columns from the dataframe.
+    """
+    additional_columns = []
+    necessary_msg = None
+    immediate_msg = None
+    necessary_fn = None  # type: ignore
+
+    if len(df.columns) > 2:
+        additional_columns = [c for c in df.columns if c not in fields]
+        warn_message = ""
+        for ac in additional_columns:
+            dups = [c for c in additional_columns if ac in c]
+            if len(dups) > 0:
+                warn_message += f"\n  WARNING: Some of the additional columns/keys contain `{ac}` in their name. These will be ignored, and the column/key `{ac}` will be used instead. This could also result from a duplicate column/key in the provided file."
+        immediate_msg = f"\n- The input file should contain exactly two columns/keys per row. Additional columns/keys present are: {additional_columns}{warn_message}"
+        necessary_msg = f"Remove additional columns/keys: {additional_columns}"
+
+        def necessary_fn(x: Any) -> Any:
+            return x[fields]
+
+    return Remediation(
+        name="additional_column",
+        immediate_msg=immediate_msg,
+        necessary_msg=necessary_msg,
+        necessary_fn=necessary_fn,
+    )
+
+
+def non_empty_field_validator(df: pd.DataFrame, field: str = "completion") -> Remediation:
+    """
+    This validator will ensure that no completion is empty.
+    """
+    necessary_msg = None
+    necessary_fn = None  # type: ignore
+    immediate_msg = None
+
+    if df[field].apply(lambda x: x == "").any() or df[field].isnull().any():
+        empty_rows = (df[field] == "") | (df[field].isnull())
+        empty_indexes = df.reset_index().index[empty_rows].tolist()
+        immediate_msg = f"\n- `{field}` column/key should not contain empty strings. These are rows: {empty_indexes}"
+
+        def necessary_fn(x: Any) -> Any:
+            return x[x[field] != ""].dropna(subset=[field])
+
+        necessary_msg = f"Remove {len(empty_indexes)} rows with empty {field}s"
+
+    return Remediation(
+        name=f"empty_{field}",
+        immediate_msg=immediate_msg,
+        necessary_msg=necessary_msg,
+        necessary_fn=necessary_fn,
+    )
+
+
+def duplicated_rows_validator(df: pd.DataFrame, fields: list[str] = ["prompt", "completion"]) -> Remediation:
+    """
+    This validator will suggest to the user to remove duplicate rows if they exist.
+    """
+    duplicated_rows = df.duplicated(subset=fields)
+    duplicated_indexes = df.reset_index().index[duplicated_rows].tolist()
+    immediate_msg = None
+    optional_msg = None
+    optional_fn = None  # type: ignore
+
+    if len(duplicated_indexes) > 0:
+        immediate_msg = f"\n- There are {len(duplicated_indexes)} duplicated {'-'.join(fields)} sets. These are rows: {duplicated_indexes}"
+        optional_msg = f"Remove {len(duplicated_indexes)} duplicate rows"
+
+        def optional_fn(x: Any) -> Any:
+            return x.drop_duplicates(subset=fields)
+
+    return Remediation(
+        name="duplicated_rows",
+        immediate_msg=immediate_msg,
+        optional_msg=optional_msg,
+        optional_fn=optional_fn,
+    )
+
+
+def long_examples_validator(df: pd.DataFrame) -> Remediation:
+    """
+    This validator will suggest to the user to remove examples that are too long.
+    """
+    immediate_msg = None
+    optional_msg = None
+    optional_fn = None  # type: ignore
+
+    ft_type = infer_task_type(df)
+    if ft_type != "open-ended generation":
+
+        def get_long_indexes(d: pd.DataFrame) -> Any:
+            long_examples = d.apply(lambda x: len(x.prompt) + len(x.completion) > 10000, axis=1)
+            return d.reset_index().index[long_examples].tolist()
+
+        long_indexes = get_long_indexes(df)
+
+        if len(long_indexes) > 0:
+            immediate_msg = f"\n- There are {len(long_indexes)} examples that are very long. These are rows: {long_indexes}\nFor conditional generation, and for classification the examples shouldn't be longer than 2048 tokens."
+            optional_msg = f"Remove {len(long_indexes)} long examples"
+
+            def optional_fn(x: Any) -> Any:
+                long_indexes_to_drop = get_long_indexes(x)
+                if long_indexes != long_indexes_to_drop:
+                    sys.stdout.write(
+                        f"The indices of the long examples has changed as a result of a previously applied recommendation.\nThe {len(long_indexes_to_drop)} long examples to be dropped are now at the following indices: {long_indexes_to_drop}\n"
+                    )
+                return x.drop(long_indexes_to_drop)
+
+    return Remediation(
+        name="long_examples",
+        immediate_msg=immediate_msg,
+        optional_msg=optional_msg,
+        optional_fn=optional_fn,
+    )
+
+
+def common_prompt_suffix_validator(df: pd.DataFrame) -> Remediation:
+    """
+    This validator will suggest to add a common suffix to the prompt if one doesn't already exist in case of classification or conditional generation.
+    """
+    error_msg = None
+    immediate_msg = None
+    optional_msg = None
+    optional_fn = None  # type: ignore
+
+    # Find a suffix which is not contained within the prompt otherwise
+    suggested_suffix = "\n\n### =>\n\n"
+    suffix_options = [
+        " ->",
+        "\n\n###\n\n",
+        "\n\n===\n\n",
+        "\n\n---\n\n",
+        "\n\n===>\n\n",
+        "\n\n--->\n\n",
+    ]
+    for suffix_option in suffix_options:
+        if suffix_option == " ->":
+            if df.prompt.str.contains("\n").any():
+                continue
+        if df.prompt.str.contains(suffix_option, regex=False).any():
+            continue
+        suggested_suffix = suffix_option
+        break
+    display_suggested_suffix = suggested_suffix.replace("\n", "\\n")
+
+    ft_type = infer_task_type(df)
+    if ft_type == "open-ended generation":
+        return Remediation(name="common_suffix")
+
+    def add_suffix(x: Any, suffix: Any) -> Any:
+        x["prompt"] += suffix
+        return x
+
+    common_suffix = get_common_xfix(df.prompt, xfix="suffix")
+    if (df.prompt == common_suffix).all():
+        error_msg = f"All prompts are identical: `{common_suffix}`\nConsider leaving the prompts blank if you want to do open-ended generation, otherwise ensure prompts are different"
+        return Remediation(name="common_suffix", error_msg=error_msg)
+
+    if common_suffix != "":
+        common_suffix_new_line_handled = common_suffix.replace("\n", "\\n")
+        immediate_msg = f"\n- All prompts end with suffix `{common_suffix_new_line_handled}`"
+        if len(common_suffix) > 10:
+            immediate_msg += f". This suffix seems very long. Consider replacing with a shorter suffix, such as `{display_suggested_suffix}`"
+        if df.prompt.str[: -len(common_suffix)].str.contains(common_suffix, regex=False).any():
+            immediate_msg += f"\n  WARNING: Some of your prompts contain the suffix `{common_suffix}` more than once. We strongly suggest that you review your prompts and add a unique suffix"
+
+    else:
+        immediate_msg = "\n- Your data does not contain a common separator at the end of your prompts. Having a separator string appended to the end of the prompt makes it clearer to the fine-tuned model where the completion should begin. See https://platform.openai.com/docs/guides/fine-tuning/preparing-your-dataset for more detail and examples. If you intend to do open-ended generation, then you should leave the prompts empty"
+
+    if common_suffix == "":
+        optional_msg = f"Add a suffix separator `{display_suggested_suffix}` to all prompts"
+
+        def optional_fn(x: Any) -> Any:
+            return add_suffix(x, suggested_suffix)
+
+    return Remediation(
+        name="common_completion_suffix",
+        immediate_msg=immediate_msg,
+        optional_msg=optional_msg,
+        optional_fn=optional_fn,
+        error_msg=error_msg,
+    )
+
+
+def common_prompt_prefix_validator(df: pd.DataFrame) -> Remediation:
+    """
+    This validator will suggest to remove a common prefix from the prompt if a long one exist.
+    """
+    MAX_PREFIX_LEN = 12
+
+    immediate_msg = None
+    optional_msg = None
+    optional_fn = None  # type: ignore
+
+    common_prefix = get_common_xfix(df.prompt, xfix="prefix")
+    if common_prefix == "":
+        return Remediation(name="common_prefix")
+
+    def remove_common_prefix(x: Any, prefix: Any) -> Any:
+        x["prompt"] = x["prompt"].str[len(prefix) :]
+        return x
+
+    if (df.prompt == common_prefix).all():
+        # already handled by common_suffix_validator
+        return Remediation(name="common_prefix")
+
+    if common_prefix != "":
+        immediate_msg = f"\n- All prompts start with prefix `{common_prefix}`"
+        if MAX_PREFIX_LEN < len(common_prefix):
+            immediate_msg += ". Fine-tuning doesn't require the instruction specifying the task, or a few-shot example scenario. Most of the time you should only add the input data into the prompt, and the desired output into the completion"
+            optional_msg = f"Remove prefix `{common_prefix}` from all prompts"
+
+            def optional_fn(x: Any) -> Any:
+                return remove_common_prefix(x, common_prefix)
+
+    return Remediation(
+        name="common_prompt_prefix",
+        immediate_msg=immediate_msg,
+        optional_msg=optional_msg,
+        optional_fn=optional_fn,
+    )
+
+
+def common_completion_prefix_validator(df: pd.DataFrame) -> Remediation:
+    """
+    This validator will suggest to remove a common prefix from the completion if a long one exist.
+    """
+    MAX_PREFIX_LEN = 5
+
+    common_prefix = get_common_xfix(df.completion, xfix="prefix")
+    ws_prefix = len(common_prefix) > 0 and common_prefix[0] == " "
+    if len(common_prefix) < MAX_PREFIX_LEN:
+        return Remediation(name="common_prefix")
+
+    def remove_common_prefix(x: Any, prefix: Any, ws_prefix: Any) -> Any:
+        x["completion"] = x["completion"].str[len(prefix) :]
+        if ws_prefix:
+            # keep the single whitespace as prefix
+            x["completion"] = f" {x['completion']}"
+        return x
+
+    if (df.completion == common_prefix).all():
+        # already handled by common_suffix_validator
+        return Remediation(name="common_prefix")
+
+    immediate_msg = f"\n- All completions start with prefix `{common_prefix}`. Most of the time you should only add the output data into the completion, without any prefix"
+    optional_msg = f"Remove prefix `{common_prefix}` from all completions"
+
+    def optional_fn(x: Any) -> Any:
+        return remove_common_prefix(x, common_prefix, ws_prefix)
+
+    return Remediation(
+        name="common_completion_prefix",
+        immediate_msg=immediate_msg,
+        optional_msg=optional_msg,
+        optional_fn=optional_fn,
+    )
+
+
+def common_completion_suffix_validator(df: pd.DataFrame) -> Remediation:
+    """
+    This validator will suggest to add a common suffix to the completion if one doesn't already exist in case of classification or conditional generation.
+    """
+    error_msg = None
+    immediate_msg = None
+    optional_msg = None
+    optional_fn = None  # type: ignore
+
+    ft_type = infer_task_type(df)
+    if ft_type == "open-ended generation" or ft_type == "classification":
+        return Remediation(name="common_suffix")
+
+    common_suffix = get_common_xfix(df.completion, xfix="suffix")
+    if (df.completion == common_suffix).all():
+        error_msg = f"All completions are identical: `{common_suffix}`\nEnsure completions are different, otherwise the model will just repeat `{common_suffix}`"
+        return Remediation(name="common_suffix", error_msg=error_msg)
+
+    # Find a suffix which is not contained within the completion otherwise
+    suggested_suffix = " [END]"
+    suffix_options = [
+        "\n",
+        ".",
+        " END",
+        "***",
+        "+++",
+        "&&&",
+        "$$$",
+        "@@@",
+        "%%%",
+    ]
+    for suffix_option in suffix_options:
+        if df.completion.str.contains(suffix_option, regex=False).any():
+            continue
+        suggested_suffix = suffix_option
+        break
+    display_suggested_suffix = suggested_suffix.replace("\n", "\\n")
+
+    def add_suffix(x: Any, suffix: Any) -> Any:
+        x["completion"] += suffix
+        return x
+
+    if common_suffix != "":
+        common_suffix_new_line_handled = common_suffix.replace("\n", "\\n")
+        immediate_msg = f"\n- All completions end with suffix `{common_suffix_new_line_handled}`"
+        if len(common_suffix) > 10:
+            immediate_msg += f". This suffix seems very long. Consider replacing with a shorter suffix, such as `{display_suggested_suffix}`"
+        if df.completion.str[: -len(common_suffix)].str.contains(common_suffix, regex=False).any():
+            immediate_msg += f"\n  WARNING: Some of your completions contain the suffix `{common_suffix}` more than once. We suggest that you review your completions and add a unique ending"
+
+    else:
+        immediate_msg = "\n- Your data does not contain a common ending at the end of your completions. Having a common ending string appended to the end of the completion makes it clearer to the fine-tuned model where the completion should end. See https://platform.openai.com/docs/guides/fine-tuning/preparing-your-dataset for more detail and examples."
+
+    if common_suffix == "":
+        optional_msg = f"Add a suffix ending `{display_suggested_suffix}` to all completions"
+
+        def optional_fn(x: Any) -> Any:
+            return add_suffix(x, suggested_suffix)
+
+    return Remediation(
+        name="common_completion_suffix",
+        immediate_msg=immediate_msg,
+        optional_msg=optional_msg,
+        optional_fn=optional_fn,
+        error_msg=error_msg,
+    )
+
+
+def completions_space_start_validator(df: pd.DataFrame) -> Remediation:
+    """
+    This validator will suggest to add a space at the start of the completion if it doesn't already exist. This helps with tokenization.
+    """
+
+    def add_space_start(x: Any) -> Any:
+        x["completion"] = x["completion"].apply(lambda s: ("" if s.startswith(" ") else " ") + s)
+        return x
+
+    optional_msg = None
+    optional_fn = None
+    immediate_msg = None
+
+    if df.completion.str[:1].nunique() != 1 or df.completion.values[0][0] != " ":
+        immediate_msg = "\n- The completion should start with a whitespace character (` `). This tends to produce better results due to the tokenization we use. See https://platform.openai.com/docs/guides/fine-tuning/preparing-your-dataset for more details"
+        optional_msg = "Add a whitespace character to the beginning of the completion"
+        optional_fn = add_space_start
+    return Remediation(
+        name="completion_space_start",
+        immediate_msg=immediate_msg,
+        optional_msg=optional_msg,
+        optional_fn=optional_fn,
+    )
+
+
+def lower_case_validator(df: pd.DataFrame, column: Any) -> Remediation | None:
+    """
+    This validator will suggest to lowercase the column values, if more than a third of letters are uppercase.
+    """
+
+    def lower_case(x: Any) -> Any:
+        x[column] = x[column].str.lower()
+        return x
+
+    count_upper = df[column].apply(lambda x: sum(1 for c in x if c.isalpha() and c.isupper())).sum()
+    count_lower = df[column].apply(lambda x: sum(1 for c in x if c.isalpha() and c.islower())).sum()
+
+    if count_upper * 2 > count_lower:
+        return Remediation(
+            name="lower_case",
+            immediate_msg=f"\n- More than a third of your `{column}` column/key is uppercase. Uppercase {column}s tends to perform worse than a mixture of case encountered in normal language. We recommend to lower case the data if that makes sense in your domain. See https://platform.openai.com/docs/guides/fine-tuning/preparing-your-dataset for more details",
+            optional_msg=f"Lowercase all your data in column/key `{column}`",
+            optional_fn=lower_case,
+        )
+    return None
+
+
+def read_any_format(
+    fname: str, fields: list[str] = ["prompt", "completion"]
+) -> tuple[pd.DataFrame | None, Remediation]:
+    """
+    This function will read a file saved in .csv, .json, .txt, .xlsx or .tsv format using pandas.
+     - for .xlsx it will read the first sheet
+     - for .txt it will assume completions and split on newline
+    """
+    remediation = None
+    necessary_msg = None
+    immediate_msg = None
+    error_msg = None
+    df = None
+
+    if os.path.isfile(fname):
+        try:
+            if fname.lower().endswith(".csv") or fname.lower().endswith(".tsv"):
+                file_extension_str, separator = ("CSV", ",") if fname.lower().endswith(".csv") else ("TSV", "\t")
+                immediate_msg = (
+                    f"\n- Based on your file extension, your file is formatted as a {file_extension_str} file"
+                )
+                necessary_msg = f"Your format `{file_extension_str}` will be converted to `JSONL`"
+                df = pd.read_csv(fname, sep=separator, dtype=str).fillna("")
+            elif fname.lower().endswith(".xlsx"):
+                immediate_msg = "\n- Based on your file extension, your file is formatted as an Excel file"
+                necessary_msg = "Your format `XLSX` will be converted to `JSONL`"
+                xls = pd.ExcelFile(fname)
+                sheets = xls.sheet_names
+                if len(sheets) > 1:
+                    immediate_msg += "\n- Your Excel file contains more than one sheet. Please either save as csv or ensure all data is present in the first sheet. WARNING: Reading only the first sheet..."
+                df = pd.read_excel(fname, dtype=str).fillna("")
+            elif fname.lower().endswith(".txt"):
+                immediate_msg = "\n- Based on your file extension, you provided a text file"
+                necessary_msg = "Your format `TXT` will be converted to `JSONL`"
+                with open(fname, "r") as f:
+                    content = f.read()
+                    df = pd.DataFrame(
+                        [["", line] for line in content.split("\n")],
+                        columns=fields,
+                        dtype=str,
+                    ).fillna("")
+            elif fname.lower().endswith(".jsonl"):
+                df = pd.read_json(fname, lines=True, dtype=str).fillna("")  # type: ignore
+                if len(df) == 1:  # type: ignore
+                    # this is NOT what we expect for a .jsonl file
+                    immediate_msg = "\n- Your JSONL file appears to be in a JSON format. Your file will be converted to JSONL format"
+                    necessary_msg = "Your format `JSON` will be converted to `JSONL`"
+                    df = pd.read_json(fname, dtype=str).fillna("")  # type: ignore
+                else:
+                    pass  # this is what we expect for a .jsonl file
+            elif fname.lower().endswith(".json"):
+                try:
+                    # to handle case where .json file is actually a .jsonl file
+                    df = pd.read_json(fname, lines=True, dtype=str).fillna("")  # type: ignore
+                    if len(df) == 1:  # type: ignore
+                        # this code path corresponds to a .json file that has one line
+                        df = pd.read_json(fname, dtype=str).fillna("")  # type: ignore
+                    else:
+                        # this is NOT what we expect for a .json file
+                        immediate_msg = "\n- Your JSON file appears to be in a JSONL format. Your file will be converted to JSONL format"
+                        necessary_msg = "Your format `JSON` will be converted to `JSONL`"
+                except ValueError:
+                    # this code path corresponds to a .json file that has multiple lines (i.e. it is indented)
+                    df = pd.read_json(fname, dtype=str).fillna("")  # type: ignore
+            else:
+                error_msg = (
+                    "Your file must have one of the following extensions: .CSV, .TSV, .XLSX, .TXT, .JSON or .JSONL"
+                )
+                if "." in fname:
+                    error_msg += f" Your file `{fname}` ends with the extension `.{fname.split('.')[-1]}` which is not supported."
+                else:
+                    error_msg += f" Your file `{fname}` is missing a file extension."
+
+        except (ValueError, TypeError):
+            file_extension_str = fname.split(".")[-1].upper()
+            error_msg = f"Your file `{fname}` does not appear to be in valid {file_extension_str} format. Please ensure your file is formatted as a valid {file_extension_str} file."
+
+    else:
+        error_msg = f"File {fname} does not exist."
+
+    remediation = Remediation(
+        name="read_any_format",
+        necessary_msg=necessary_msg,
+        immediate_msg=immediate_msg,
+        error_msg=error_msg,
+    )
+    return df, remediation
+
+
+def format_inferrer_validator(df: pd.DataFrame) -> Remediation:
+    """
+    This validator will infer the likely fine-tuning format of the data, and display it to the user if it is classification.
+    It will also suggest to use ada and explain train/validation split benefits.
+    """
+    ft_type = infer_task_type(df)
+    immediate_msg = None
+    if ft_type == "classification":
+        immediate_msg = f"\n- Based on your data it seems like you're trying to fine-tune a model for {ft_type}\n- For classification, we recommend you try one of the faster and cheaper models, such as `ada`\n- For classification, you can estimate the expected model performance by keeping a held out dataset, which is not used for training"
+    return Remediation(name="num_examples", immediate_msg=immediate_msg)
+
+
+def apply_necessary_remediation(df: OptionalDataFrameT, remediation: Remediation) -> OptionalDataFrameT:
+    """
+    This function will apply a necessary remediation to a dataframe, or print an error message if one exists.
+    """
+    if remediation.error_msg is not None:
+        sys.stderr.write(f"\n\nERROR in {remediation.name} validator: {remediation.error_msg}\n\nAborting...")
+        sys.exit(1)
+    if remediation.immediate_msg is not None:
+        sys.stdout.write(remediation.immediate_msg)
+    if remediation.necessary_fn is not None:
+        df = remediation.necessary_fn(df)
+    return df
+
+
+def accept_suggestion(input_text: str, auto_accept: bool) -> bool:
+    sys.stdout.write(input_text)
+    if auto_accept:
+        sys.stdout.write("Y\n")
+        return True
+    return input().lower() != "n"
+
+
+def apply_optional_remediation(
+    df: pd.DataFrame, remediation: Remediation, auto_accept: bool
+) -> tuple[pd.DataFrame, bool]:
+    """
+    This function will apply an optional remediation to a dataframe, based on the user input.
+    """
+    optional_applied = False
+    input_text = f"- [Recommended] {remediation.optional_msg} [Y/n]: "
+    if remediation.optional_msg is not None:
+        if accept_suggestion(input_text, auto_accept):
+            assert remediation.optional_fn is not None
+            df = remediation.optional_fn(df)
+            optional_applied = True
+    if remediation.necessary_msg is not None:
+        sys.stdout.write(f"- [Necessary] {remediation.necessary_msg}\n")
+    return df, optional_applied
+
+
+def estimate_fine_tuning_time(df: pd.DataFrame) -> None:
+    """
+    Estimate the time it'll take to fine-tune the dataset
+    """
+    ft_format = infer_task_type(df)
+    expected_time = 1.0
+    if ft_format == "classification":
+        num_examples = len(df)
+        expected_time = num_examples * 1.44
+    else:
+        size = df.memory_usage(index=True).sum()
+        expected_time = size * 0.0515
+
+    def format_time(time: float) -> str:
+        if time < 60:
+            return f"{round(time, 2)} seconds"
+        elif time < 3600:
+            return f"{round(time / 60, 2)} minutes"
+        elif time < 86400:
+            return f"{round(time / 3600, 2)} hours"
+        else:
+            return f"{round(time / 86400, 2)} days"
+
+    time_string = format_time(expected_time + 140)
+    sys.stdout.write(
+        f"Once your model starts training, it'll approximately take {time_string} to train a `curie` model, and less for `ada` and `babbage`. Queue will approximately take half an hour per job ahead of you.\n"
+    )
+
+
+def get_outfnames(fname: str, split: bool) -> list[str]:
+    suffixes = ["_train", "_valid"] if split else [""]
+    i = 0
+    while True:
+        index_suffix = f" ({i})" if i > 0 else ""
+        candidate_fnames = [f"{os.path.splitext(fname)[0]}_prepared{suffix}{index_suffix}.jsonl" for suffix in suffixes]
+        if not any(os.path.isfile(f) for f in candidate_fnames):
+            return candidate_fnames
+        i += 1
+
+
+def get_classification_hyperparams(df: pd.DataFrame) -> tuple[int, object]:
+    n_classes = df.completion.nunique()
+    pos_class = None
+    if n_classes == 2:
+        pos_class = df.completion.value_counts().index[0]
+    return n_classes, pos_class
+
+
+def write_out_file(df: pd.DataFrame, fname: str, any_remediations: bool, auto_accept: bool) -> None:
+    """
+    This function will write out a dataframe to a file, if the user would like to proceed, and also offer a fine-tuning command with the newly created file.
+    For classification it will optionally ask the user if they would like to split the data into train/valid files, and modify the suggested command to include the valid set.
+    """
+    ft_format = infer_task_type(df)
+    common_prompt_suffix = get_common_xfix(df.prompt, xfix="suffix")
+    common_completion_suffix = get_common_xfix(df.completion, xfix="suffix")
+
+    split = False
+    input_text = "- [Recommended] Would you like to split into training and validation set? [Y/n]: "
+    if ft_format == "classification":
+        if accept_suggestion(input_text, auto_accept):
+            split = True
+
+    additional_params = ""
+    common_prompt_suffix_new_line_handled = common_prompt_suffix.replace("\n", "\\n")
+    common_completion_suffix_new_line_handled = common_completion_suffix.replace("\n", "\\n")
+    optional_ending_string = (
+        f' Make sure to include `stop=["{common_completion_suffix_new_line_handled}"]` so that the generated texts ends at the expected place.'
+        if len(common_completion_suffix_new_line_handled) > 0
+        else ""
+    )
+
+    input_text = "\n\nYour data will be written to a new JSONL file. Proceed [Y/n]: "
+
+    if not any_remediations and not split:
+        sys.stdout.write(
+            f'\nYou can use your file for fine-tuning:\n> openai api fine_tunes.create -t "{fname}"{additional_params}\n\nAfter you’ve fine-tuned a model, remember that your prompt has to end with the indicator string `{common_prompt_suffix_new_line_handled}` for the model to start generating completions, rather than continuing with the prompt.{optional_ending_string}\n'
+        )
+        estimate_fine_tuning_time(df)
+
+    elif accept_suggestion(input_text, auto_accept):
+        fnames = get_outfnames(fname, split)
+        if split:
+            assert len(fnames) == 2 and "train" in fnames[0] and "valid" in fnames[1]
+            MAX_VALID_EXAMPLES = 1000
+            n_train = max(len(df) - MAX_VALID_EXAMPLES, int(len(df) * 0.8))
+            df_train = df.sample(n=n_train, random_state=42)
+            df_valid = df.drop(df_train.index)
+            df_train[["prompt", "completion"]].to_json(  # type: ignore
+                fnames[0], lines=True, orient="records", force_ascii=False, indent=None
+            )
+            df_valid[["prompt", "completion"]].to_json(
+                fnames[1], lines=True, orient="records", force_ascii=False, indent=None
+            )
+
+            n_classes, pos_class = get_classification_hyperparams(df)
+            additional_params += " --compute_classification_metrics"
+            if n_classes == 2:
+                additional_params += f' --classification_positive_class "{pos_class}"'
+            else:
+                additional_params += f" --classification_n_classes {n_classes}"
+        else:
+            assert len(fnames) == 1
+            df[["prompt", "completion"]].to_json(
+                fnames[0], lines=True, orient="records", force_ascii=False, indent=None
+            )
+
+        # Add -v VALID_FILE if we split the file into train / valid
+        files_string = ("s" if split else "") + " to `" + ("` and `".join(fnames))
+        valid_string = f' -v "{fnames[1]}"' if split else ""
+        separator_reminder = (
+            ""
+            if len(common_prompt_suffix_new_line_handled) == 0
+            else f"After you’ve fine-tuned a model, remember that your prompt has to end with the indicator string `{common_prompt_suffix_new_line_handled}` for the model to start generating completions, rather than continuing with the prompt."
+        )
+        sys.stdout.write(
+            f'\nWrote modified file{files_string}`\nFeel free to take a look!\n\nNow use that file when fine-tuning:\n> openai api fine_tunes.create -t "{fnames[0]}"{valid_string}{additional_params}\n\n{separator_reminder}{optional_ending_string}\n'
+        )
+        estimate_fine_tuning_time(df)
+    else:
+        sys.stdout.write("Aborting... did not write the file\n")
+
+
+def infer_task_type(df: pd.DataFrame) -> str:
+    """
+    Infer the likely fine-tuning task type from the data
+    """
+    CLASSIFICATION_THRESHOLD = 3  # min_average instances of each class
+    if sum(df.prompt.str.len()) == 0:
+        return "open-ended generation"
+
+    if len(df.completion.unique()) < len(df) / CLASSIFICATION_THRESHOLD:
+        return "classification"
+
+    return "conditional generation"
+
+
+def get_common_xfix(series: Any, xfix: str = "suffix") -> str:
+    """
+    Finds the longest common suffix or prefix of all the values in a series
+    """
+    common_xfix = ""
+    while True:
+        common_xfixes = (
+            series.str[-(len(common_xfix) + 1) :] if xfix == "suffix" else series.str[: len(common_xfix) + 1]
+        )  # first few or last few characters
+        if common_xfixes.nunique() != 1:  # we found the character at which we don't have a unique xfix anymore
+            break
+        elif common_xfix == common_xfixes.values[0]:  # the entire first row is a prefix of every other row
+            break
+        else:  # the first or last few characters are still common across all rows - let's try to add one more
+            common_xfix = common_xfixes.values[0]
+    return common_xfix
+
+
+Validator: TypeAlias = "Callable[[pd.DataFrame], Remediation | None]"
+
+
+def get_validators() -> list[Validator]:
+    return [
+        num_examples_validator,
+        lambda x: necessary_column_validator(x, "prompt"),
+        lambda x: necessary_column_validator(x, "completion"),
+        additional_column_validator,
+        non_empty_field_validator,
+        format_inferrer_validator,
+        duplicated_rows_validator,
+        long_examples_validator,
+        lambda x: lower_case_validator(x, "prompt"),
+        lambda x: lower_case_validator(x, "completion"),
+        common_prompt_suffix_validator,
+        common_prompt_prefix_validator,
+        common_completion_prefix_validator,
+        common_completion_suffix_validator,
+        completions_space_start_validator,
+    ]
+
+
+def apply_validators(
+    df: pd.DataFrame,
+    fname: str,
+    remediation: Remediation | None,
+    validators: list[Validator],
+    auto_accept: bool,
+    write_out_file_func: Callable[..., Any],
+) -> None:
+    optional_remediations: list[Remediation] = []
+    if remediation is not None:
+        optional_remediations.append(remediation)
+    for validator in validators:
+        remediation = validator(df)
+        if remediation is not None:
+            optional_remediations.append(remediation)
+            df = apply_necessary_remediation(df, remediation)
+
+    any_optional_or_necessary_remediations = any(
+        [
+            remediation
+            for remediation in optional_remediations
+            if remediation.optional_msg is not None or remediation.necessary_msg is not None
+        ]
+    )
+    any_necessary_applied = any(
+        [remediation for remediation in optional_remediations if remediation.necessary_msg is not None]
+    )
+    any_optional_applied = False
+
+    if any_optional_or_necessary_remediations:
+        sys.stdout.write("\n\nBased on the analysis we will perform the following actions:\n")
+        for remediation in optional_remediations:
+            df, optional_applied = apply_optional_remediation(df, remediation, auto_accept)
+            any_optional_applied = any_optional_applied or optional_applied
+    else:
+        sys.stdout.write("\n\nNo remediations found.\n")
+
+    any_optional_or_necessary_applied = any_optional_applied or any_necessary_applied
+
+    write_out_file_func(df, fname, any_optional_or_necessary_applied, auto_accept)
diff --git a/.venv/lib/python3.11/site-packages/openai/lib/azure.py b/.venv/lib/python3.11/site-packages/openai/lib/azure.py
new file mode 100644
index 0000000000000000000000000000000000000000..f857d76e51554e1e705d612954b9edac60788b3d
--- /dev/null
+++ b/.venv/lib/python3.11/site-packages/openai/lib/azure.py
@@ -0,0 +1,587 @@
+from __future__ import annotations
+
+import os
+import inspect
+from typing import Any, Union, Mapping, TypeVar, Callable, Awaitable, cast, overload
+from typing_extensions import Self, override
+
+import httpx
+
+from .._types import NOT_GIVEN, Omit, Query, Timeout, NotGiven
+from .._utils import is_given, is_mapping
+from .._client import OpenAI, AsyncOpenAI
+from .._compat import model_copy
+from .._models import FinalRequestOptions
+from .._streaming import Stream, AsyncStream
+from .._exceptions import OpenAIError
+from .._base_client import DEFAULT_MAX_RETRIES, BaseClient
+
+_deployments_endpoints = set(
+    [
+        "/completions",
+        "/chat/completions",
+        "/embeddings",
+        "/audio/transcriptions",
+        "/audio/translations",
+        "/audio/speech",
+        "/images/generations",
+    ]
+)
+
+
+AzureADTokenProvider = Callable[[], str]
+AsyncAzureADTokenProvider = Callable[[], "str | Awaitable[str]"]
+_HttpxClientT = TypeVar("_HttpxClientT", bound=Union[httpx.Client, httpx.AsyncClient])
+_DefaultStreamT = TypeVar("_DefaultStreamT", bound=Union[Stream[Any], AsyncStream[Any]])
+
+
+# we need to use a sentinel API key value for Azure AD
+# as we don't want to make the `api_key` in the main client Optional
+# and Azure AD tokens may be retrieved on a per-request basis
+API_KEY_SENTINEL = "".join(["<", "missing API key", ">"])
+
+
+class MutuallyExclusiveAuthError(OpenAIError):
+    def __init__(self) -> None:
+        super().__init__(
+            "The `api_key`, `azure_ad_token` and `azure_ad_token_provider` arguments are mutually exclusive; Only one can be passed at a time"
+        )
+
+
+class BaseAzureClient(BaseClient[_HttpxClientT, _DefaultStreamT]):
+    @override
+    def _build_request(
+        self,
+        options: FinalRequestOptions,
+        *,
+        retries_taken: int = 0,
+    ) -> httpx.Request:
+        if options.url in _deployments_endpoints and is_mapping(options.json_data):
+            model = options.json_data.get("model")
+            if model is not None and not "/deployments" in str(self.base_url):
+                options.url = f"/deployments/{model}{options.url}"
+
+        return super()._build_request(options, retries_taken=retries_taken)
+
+
+class AzureOpenAI(BaseAzureClient[httpx.Client, Stream[Any]], OpenAI):
+    @overload
+    def __init__(
+        self,
+        *,
+        azure_endpoint: str,
+        azure_deployment: str | None = None,
+        api_version: str | None = None,
+        api_key: str | None = None,
+        azure_ad_token: str | None = None,
+        azure_ad_token_provider: AzureADTokenProvider | None = None,
+        organization: str | None = None,
+        websocket_base_url: str | httpx.URL | None = None,
+        timeout: float | Timeout | None | NotGiven = NOT_GIVEN,
+        max_retries: int = DEFAULT_MAX_RETRIES,
+        default_headers: Mapping[str, str] | None = None,
+        default_query: Mapping[str, object] | None = None,
+        http_client: httpx.Client | None = None,
+        _strict_response_validation: bool = False,
+    ) -> None: ...
+
+    @overload
+    def __init__(
+        self,
+        *,
+        azure_deployment: str | None = None,
+        api_version: str | None = None,
+        api_key: str | None = None,
+        azure_ad_token: str | None = None,
+        azure_ad_token_provider: AzureADTokenProvider | None = None,
+        organization: str | None = None,
+        websocket_base_url: str | httpx.URL | None = None,
+        timeout: float | Timeout | None | NotGiven = NOT_GIVEN,
+        max_retries: int = DEFAULT_MAX_RETRIES,
+        default_headers: Mapping[str, str] | None = None,
+        default_query: Mapping[str, object] | None = None,
+        http_client: httpx.Client | None = None,
+        _strict_response_validation: bool = False,
+    ) -> None: ...
+
+    @overload
+    def __init__(
+        self,
+        *,
+        base_url: str,
+        api_version: str | None = None,
+        api_key: str | None = None,
+        azure_ad_token: str | None = None,
+        azure_ad_token_provider: AzureADTokenProvider | None = None,
+        organization: str | None = None,
+        websocket_base_url: str | httpx.URL | None = None,
+        timeout: float | Timeout | None | NotGiven = NOT_GIVEN,
+        max_retries: int = DEFAULT_MAX_RETRIES,
+        default_headers: Mapping[str, str] | None = None,
+        default_query: Mapping[str, object] | None = None,
+        http_client: httpx.Client | None = None,
+        _strict_response_validation: bool = False,
+    ) -> None: ...
+
+    def __init__(
+        self,
+        *,
+        api_version: str | None = None,
+        azure_endpoint: str | None = None,
+        azure_deployment: str | None = None,
+        api_key: str | None = None,
+        azure_ad_token: str | None = None,
+        azure_ad_token_provider: AzureADTokenProvider | None = None,
+        organization: str | None = None,
+        project: str | None = None,
+        websocket_base_url: str | httpx.URL | None = None,
+        base_url: str | None = None,
+        timeout: float | Timeout | None | NotGiven = NOT_GIVEN,
+        max_retries: int = DEFAULT_MAX_RETRIES,
+        default_headers: Mapping[str, str] | None = None,
+        default_query: Mapping[str, object] | None = None,
+        http_client: httpx.Client | None = None,
+        _strict_response_validation: bool = False,
+    ) -> None:
+        """Construct a new synchronous azure openai client instance.
+
+        This automatically infers the following arguments from their corresponding environment variables if they are not provided:
+        - `api_key` from `AZURE_OPENAI_API_KEY`
+        - `organization` from `OPENAI_ORG_ID`
+        - `project` from `OPENAI_PROJECT_ID`
+        - `azure_ad_token` from `AZURE_OPENAI_AD_TOKEN`
+        - `api_version` from `OPENAI_API_VERSION`
+        - `azure_endpoint` from `AZURE_OPENAI_ENDPOINT`
+
+        Args:
+            azure_endpoint: Your Azure endpoint, including the resource, e.g. `https://example-resource.azure.openai.com/`
+
+            azure_ad_token: Your Azure Active Directory token, https://www.microsoft.com/en-us/security/business/identity-access/microsoft-entra-id
+
+            azure_ad_token_provider: A function that returns an Azure Active Directory token, will be invoked on every request.
+
+            azure_deployment: A model deployment, if given sets the base client URL to include `/deployments/{azure_deployment}`.
+                Note: this means you won't be able to use non-deployment endpoints. Not supported with Assistants APIs.
+        """
+        if api_key is None:
+            api_key = os.environ.get("AZURE_OPENAI_API_KEY")
+
+        if azure_ad_token is None:
+            azure_ad_token = os.environ.get("AZURE_OPENAI_AD_TOKEN")
+
+        if api_key is None and azure_ad_token is None and azure_ad_token_provider is None:
+            raise OpenAIError(
+                "Missing credentials. Please pass one of `api_key`, `azure_ad_token`, `azure_ad_token_provider`, or the `AZURE_OPENAI_API_KEY` or `AZURE_OPENAI_AD_TOKEN` environment variables."
+            )
+
+        if api_version is None:
+            api_version = os.environ.get("OPENAI_API_VERSION")
+
+        if api_version is None:
+            raise ValueError(
+                "Must provide either the `api_version` argument or the `OPENAI_API_VERSION` environment variable"
+            )
+
+        if default_query is None:
+            default_query = {"api-version": api_version}
+        else:
+            default_query = {**default_query, "api-version": api_version}
+
+        if base_url is None:
+            if azure_endpoint is None:
+                azure_endpoint = os.environ.get("AZURE_OPENAI_ENDPOINT")
+
+            if azure_endpoint is None:
+                raise ValueError(
+                    "Must provide one of the `base_url` or `azure_endpoint` arguments, or the `AZURE_OPENAI_ENDPOINT` environment variable"
+                )
+
+            if azure_deployment is not None:
+                base_url = f"{azure_endpoint.rstrip('/')}/openai/deployments/{azure_deployment}"
+            else:
+                base_url = f"{azure_endpoint.rstrip('/')}/openai"
+        else:
+            if azure_endpoint is not None:
+                raise ValueError("base_url and azure_endpoint are mutually exclusive")
+
+        if api_key is None:
+            # define a sentinel value to avoid any typing issues
+            api_key = API_KEY_SENTINEL
+
+        super().__init__(
+            api_key=api_key,
+            organization=organization,
+            project=project,
+            base_url=base_url,
+            timeout=timeout,
+            max_retries=max_retries,
+            default_headers=default_headers,
+            default_query=default_query,
+            http_client=http_client,
+            websocket_base_url=websocket_base_url,
+            _strict_response_validation=_strict_response_validation,
+        )
+        self._api_version = api_version
+        self._azure_ad_token = azure_ad_token
+        self._azure_ad_token_provider = azure_ad_token_provider
+
+    @override
+    def copy(
+        self,
+        *,
+        api_key: str | None = None,
+        organization: str | None = None,
+        project: str | None = None,
+        websocket_base_url: str | httpx.URL | None = None,
+        api_version: str | None = None,
+        azure_ad_token: str | None = None,
+        azure_ad_token_provider: AzureADTokenProvider | None = None,
+        base_url: str | httpx.URL | None = None,
+        timeout: float | Timeout | None | NotGiven = NOT_GIVEN,
+        http_client: httpx.Client | None = None,
+        max_retries: int | NotGiven = NOT_GIVEN,
+        default_headers: Mapping[str, str] | None = None,
+        set_default_headers: Mapping[str, str] | None = None,
+        default_query: Mapping[str, object] | None = None,
+        set_default_query: Mapping[str, object] | None = None,
+        _extra_kwargs: Mapping[str, Any] = {},
+    ) -> Self:
+        """
+        Create a new client instance re-using the same options given to the current client with optional overriding.
+        """
+        return super().copy(
+            api_key=api_key,
+            organization=organization,
+            project=project,
+            websocket_base_url=websocket_base_url,
+            base_url=base_url,
+            timeout=timeout,
+            http_client=http_client,
+            max_retries=max_retries,
+            default_headers=default_headers,
+            set_default_headers=set_default_headers,
+            default_query=default_query,
+            set_default_query=set_default_query,
+            _extra_kwargs={
+                "api_version": api_version or self._api_version,
+                "azure_ad_token": azure_ad_token or self._azure_ad_token,
+                "azure_ad_token_provider": azure_ad_token_provider or self._azure_ad_token_provider,
+                **_extra_kwargs,
+            },
+        )
+
+    with_options = copy
+
+    def _get_azure_ad_token(self) -> str | None:
+        if self._azure_ad_token is not None:
+            return self._azure_ad_token
+
+        provider = self._azure_ad_token_provider
+        if provider is not None:
+            token = provider()
+            if not token or not isinstance(token, str):  # pyright: ignore[reportUnnecessaryIsInstance]
+                raise ValueError(
+                    f"Expected `azure_ad_token_provider` argument to return a string but it returned {token}",
+                )
+            return token
+
+        return None
+
+    @override
+    def _prepare_options(self, options: FinalRequestOptions) -> FinalRequestOptions:
+        headers: dict[str, str | Omit] = {**options.headers} if is_given(options.headers) else {}
+
+        options = model_copy(options)
+        options.headers = headers
+
+        azure_ad_token = self._get_azure_ad_token()
+        if azure_ad_token is not None:
+            if headers.get("Authorization") is None:
+                headers["Authorization"] = f"Bearer {azure_ad_token}"
+        elif self.api_key is not API_KEY_SENTINEL:
+            if headers.get("api-key") is None:
+                headers["api-key"] = self.api_key
+        else:
+            # should never be hit
+            raise ValueError("Unable to handle auth")
+
+        return options
+
+    def _configure_realtime(self, model: str, extra_query: Query) -> tuple[Query, dict[str, str]]:
+        auth_headers = {}
+        query = {
+            **extra_query,
+            "api-version": self._api_version,
+            "deployment": model,
+        }
+        if self.api_key != "<missing API key>":
+            auth_headers = {"api-key": self.api_key}
+        else:
+            token = self._get_azure_ad_token()
+            if token:
+                auth_headers = {"Authorization": f"Bearer {token}"}
+        return query, auth_headers
+
+
+class AsyncAzureOpenAI(BaseAzureClient[httpx.AsyncClient, AsyncStream[Any]], AsyncOpenAI):
+    @overload
+    def __init__(
+        self,
+        *,
+        azure_endpoint: str,
+        azure_deployment: str | None = None,
+        api_version: str | None = None,
+        api_key: str | None = None,
+        azure_ad_token: str | None = None,
+        azure_ad_token_provider: AsyncAzureADTokenProvider | None = None,
+        organization: str | None = None,
+        project: str | None = None,
+        websocket_base_url: str | httpx.URL | None = None,
+        timeout: float | Timeout | None | NotGiven = NOT_GIVEN,
+        max_retries: int = DEFAULT_MAX_RETRIES,
+        default_headers: Mapping[str, str] | None = None,
+        default_query: Mapping[str, object] | None = None,
+        http_client: httpx.AsyncClient | None = None,
+        _strict_response_validation: bool = False,
+    ) -> None: ...
+
+    @overload
+    def __init__(
+        self,
+        *,
+        azure_deployment: str | None = None,
+        api_version: str | None = None,
+        api_key: str | None = None,
+        azure_ad_token: str | None = None,
+        azure_ad_token_provider: AsyncAzureADTokenProvider | None = None,
+        organization: str | None = None,
+        project: str | None = None,
+        websocket_base_url: str | httpx.URL | None = None,
+        timeout: float | Timeout | None | NotGiven = NOT_GIVEN,
+        max_retries: int = DEFAULT_MAX_RETRIES,
+        default_headers: Mapping[str, str] | None = None,
+        default_query: Mapping[str, object] | None = None,
+        http_client: httpx.AsyncClient | None = None,
+        _strict_response_validation: bool = False,
+    ) -> None: ...
+
+    @overload
+    def __init__(
+        self,
+        *,
+        base_url: str,
+        api_version: str | None = None,
+        api_key: str | None = None,
+        azure_ad_token: str | None = None,
+        azure_ad_token_provider: AsyncAzureADTokenProvider | None = None,
+        organization: str | None = None,
+        project: str | None = None,
+        websocket_base_url: str | httpx.URL | None = None,
+        timeout: float | Timeout | None | NotGiven = NOT_GIVEN,
+        max_retries: int = DEFAULT_MAX_RETRIES,
+        default_headers: Mapping[str, str] | None = None,
+        default_query: Mapping[str, object] | None = None,
+        http_client: httpx.AsyncClient | None = None,
+        _strict_response_validation: bool = False,
+    ) -> None: ...
+
+    def __init__(
+        self,
+        *,
+        azure_endpoint: str | None = None,
+        azure_deployment: str | None = None,
+        api_version: str | None = None,
+        api_key: str | None = None,
+        azure_ad_token: str | None = None,
+        azure_ad_token_provider: AsyncAzureADTokenProvider | None = None,
+        organization: str | None = None,
+        project: str | None = None,
+        base_url: str | None = None,
+        websocket_base_url: str | httpx.URL | None = None,
+        timeout: float | Timeout | None | NotGiven = NOT_GIVEN,
+        max_retries: int = DEFAULT_MAX_RETRIES,
+        default_headers: Mapping[str, str] | None = None,
+        default_query: Mapping[str, object] | None = None,
+        http_client: httpx.AsyncClient | None = None,
+        _strict_response_validation: bool = False,
+    ) -> None:
+        """Construct a new asynchronous azure openai client instance.
+
+        This automatically infers the following arguments from their corresponding environment variables if they are not provided:
+        - `api_key` from `AZURE_OPENAI_API_KEY`
+        - `organization` from `OPENAI_ORG_ID`
+        - `project` from `OPENAI_PROJECT_ID`
+        - `azure_ad_token` from `AZURE_OPENAI_AD_TOKEN`
+        - `api_version` from `OPENAI_API_VERSION`
+        - `azure_endpoint` from `AZURE_OPENAI_ENDPOINT`
+
+        Args:
+            azure_endpoint: Your Azure endpoint, including the resource, e.g. `https://example-resource.azure.openai.com/`
+
+            azure_ad_token: Your Azure Active Directory token, https://www.microsoft.com/en-us/security/business/identity-access/microsoft-entra-id
+
+            azure_ad_token_provider: A function that returns an Azure Active Directory token, will be invoked on every request.
+
+            azure_deployment: A model deployment, if given sets the base client URL to include `/deployments/{azure_deployment}`.
+                Note: this means you won't be able to use non-deployment endpoints. Not supported with Assistants APIs.
+        """
+        if api_key is None:
+            api_key = os.environ.get("AZURE_OPENAI_API_KEY")
+
+        if azure_ad_token is None:
+            azure_ad_token = os.environ.get("AZURE_OPENAI_AD_TOKEN")
+
+        if api_key is None and azure_ad_token is None and azure_ad_token_provider is None:
+            raise OpenAIError(
+                "Missing credentials. Please pass one of `api_key`, `azure_ad_token`, `azure_ad_token_provider`, or the `AZURE_OPENAI_API_KEY` or `AZURE_OPENAI_AD_TOKEN` environment variables."
+            )
+
+        if api_version is None:
+            api_version = os.environ.get("OPENAI_API_VERSION")
+
+        if api_version is None:
+            raise ValueError(
+                "Must provide either the `api_version` argument or the `OPENAI_API_VERSION` environment variable"
+            )
+
+        if default_query is None:
+            default_query = {"api-version": api_version}
+        else:
+            default_query = {**default_query, "api-version": api_version}
+
+        if base_url is None:
+            if azure_endpoint is None:
+                azure_endpoint = os.environ.get("AZURE_OPENAI_ENDPOINT")
+
+            if azure_endpoint is None:
+                raise ValueError(
+                    "Must provide one of the `base_url` or `azure_endpoint` arguments, or the `AZURE_OPENAI_ENDPOINT` environment variable"
+                )
+
+            if azure_deployment is not None:
+                base_url = f"{azure_endpoint.rstrip('/')}/openai/deployments/{azure_deployment}"
+            else:
+                base_url = f"{azure_endpoint.rstrip('/')}/openai"
+        else:
+            if azure_endpoint is not None:
+                raise ValueError("base_url and azure_endpoint are mutually exclusive")
+
+        if api_key is None:
+            # define a sentinel value to avoid any typing issues
+            api_key = API_KEY_SENTINEL
+
+        super().__init__(
+            api_key=api_key,
+            organization=organization,
+            project=project,
+            base_url=base_url,
+            timeout=timeout,
+            max_retries=max_retries,
+            default_headers=default_headers,
+            default_query=default_query,
+            http_client=http_client,
+            websocket_base_url=websocket_base_url,
+            _strict_response_validation=_strict_response_validation,
+        )
+        self._api_version = api_version
+        self._azure_ad_token = azure_ad_token
+        self._azure_ad_token_provider = azure_ad_token_provider
+
+    @override
+    def copy(
+        self,
+        *,
+        api_key: str | None = None,
+        organization: str | None = None,
+        project: str | None = None,
+        websocket_base_url: str | httpx.URL | None = None,
+        api_version: str | None = None,
+        azure_ad_token: str | None = None,
+        azure_ad_token_provider: AsyncAzureADTokenProvider | None = None,
+        base_url: str | httpx.URL | None = None,
+        timeout: float | Timeout | None | NotGiven = NOT_GIVEN,
+        http_client: httpx.AsyncClient | None = None,
+        max_retries: int | NotGiven = NOT_GIVEN,
+        default_headers: Mapping[str, str] | None = None,
+        set_default_headers: Mapping[str, str] | None = None,
+        default_query: Mapping[str, object] | None = None,
+        set_default_query: Mapping[str, object] | None = None,
+        _extra_kwargs: Mapping[str, Any] = {},
+    ) -> Self:
+        """
+        Create a new client instance re-using the same options given to the current client with optional overriding.
+        """
+        return super().copy(
+            api_key=api_key,
+            organization=organization,
+            project=project,
+            websocket_base_url=websocket_base_url,
+            base_url=base_url,
+            timeout=timeout,
+            http_client=http_client,
+            max_retries=max_retries,
+            default_headers=default_headers,
+            set_default_headers=set_default_headers,
+            default_query=default_query,
+            set_default_query=set_default_query,
+            _extra_kwargs={
+                "api_version": api_version or self._api_version,
+                "azure_ad_token": azure_ad_token or self._azure_ad_token,
+                "azure_ad_token_provider": azure_ad_token_provider or self._azure_ad_token_provider,
+                **_extra_kwargs,
+            },
+        )
+
+    with_options = copy
+
+    async def _get_azure_ad_token(self) -> str | None:
+        if self._azure_ad_token is not None:
+            return self._azure_ad_token
+
+        provider = self._azure_ad_token_provider
+        if provider is not None:
+            token = provider()
+            if inspect.isawaitable(token):
+                token = await token
+            if not token or not isinstance(cast(Any, token), str):
+                raise ValueError(
+                    f"Expected `azure_ad_token_provider` argument to return a string but it returned {token}",
+                )
+            return str(token)
+
+        return None
+
+    @override
+    async def _prepare_options(self, options: FinalRequestOptions) -> FinalRequestOptions:
+        headers: dict[str, str | Omit] = {**options.headers} if is_given(options.headers) else {}
+
+        options = model_copy(options)
+        options.headers = headers
+
+        azure_ad_token = await self._get_azure_ad_token()
+        if azure_ad_token is not None:
+            if headers.get("Authorization") is None:
+                headers["Authorization"] = f"Bearer {azure_ad_token}"
+        elif self.api_key is not API_KEY_SENTINEL:
+            if headers.get("api-key") is None:
+                headers["api-key"] = self.api_key
+        else:
+            # should never be hit
+            raise ValueError("Unable to handle auth")
+
+        return options
+
+    async def _configure_realtime(self, model: str, extra_query: Query) -> tuple[Query, dict[str, str]]:
+        auth_headers = {}
+        query = {
+            **extra_query,
+            "api-version": self._api_version,
+            "deployment": model,
+        }
+        if self.api_key != "<missing API key>":
+            auth_headers = {"api-key": self.api_key}
+        else:
+            token = await self._get_azure_ad_token()
+            if token:
+                auth_headers = {"Authorization": f"Bearer {token}"}
+        return query, auth_headers
diff --git a/.venv/lib/python3.11/site-packages/openai/lib/streaming/__init__.py b/.venv/lib/python3.11/site-packages/openai/lib/streaming/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..eb378d25619baf4847198a00cf994ea40463cee0
--- /dev/null
+++ b/.venv/lib/python3.11/site-packages/openai/lib/streaming/__init__.py
@@ -0,0 +1,8 @@
+from ._assistants import (
+    AssistantEventHandler as AssistantEventHandler,
+    AssistantEventHandlerT as AssistantEventHandlerT,
+    AssistantStreamManager as AssistantStreamManager,
+    AsyncAssistantEventHandler as AsyncAssistantEventHandler,
+    AsyncAssistantEventHandlerT as AsyncAssistantEventHandlerT,
+    AsyncAssistantStreamManager as AsyncAssistantStreamManager,
+)
diff --git a/.venv/lib/python3.11/site-packages/openai/lib/streaming/__pycache__/__init__.cpython-311.pyc b/.venv/lib/python3.11/site-packages/openai/lib/streaming/__pycache__/__init__.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..987a5e4cb76912ed40544820269046495f9098c6
Binary files /dev/null and b/.venv/lib/python3.11/site-packages/openai/lib/streaming/__pycache__/__init__.cpython-311.pyc differ
diff --git a/.venv/lib/python3.11/site-packages/openai/lib/streaming/__pycache__/_assistants.cpython-311.pyc b/.venv/lib/python3.11/site-packages/openai/lib/streaming/__pycache__/_assistants.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..2e7cff919eefcd6ae239b2334b5c28f2a302877f
Binary files /dev/null and b/.venv/lib/python3.11/site-packages/openai/lib/streaming/__pycache__/_assistants.cpython-311.pyc differ
diff --git a/.venv/lib/python3.11/site-packages/openai/lib/streaming/__pycache__/_deltas.cpython-311.pyc b/.venv/lib/python3.11/site-packages/openai/lib/streaming/__pycache__/_deltas.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..7e4befd2a18ff636f323d941f9497d10c51a2f36
Binary files /dev/null and b/.venv/lib/python3.11/site-packages/openai/lib/streaming/__pycache__/_deltas.cpython-311.pyc differ
diff --git a/.venv/lib/python3.11/site-packages/openai/lib/streaming/_assistants.py b/.venv/lib/python3.11/site-packages/openai/lib/streaming/_assistants.py
new file mode 100644
index 0000000000000000000000000000000000000000..6efb3ca3f1f870c39535b884a2cfdaeb17661b9d
--- /dev/null
+++ b/.venv/lib/python3.11/site-packages/openai/lib/streaming/_assistants.py
@@ -0,0 +1,1038 @@
+from __future__ import annotations
+
+import asyncio
+from types import TracebackType
+from typing import TYPE_CHECKING, Any, Generic, TypeVar, Callable, Iterable, Iterator, cast
+from typing_extensions import Awaitable, AsyncIterable, AsyncIterator, assert_never
+
+import httpx
+
+from ..._utils import is_dict, is_list, consume_sync_iterator, consume_async_iterator
+from ..._compat import model_dump
+from ..._models import construct_type
+from ..._streaming import Stream, AsyncStream
+from ...types.beta import AssistantStreamEvent
+from ...types.beta.threads import (
+    Run,
+    Text,
+    Message,
+    ImageFile,
+    TextDelta,
+    MessageDelta,
+    MessageContent,
+    MessageContentDelta,
+)
+from ...types.beta.threads.runs import RunStep, ToolCall, RunStepDelta, ToolCallDelta
+
+
+class AssistantEventHandler:
+    text_deltas: Iterable[str]
+    """Iterator over just the text deltas in the stream.
+
+    This corresponds to the `thread.message.delta` event
+    in the API.
+
+    ```py
+    for text in stream.text_deltas:
+        print(text, end="", flush=True)
+    print()
+    ```
+    """
+
+    def __init__(self) -> None:
+        self._current_event: AssistantStreamEvent | None = None
+        self._current_message_content_index: int | None = None
+        self._current_message_content: MessageContent | None = None
+        self._current_tool_call_index: int | None = None
+        self._current_tool_call: ToolCall | None = None
+        self.__current_run_step_id: str | None = None
+        self.__current_run: Run | None = None
+        self.__run_step_snapshots: dict[str, RunStep] = {}
+        self.__message_snapshots: dict[str, Message] = {}
+        self.__current_message_snapshot: Message | None = None
+
+        self.text_deltas = self.__text_deltas__()
+        self._iterator = self.__stream__()
+        self.__stream: Stream[AssistantStreamEvent] | None = None
+
+    def _init(self, stream: Stream[AssistantStreamEvent]) -> None:
+        if self.__stream:
+            raise RuntimeError(
+                "A single event handler cannot be shared between multiple streams; You will need to construct a new event handler instance"
+            )
+
+        self.__stream = stream
+
+    def __next__(self) -> AssistantStreamEvent:
+        return self._iterator.__next__()
+
+    def __iter__(self) -> Iterator[AssistantStreamEvent]:
+        for item in self._iterator:
+            yield item
+
+    @property
+    def current_event(self) -> AssistantStreamEvent | None:
+        return self._current_event
+
+    @property
+    def current_run(self) -> Run | None:
+        return self.__current_run
+
+    @property
+    def current_run_step_snapshot(self) -> RunStep | None:
+        if not self.__current_run_step_id:
+            return None
+
+        return self.__run_step_snapshots[self.__current_run_step_id]
+
+    @property
+    def current_message_snapshot(self) -> Message | None:
+        return self.__current_message_snapshot
+
+    def close(self) -> None:
+        """
+        Close the response and release the connection.
+
+        Automatically called when the context manager exits.
+        """
+        if self.__stream:
+            self.__stream.close()
+
+    def until_done(self) -> None:
+        """Waits until the stream has been consumed"""
+        consume_sync_iterator(self)
+
+    def get_final_run(self) -> Run:
+        """Wait for the stream to finish and returns the completed Run object"""
+        self.until_done()
+
+        if not self.__current_run:
+            raise RuntimeError("No final run object found")
+
+        return self.__current_run
+
+    def get_final_run_steps(self) -> list[RunStep]:
+        """Wait for the stream to finish and returns the steps taken in this run"""
+        self.until_done()
+
+        if not self.__run_step_snapshots:
+            raise RuntimeError("No run steps found")
+
+        return [step for step in self.__run_step_snapshots.values()]
+
+    def get_final_messages(self) -> list[Message]:
+        """Wait for the stream to finish and returns the messages emitted in this run"""
+        self.until_done()
+
+        if not self.__message_snapshots:
+            raise RuntimeError("No messages found")
+
+        return [message for message in self.__message_snapshots.values()]
+
+    def __text_deltas__(self) -> Iterator[str]:
+        for event in self:
+            if event.event != "thread.message.delta":
+                continue
+
+            for content_delta in event.data.delta.content or []:
+                if content_delta.type == "text" and content_delta.text and content_delta.text.value:
+                    yield content_delta.text.value
+
+    # event handlers
+
+    def on_end(self) -> None:
+        """Fires when the stream has finished.
+
+        This happens if the stream is read to completion
+        or if an exception occurs during iteration.
+        """
+
+    def on_event(self, event: AssistantStreamEvent) -> None:
+        """Callback that is fired for every Server-Sent-Event"""
+
+    def on_run_step_created(self, run_step: RunStep) -> None:
+        """Callback that is fired when a run step is created"""
+
+    def on_run_step_delta(self, delta: RunStepDelta, snapshot: RunStep) -> None:
+        """Callback that is fired whenever a run step delta is returned from the API
+
+        The first argument is just the delta as sent by the API and the second argument
+        is the accumulated snapshot of the run step. For example, a tool calls event may
+        look like this:
+
+        # delta
+        tool_calls=[
+            RunStepDeltaToolCallsCodeInterpreter(
+                index=0,
+                type='code_interpreter',
+                id=None,
+                code_interpreter=CodeInterpreter(input=' sympy', outputs=None)
+            )
+        ]
+        # snapshot
+        tool_calls=[
+            CodeToolCall(
+                id='call_wKayJlcYV12NiadiZuJXxcfx',
+                code_interpreter=CodeInterpreter(input='from sympy', outputs=[]),
+                type='code_interpreter',
+                index=0
+            )
+        ],
+        """
+
+    def on_run_step_done(self, run_step: RunStep) -> None:
+        """Callback that is fired when a run step is completed"""
+
+    def on_tool_call_created(self, tool_call: ToolCall) -> None:
+        """Callback that is fired when a tool call is created"""
+
+    def on_tool_call_delta(self, delta: ToolCallDelta, snapshot: ToolCall) -> None:
+        """Callback that is fired when a tool call delta is encountered"""
+
+    def on_tool_call_done(self, tool_call: ToolCall) -> None:
+        """Callback that is fired when a tool call delta is encountered"""
+
+    def on_exception(self, exception: Exception) -> None:
+        """Fired whenever an exception happens during streaming"""
+
+    def on_timeout(self) -> None:
+        """Fires if the request times out"""
+
+    def on_message_created(self, message: Message) -> None:
+        """Callback that is fired when a message is created"""
+
+    def on_message_delta(self, delta: MessageDelta, snapshot: Message) -> None:
+        """Callback that is fired whenever a message delta is returned from the API
+
+        The first argument is just the delta as sent by the API and the second argument
+        is the accumulated snapshot of the message. For example, a text content event may
+        look like this:
+
+        # delta
+        MessageDeltaText(
+            index=0,
+            type='text',
+            text=Text(
+                value=' Jane'
+            ),
+        )
+        # snapshot
+        MessageContentText(
+            index=0,
+            type='text',
+            text=Text(
+                value='Certainly, Jane'
+            ),
+        )
+        """
+
+    def on_message_done(self, message: Message) -> None:
+        """Callback that is fired when a message is completed"""
+
+    def on_text_created(self, text: Text) -> None:
+        """Callback that is fired when a text content block is created"""
+
+    def on_text_delta(self, delta: TextDelta, snapshot: Text) -> None:
+        """Callback that is fired whenever a text content delta is returned
+        by the API.
+
+        The first argument is just the delta as sent by the API and the second argument
+        is the accumulated snapshot of the text. For example:
+
+        on_text_delta(TextDelta(value="The"), Text(value="The")),
+        on_text_delta(TextDelta(value=" solution"), Text(value="The solution")),
+        on_text_delta(TextDelta(value=" to"), Text(value="The solution to")),
+        on_text_delta(TextDelta(value=" the"), Text(value="The solution to the")),
+        on_text_delta(TextDelta(value=" equation"), Text(value="The solution to the equation")),
+        """
+
+    def on_text_done(self, text: Text) -> None:
+        """Callback that is fired when a text content block is finished"""
+
+    def on_image_file_done(self, image_file: ImageFile) -> None:
+        """Callback that is fired when an image file block is finished"""
+
+    def _emit_sse_event(self, event: AssistantStreamEvent) -> None:
+        self._current_event = event
+        self.on_event(event)
+
+        self.__current_message_snapshot, new_content = accumulate_event(
+            event=event,
+            current_message_snapshot=self.__current_message_snapshot,
+        )
+        if self.__current_message_snapshot is not None:
+            self.__message_snapshots[self.__current_message_snapshot.id] = self.__current_message_snapshot
+
+        accumulate_run_step(
+            event=event,
+            run_step_snapshots=self.__run_step_snapshots,
+        )
+
+        for content_delta in new_content:
+            assert self.__current_message_snapshot is not None
+
+            block = self.__current_message_snapshot.content[content_delta.index]
+            if block.type == "text":
+                self.on_text_created(block.text)
+
+        if (
+            event.event == "thread.run.completed"
+            or event.event == "thread.run.cancelled"
+            or event.event == "thread.run.expired"
+            or event.event == "thread.run.failed"
+            or event.event == "thread.run.requires_action"
+            or event.event == "thread.run.incomplete"
+        ):
+            self.__current_run = event.data
+            if self._current_tool_call:
+                self.on_tool_call_done(self._current_tool_call)
+        elif (
+            event.event == "thread.run.created"
+            or event.event == "thread.run.in_progress"
+            or event.event == "thread.run.cancelling"
+            or event.event == "thread.run.queued"
+        ):
+            self.__current_run = event.data
+        elif event.event == "thread.message.created":
+            self.on_message_created(event.data)
+        elif event.event == "thread.message.delta":
+            snapshot = self.__current_message_snapshot
+            assert snapshot is not None
+
+            message_delta = event.data.delta
+            if message_delta.content is not None:
+                for content_delta in message_delta.content:
+                    if content_delta.type == "text" and content_delta.text:
+                        snapshot_content = snapshot.content[content_delta.index]
+                        assert snapshot_content.type == "text"
+                        self.on_text_delta(content_delta.text, snapshot_content.text)
+
+                    # If the delta is for a new message content:
+                    # - emit on_text_done/on_image_file_done for the previous message content
+                    # - emit on_text_created/on_image_created for the new message content
+                    if content_delta.index != self._current_message_content_index:
+                        if self._current_message_content is not None:
+                            if self._current_message_content.type == "text":
+                                self.on_text_done(self._current_message_content.text)
+                            elif self._current_message_content.type == "image_file":
+                                self.on_image_file_done(self._current_message_content.image_file)
+
+                        self._current_message_content_index = content_delta.index
+                        self._current_message_content = snapshot.content[content_delta.index]
+
+                    # Update the current_message_content (delta event is correctly emitted already)
+                    self._current_message_content = snapshot.content[content_delta.index]
+
+            self.on_message_delta(event.data.delta, snapshot)
+        elif event.event == "thread.message.completed" or event.event == "thread.message.incomplete":
+            self.__current_message_snapshot = event.data
+            self.__message_snapshots[event.data.id] = event.data
+
+            if self._current_message_content_index is not None:
+                content = event.data.content[self._current_message_content_index]
+                if content.type == "text":
+                    self.on_text_done(content.text)
+                elif content.type == "image_file":
+                    self.on_image_file_done(content.image_file)
+
+            self.on_message_done(event.data)
+        elif event.event == "thread.run.step.created":
+            self.__current_run_step_id = event.data.id
+            self.on_run_step_created(event.data)
+        elif event.event == "thread.run.step.in_progress":
+            self.__current_run_step_id = event.data.id
+        elif event.event == "thread.run.step.delta":
+            step_snapshot = self.__run_step_snapshots[event.data.id]
+
+            run_step_delta = event.data.delta
+            if (
+                run_step_delta.step_details
+                and run_step_delta.step_details.type == "tool_calls"
+                and run_step_delta.step_details.tool_calls is not None
+            ):
+                assert step_snapshot.step_details.type == "tool_calls"
+                for tool_call_delta in run_step_delta.step_details.tool_calls:
+                    if tool_call_delta.index == self._current_tool_call_index:
+                        self.on_tool_call_delta(
+                            tool_call_delta,
+                            step_snapshot.step_details.tool_calls[tool_call_delta.index],
+                        )
+
+                    # If the delta is for a new tool call:
+                    # - emit on_tool_call_done for the previous tool_call
+                    # - emit on_tool_call_created for the new tool_call
+                    if tool_call_delta.index != self._current_tool_call_index:
+                        if self._current_tool_call is not None:
+                            self.on_tool_call_done(self._current_tool_call)
+
+                        self._current_tool_call_index = tool_call_delta.index
+                        self._current_tool_call = step_snapshot.step_details.tool_calls[tool_call_delta.index]
+                        self.on_tool_call_created(self._current_tool_call)
+
+                    # Update the current_tool_call (delta event is correctly emitted already)
+                    self._current_tool_call = step_snapshot.step_details.tool_calls[tool_call_delta.index]
+
+            self.on_run_step_delta(
+                event.data.delta,
+                step_snapshot,
+            )
+        elif (
+            event.event == "thread.run.step.completed"
+            or event.event == "thread.run.step.cancelled"
+            or event.event == "thread.run.step.expired"
+            or event.event == "thread.run.step.failed"
+        ):
+            if self._current_tool_call:
+                self.on_tool_call_done(self._current_tool_call)
+
+            self.on_run_step_done(event.data)
+            self.__current_run_step_id = None
+        elif event.event == "thread.created" or event.event == "thread.message.in_progress" or event.event == "error":
+            # currently no special handling
+            ...
+        else:
+            # we only want to error at build-time
+            if TYPE_CHECKING:  # type: ignore[unreachable]
+                assert_never(event)
+
+        self._current_event = None
+
+    def __stream__(self) -> Iterator[AssistantStreamEvent]:
+        stream = self.__stream
+        if not stream:
+            raise RuntimeError("Stream has not been started yet")
+
+        try:
+            for event in stream:
+                self._emit_sse_event(event)
+
+                yield event
+        except (httpx.TimeoutException, asyncio.TimeoutError) as exc:
+            self.on_timeout()
+            self.on_exception(exc)
+            raise
+        except Exception as exc:
+            self.on_exception(exc)
+            raise
+        finally:
+            self.on_end()
+
+
+AssistantEventHandlerT = TypeVar("AssistantEventHandlerT", bound=AssistantEventHandler)
+
+
+class AssistantStreamManager(Generic[AssistantEventHandlerT]):
+    """Wrapper over AssistantStreamEventHandler that is returned by `.stream()`
+    so that a context manager can be used.
+
+    ```py
+    with client.threads.create_and_run_stream(...) as stream:
+        for event in stream:
+            ...
+    ```
+    """
+
+    def __init__(
+        self,
+        api_request: Callable[[], Stream[AssistantStreamEvent]],
+        *,
+        event_handler: AssistantEventHandlerT,
+    ) -> None:
+        self.__stream: Stream[AssistantStreamEvent] | None = None
+        self.__event_handler = event_handler
+        self.__api_request = api_request
+
+    def __enter__(self) -> AssistantEventHandlerT:
+        self.__stream = self.__api_request()
+        self.__event_handler._init(self.__stream)
+        return self.__event_handler
+
+    def __exit__(
+        self,
+        exc_type: type[BaseException] | None,
+        exc: BaseException | None,
+        exc_tb: TracebackType | None,
+    ) -> None:
+        if self.__stream is not None:
+            self.__stream.close()
+
+
+class AsyncAssistantEventHandler:
+    text_deltas: AsyncIterable[str]
+    """Iterator over just the text deltas in the stream.
+
+    This corresponds to the `thread.message.delta` event
+    in the API.
+
+    ```py
+    async for text in stream.text_deltas:
+        print(text, end="", flush=True)
+    print()
+    ```
+    """
+
+    def __init__(self) -> None:
+        self._current_event: AssistantStreamEvent | None = None
+        self._current_message_content_index: int | None = None
+        self._current_message_content: MessageContent | None = None
+        self._current_tool_call_index: int | None = None
+        self._current_tool_call: ToolCall | None = None
+        self.__current_run_step_id: str | None = None
+        self.__current_run: Run | None = None
+        self.__run_step_snapshots: dict[str, RunStep] = {}
+        self.__message_snapshots: dict[str, Message] = {}
+        self.__current_message_snapshot: Message | None = None
+
+        self.text_deltas = self.__text_deltas__()
+        self._iterator = self.__stream__()
+        self.__stream: AsyncStream[AssistantStreamEvent] | None = None
+
+    def _init(self, stream: AsyncStream[AssistantStreamEvent]) -> None:
+        if self.__stream:
+            raise RuntimeError(
+                "A single event handler cannot be shared between multiple streams; You will need to construct a new event handler instance"
+            )
+
+        self.__stream = stream
+
+    async def __anext__(self) -> AssistantStreamEvent:
+        return await self._iterator.__anext__()
+
+    async def __aiter__(self) -> AsyncIterator[AssistantStreamEvent]:
+        async for item in self._iterator:
+            yield item
+
+    async def close(self) -> None:
+        """
+        Close the response and release the connection.
+
+        Automatically called when the context manager exits.
+        """
+        if self.__stream:
+            await self.__stream.close()
+
+    @property
+    def current_event(self) -> AssistantStreamEvent | None:
+        return self._current_event
+
+    @property
+    def current_run(self) -> Run | None:
+        return self.__current_run
+
+    @property
+    def current_run_step_snapshot(self) -> RunStep | None:
+        if not self.__current_run_step_id:
+            return None
+
+        return self.__run_step_snapshots[self.__current_run_step_id]
+
+    @property
+    def current_message_snapshot(self) -> Message | None:
+        return self.__current_message_snapshot
+
+    async def until_done(self) -> None:
+        """Waits until the stream has been consumed"""
+        await consume_async_iterator(self)
+
+    async def get_final_run(self) -> Run:
+        """Wait for the stream to finish and returns the completed Run object"""
+        await self.until_done()
+
+        if not self.__current_run:
+            raise RuntimeError("No final run object found")
+
+        return self.__current_run
+
+    async def get_final_run_steps(self) -> list[RunStep]:
+        """Wait for the stream to finish and returns the steps taken in this run"""
+        await self.until_done()
+
+        if not self.__run_step_snapshots:
+            raise RuntimeError("No run steps found")
+
+        return [step for step in self.__run_step_snapshots.values()]
+
+    async def get_final_messages(self) -> list[Message]:
+        """Wait for the stream to finish and returns the messages emitted in this run"""
+        await self.until_done()
+
+        if not self.__message_snapshots:
+            raise RuntimeError("No messages found")
+
+        return [message for message in self.__message_snapshots.values()]
+
+    async def __text_deltas__(self) -> AsyncIterator[str]:
+        async for event in self:
+            if event.event != "thread.message.delta":
+                continue
+
+            for content_delta in event.data.delta.content or []:
+                if content_delta.type == "text" and content_delta.text and content_delta.text.value:
+                    yield content_delta.text.value
+
+    # event handlers
+
+    async def on_end(self) -> None:
+        """Fires when the stream has finished.
+
+        This happens if the stream is read to completion
+        or if an exception occurs during iteration.
+        """
+
+    async def on_event(self, event: AssistantStreamEvent) -> None:
+        """Callback that is fired for every Server-Sent-Event"""
+
+    async def on_run_step_created(self, run_step: RunStep) -> None:
+        """Callback that is fired when a run step is created"""
+
+    async def on_run_step_delta(self, delta: RunStepDelta, snapshot: RunStep) -> None:
+        """Callback that is fired whenever a run step delta is returned from the API
+
+        The first argument is just the delta as sent by the API and the second argument
+        is the accumulated snapshot of the run step. For example, a tool calls event may
+        look like this:
+
+        # delta
+        tool_calls=[
+            RunStepDeltaToolCallsCodeInterpreter(
+                index=0,
+                type='code_interpreter',
+                id=None,
+                code_interpreter=CodeInterpreter(input=' sympy', outputs=None)
+            )
+        ]
+        # snapshot
+        tool_calls=[
+            CodeToolCall(
+                id='call_wKayJlcYV12NiadiZuJXxcfx',
+                code_interpreter=CodeInterpreter(input='from sympy', outputs=[]),
+                type='code_interpreter',
+                index=0
+            )
+        ],
+        """
+
+    async def on_run_step_done(self, run_step: RunStep) -> None:
+        """Callback that is fired when a run step is completed"""
+
+    async def on_tool_call_created(self, tool_call: ToolCall) -> None:
+        """Callback that is fired when a tool call is created"""
+
+    async def on_tool_call_delta(self, delta: ToolCallDelta, snapshot: ToolCall) -> None:
+        """Callback that is fired when a tool call delta is encountered"""
+
+    async def on_tool_call_done(self, tool_call: ToolCall) -> None:
+        """Callback that is fired when a tool call delta is encountered"""
+
+    async def on_exception(self, exception: Exception) -> None:
+        """Fired whenever an exception happens during streaming"""
+
+    async def on_timeout(self) -> None:
+        """Fires if the request times out"""
+
+    async def on_message_created(self, message: Message) -> None:
+        """Callback that is fired when a message is created"""
+
+    async def on_message_delta(self, delta: MessageDelta, snapshot: Message) -> None:
+        """Callback that is fired whenever a message delta is returned from the API
+
+        The first argument is just the delta as sent by the API and the second argument
+        is the accumulated snapshot of the message. For example, a text content event may
+        look like this:
+
+        # delta
+        MessageDeltaText(
+            index=0,
+            type='text',
+            text=Text(
+                value=' Jane'
+            ),
+        )
+        # snapshot
+        MessageContentText(
+            index=0,
+            type='text',
+            text=Text(
+                value='Certainly, Jane'
+            ),
+        )
+        """
+
+    async def on_message_done(self, message: Message) -> None:
+        """Callback that is fired when a message is completed"""
+
+    async def on_text_created(self, text: Text) -> None:
+        """Callback that is fired when a text content block is created"""
+
+    async def on_text_delta(self, delta: TextDelta, snapshot: Text) -> None:
+        """Callback that is fired whenever a text content delta is returned
+        by the API.
+
+        The first argument is just the delta as sent by the API and the second argument
+        is the accumulated snapshot of the text. For example:
+
+        on_text_delta(TextDelta(value="The"), Text(value="The")),
+        on_text_delta(TextDelta(value=" solution"), Text(value="The solution")),
+        on_text_delta(TextDelta(value=" to"), Text(value="The solution to")),
+        on_text_delta(TextDelta(value=" the"), Text(value="The solution to the")),
+        on_text_delta(TextDelta(value=" equation"), Text(value="The solution to the equivalent")),
+        """
+
+    async def on_text_done(self, text: Text) -> None:
+        """Callback that is fired when a text content block is finished"""
+
+    async def on_image_file_done(self, image_file: ImageFile) -> None:
+        """Callback that is fired when an image file block is finished"""
+
+    async def _emit_sse_event(self, event: AssistantStreamEvent) -> None:
+        self._current_event = event
+        await self.on_event(event)
+
+        self.__current_message_snapshot, new_content = accumulate_event(
+            event=event,
+            current_message_snapshot=self.__current_message_snapshot,
+        )
+        if self.__current_message_snapshot is not None:
+            self.__message_snapshots[self.__current_message_snapshot.id] = self.__current_message_snapshot
+
+        accumulate_run_step(
+            event=event,
+            run_step_snapshots=self.__run_step_snapshots,
+        )
+
+        for content_delta in new_content:
+            assert self.__current_message_snapshot is not None
+
+            block = self.__current_message_snapshot.content[content_delta.index]
+            if block.type == "text":
+                await self.on_text_created(block.text)
+
+        if (
+            event.event == "thread.run.completed"
+            or event.event == "thread.run.cancelled"
+            or event.event == "thread.run.expired"
+            or event.event == "thread.run.failed"
+            or event.event == "thread.run.requires_action"
+            or event.event == "thread.run.incomplete"
+        ):
+            self.__current_run = event.data
+            if self._current_tool_call:
+                await self.on_tool_call_done(self._current_tool_call)
+        elif (
+            event.event == "thread.run.created"
+            or event.event == "thread.run.in_progress"
+            or event.event == "thread.run.cancelling"
+            or event.event == "thread.run.queued"
+        ):
+            self.__current_run = event.data
+        elif event.event == "thread.message.created":
+            await self.on_message_created(event.data)
+        elif event.event == "thread.message.delta":
+            snapshot = self.__current_message_snapshot
+            assert snapshot is not None
+
+            message_delta = event.data.delta
+            if message_delta.content is not None:
+                for content_delta in message_delta.content:
+                    if content_delta.type == "text" and content_delta.text:
+                        snapshot_content = snapshot.content[content_delta.index]
+                        assert snapshot_content.type == "text"
+                        await self.on_text_delta(content_delta.text, snapshot_content.text)
+
+                    # If the delta is for a new message content:
+                    # - emit on_text_done/on_image_file_done for the previous message content
+                    # - emit on_text_created/on_image_created for the new message content
+                    if content_delta.index != self._current_message_content_index:
+                        if self._current_message_content is not None:
+                            if self._current_message_content.type == "text":
+                                await self.on_text_done(self._current_message_content.text)
+                            elif self._current_message_content.type == "image_file":
+                                await self.on_image_file_done(self._current_message_content.image_file)
+
+                        self._current_message_content_index = content_delta.index
+                        self._current_message_content = snapshot.content[content_delta.index]
+
+                    # Update the current_message_content (delta event is correctly emitted already)
+                    self._current_message_content = snapshot.content[content_delta.index]
+
+            await self.on_message_delta(event.data.delta, snapshot)
+        elif event.event == "thread.message.completed" or event.event == "thread.message.incomplete":
+            self.__current_message_snapshot = event.data
+            self.__message_snapshots[event.data.id] = event.data
+
+            if self._current_message_content_index is not None:
+                content = event.data.content[self._current_message_content_index]
+                if content.type == "text":
+                    await self.on_text_done(content.text)
+                elif content.type == "image_file":
+                    await self.on_image_file_done(content.image_file)
+
+            await self.on_message_done(event.data)
+        elif event.event == "thread.run.step.created":
+            self.__current_run_step_id = event.data.id
+            await self.on_run_step_created(event.data)
+        elif event.event == "thread.run.step.in_progress":
+            self.__current_run_step_id = event.data.id
+        elif event.event == "thread.run.step.delta":
+            step_snapshot = self.__run_step_snapshots[event.data.id]
+
+            run_step_delta = event.data.delta
+            if (
+                run_step_delta.step_details
+                and run_step_delta.step_details.type == "tool_calls"
+                and run_step_delta.step_details.tool_calls is not None
+            ):
+                assert step_snapshot.step_details.type == "tool_calls"
+                for tool_call_delta in run_step_delta.step_details.tool_calls:
+                    if tool_call_delta.index == self._current_tool_call_index:
+                        await self.on_tool_call_delta(
+                            tool_call_delta,
+                            step_snapshot.step_details.tool_calls[tool_call_delta.index],
+                        )
+
+                    # If the delta is for a new tool call:
+                    # - emit on_tool_call_done for the previous tool_call
+                    # - emit on_tool_call_created for the new tool_call
+                    if tool_call_delta.index != self._current_tool_call_index:
+                        if self._current_tool_call is not None:
+                            await self.on_tool_call_done(self._current_tool_call)
+
+                        self._current_tool_call_index = tool_call_delta.index
+                        self._current_tool_call = step_snapshot.step_details.tool_calls[tool_call_delta.index]
+                        await self.on_tool_call_created(self._current_tool_call)
+
+                    # Update the current_tool_call (delta event is correctly emitted already)
+                    self._current_tool_call = step_snapshot.step_details.tool_calls[tool_call_delta.index]
+
+            await self.on_run_step_delta(
+                event.data.delta,
+                step_snapshot,
+            )
+        elif (
+            event.event == "thread.run.step.completed"
+            or event.event == "thread.run.step.cancelled"
+            or event.event == "thread.run.step.expired"
+            or event.event == "thread.run.step.failed"
+        ):
+            if self._current_tool_call:
+                await self.on_tool_call_done(self._current_tool_call)
+
+            await self.on_run_step_done(event.data)
+            self.__current_run_step_id = None
+        elif event.event == "thread.created" or event.event == "thread.message.in_progress" or event.event == "error":
+            # currently no special handling
+            ...
+        else:
+            # we only want to error at build-time
+            if TYPE_CHECKING:  # type: ignore[unreachable]
+                assert_never(event)
+
+        self._current_event = None
+
+    async def __stream__(self) -> AsyncIterator[AssistantStreamEvent]:
+        stream = self.__stream
+        if not stream:
+            raise RuntimeError("Stream has not been started yet")
+
+        try:
+            async for event in stream:
+                await self._emit_sse_event(event)
+
+                yield event
+        except (httpx.TimeoutException, asyncio.TimeoutError) as exc:
+            await self.on_timeout()
+            await self.on_exception(exc)
+            raise
+        except Exception as exc:
+            await self.on_exception(exc)
+            raise
+        finally:
+            await self.on_end()
+
+
+AsyncAssistantEventHandlerT = TypeVar("AsyncAssistantEventHandlerT", bound=AsyncAssistantEventHandler)
+
+
+class AsyncAssistantStreamManager(Generic[AsyncAssistantEventHandlerT]):
+    """Wrapper over AsyncAssistantStreamEventHandler that is returned by `.stream()`
+    so that an async context manager can be used without `await`ing the
+    original client call.
+
+    ```py
+    async with client.threads.create_and_run_stream(...) as stream:
+        async for event in stream:
+            ...
+    ```
+    """
+
+    def __init__(
+        self,
+        api_request: Awaitable[AsyncStream[AssistantStreamEvent]],
+        *,
+        event_handler: AsyncAssistantEventHandlerT,
+    ) -> None:
+        self.__stream: AsyncStream[AssistantStreamEvent] | None = None
+        self.__event_handler = event_handler
+        self.__api_request = api_request
+
+    async def __aenter__(self) -> AsyncAssistantEventHandlerT:
+        self.__stream = await self.__api_request
+        self.__event_handler._init(self.__stream)
+        return self.__event_handler
+
+    async def __aexit__(
+        self,
+        exc_type: type[BaseException] | None,
+        exc: BaseException | None,
+        exc_tb: TracebackType | None,
+    ) -> None:
+        if self.__stream is not None:
+            await self.__stream.close()
+
+
+def accumulate_run_step(
+    *,
+    event: AssistantStreamEvent,
+    run_step_snapshots: dict[str, RunStep],
+) -> None:
+    if event.event == "thread.run.step.created":
+        run_step_snapshots[event.data.id] = event.data
+        return
+
+    if event.event == "thread.run.step.delta":
+        data = event.data
+        snapshot = run_step_snapshots[data.id]
+
+        if data.delta:
+            merged = accumulate_delta(
+                cast(
+                    "dict[object, object]",
+                    model_dump(snapshot, exclude_unset=True, warnings=False),
+                ),
+                cast(
+                    "dict[object, object]",
+                    model_dump(data.delta, exclude_unset=True, warnings=False),
+                ),
+            )
+            run_step_snapshots[snapshot.id] = cast(RunStep, construct_type(type_=RunStep, value=merged))
+
+    return None
+
+
+def accumulate_event(
+    *,
+    event: AssistantStreamEvent,
+    current_message_snapshot: Message | None,
+) -> tuple[Message | None, list[MessageContentDelta]]:
+    """Returns a tuple of message snapshot and newly created text message deltas"""
+    if event.event == "thread.message.created":
+        return event.data, []
+
+    new_content: list[MessageContentDelta] = []
+
+    if event.event != "thread.message.delta":
+        return current_message_snapshot, []
+
+    if not current_message_snapshot:
+        raise RuntimeError("Encountered a message delta with no previous snapshot")
+
+    data = event.data
+    if data.delta.content:
+        for content_delta in data.delta.content:
+            try:
+                block = current_message_snapshot.content[content_delta.index]
+            except IndexError:
+                current_message_snapshot.content.insert(
+                    content_delta.index,
+                    cast(
+                        MessageContent,
+                        construct_type(
+                            # mypy doesn't allow Content for some reason
+                            type_=cast(Any, MessageContent),
+                            value=model_dump(content_delta, exclude_unset=True, warnings=False),
+                        ),
+                    ),
+                )
+                new_content.append(content_delta)
+            else:
+                merged = accumulate_delta(
+                    cast(
+                        "dict[object, object]",
+                        model_dump(block, exclude_unset=True, warnings=False),
+                    ),
+                    cast(
+                        "dict[object, object]",
+                        model_dump(content_delta, exclude_unset=True, warnings=False),
+                    ),
+                )
+                current_message_snapshot.content[content_delta.index] = cast(
+                    MessageContent,
+                    construct_type(
+                        # mypy doesn't allow Content for some reason
+                        type_=cast(Any, MessageContent),
+                        value=merged,
+                    ),
+                )
+
+    return current_message_snapshot, new_content
+
+
+def accumulate_delta(acc: dict[object, object], delta: dict[object, object]) -> dict[object, object]:
+    for key, delta_value in delta.items():
+        if key not in acc:
+            acc[key] = delta_value
+            continue
+
+        acc_value = acc[key]
+        if acc_value is None:
+            acc[key] = delta_value
+            continue
+
+        # the `index` property is used in arrays of objects so it should
+        # not be accumulated like other values e.g.
+        # [{'foo': 'bar', 'index': 0}]
+        #
+        # the same applies to `type` properties as they're used for
+        # discriminated unions
+        if key == "index" or key == "type":
+            acc[key] = delta_value
+            continue
+
+        if isinstance(acc_value, str) and isinstance(delta_value, str):
+            acc_value += delta_value
+        elif isinstance(acc_value, (int, float)) and isinstance(delta_value, (int, float)):
+            acc_value += delta_value
+        elif is_dict(acc_value) and is_dict(delta_value):
+            acc_value = accumulate_delta(acc_value, delta_value)
+        elif is_list(acc_value) and is_list(delta_value):
+            # for lists of non-dictionary items we'll only ever get new entries
+            # in the array, existing entries will never be changed
+            if all(isinstance(x, (str, int, float)) for x in acc_value):
+                acc_value.extend(delta_value)
+                continue
+
+            for delta_entry in delta_value:
+                if not is_dict(delta_entry):
+                    raise TypeError(f"Unexpected list delta entry is not a dictionary: {delta_entry}")
+
+                try:
+                    index = delta_entry["index"]
+                except KeyError as exc:
+                    raise RuntimeError(f"Expected list delta entry to have an `index` key; {delta_entry}") from exc
+
+                if not isinstance(index, int):
+                    raise TypeError(f"Unexpected, list delta entry `index` value is not an integer; {index}")
+
+                try:
+                    acc_entry = acc_value[index]
+                except IndexError:
+                    acc_value.insert(index, delta_entry)
+                else:
+                    if not is_dict(acc_entry):
+                        raise TypeError("not handled yet")
+
+                    acc_value[index] = accumulate_delta(acc_entry, delta_entry)
+
+        acc[key] = acc_value
+
+    return acc
diff --git a/.venv/lib/python3.11/site-packages/openai/lib/streaming/_deltas.py b/.venv/lib/python3.11/site-packages/openai/lib/streaming/_deltas.py
new file mode 100644
index 0000000000000000000000000000000000000000..a5e13176122c7e95d564703e1f5bf3df48a682b6
--- /dev/null
+++ b/.venv/lib/python3.11/site-packages/openai/lib/streaming/_deltas.py
@@ -0,0 +1,64 @@
+from __future__ import annotations
+
+from ..._utils import is_dict, is_list
+
+
+def accumulate_delta(acc: dict[object, object], delta: dict[object, object]) -> dict[object, object]:
+    for key, delta_value in delta.items():
+        if key not in acc:
+            acc[key] = delta_value
+            continue
+
+        acc_value = acc[key]
+        if acc_value is None:
+            acc[key] = delta_value
+            continue
+
+        # the `index` property is used in arrays of objects so it should
+        # not be accumulated like other values e.g.
+        # [{'foo': 'bar', 'index': 0}]
+        #
+        # the same applies to `type` properties as they're used for
+        # discriminated unions
+        if key == "index" or key == "type":
+            acc[key] = delta_value
+            continue
+
+        if isinstance(acc_value, str) and isinstance(delta_value, str):
+            acc_value += delta_value
+        elif isinstance(acc_value, (int, float)) and isinstance(delta_value, (int, float)):
+            acc_value += delta_value
+        elif is_dict(acc_value) and is_dict(delta_value):
+            acc_value = accumulate_delta(acc_value, delta_value)
+        elif is_list(acc_value) and is_list(delta_value):
+            # for lists of non-dictionary items we'll only ever get new entries
+            # in the array, existing entries will never be changed
+            if all(isinstance(x, (str, int, float)) for x in acc_value):
+                acc_value.extend(delta_value)
+                continue
+
+            for delta_entry in delta_value:
+                if not is_dict(delta_entry):
+                    raise TypeError(f"Unexpected list delta entry is not a dictionary: {delta_entry}")
+
+                try:
+                    index = delta_entry["index"]
+                except KeyError as exc:
+                    raise RuntimeError(f"Expected list delta entry to have an `index` key; {delta_entry}") from exc
+
+                if not isinstance(index, int):
+                    raise TypeError(f"Unexpected, list delta entry `index` value is not an integer; {index}")
+
+                try:
+                    acc_entry = acc_value[index]
+                except IndexError:
+                    acc_value.insert(index, delta_entry)
+                else:
+                    if not is_dict(acc_entry):
+                        raise TypeError("not handled yet")
+
+                    acc_value[index] = accumulate_delta(acc_entry, delta_entry)
+
+        acc[key] = acc_value
+
+    return acc
diff --git a/.venv/lib/python3.11/site-packages/openai/lib/streaming/chat/__init__.py b/.venv/lib/python3.11/site-packages/openai/lib/streaming/chat/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..dfa3f3f2e3b5c97252fb6cec3c6343dab4f967a9
--- /dev/null
+++ b/.venv/lib/python3.11/site-packages/openai/lib/streaming/chat/__init__.py
@@ -0,0 +1,27 @@
+from ._types import (
+    ParsedChoiceSnapshot as ParsedChoiceSnapshot,
+    ParsedChatCompletionSnapshot as ParsedChatCompletionSnapshot,
+    ParsedChatCompletionMessageSnapshot as ParsedChatCompletionMessageSnapshot,
+)
+from ._events import (
+    ChunkEvent as ChunkEvent,
+    ContentDoneEvent as ContentDoneEvent,
+    RefusalDoneEvent as RefusalDoneEvent,
+    ContentDeltaEvent as ContentDeltaEvent,
+    RefusalDeltaEvent as RefusalDeltaEvent,
+    LogprobsContentDoneEvent as LogprobsContentDoneEvent,
+    LogprobsRefusalDoneEvent as LogprobsRefusalDoneEvent,
+    ChatCompletionStreamEvent as ChatCompletionStreamEvent,
+    LogprobsContentDeltaEvent as LogprobsContentDeltaEvent,
+    LogprobsRefusalDeltaEvent as LogprobsRefusalDeltaEvent,
+    ParsedChatCompletionSnapshot as ParsedChatCompletionSnapshot,
+    FunctionToolCallArgumentsDoneEvent as FunctionToolCallArgumentsDoneEvent,
+    FunctionToolCallArgumentsDeltaEvent as FunctionToolCallArgumentsDeltaEvent,
+)
+from ._completions import (
+    ChatCompletionStream as ChatCompletionStream,
+    AsyncChatCompletionStream as AsyncChatCompletionStream,
+    ChatCompletionStreamState as ChatCompletionStreamState,
+    ChatCompletionStreamManager as ChatCompletionStreamManager,
+    AsyncChatCompletionStreamManager as AsyncChatCompletionStreamManager,
+)
diff --git a/.venv/lib/python3.11/site-packages/openai/lib/streaming/chat/__pycache__/_completions.cpython-311.pyc b/.venv/lib/python3.11/site-packages/openai/lib/streaming/chat/__pycache__/_completions.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..d77c7fb7c1636f2da1cd6282607f9b24ba5eed7c
Binary files /dev/null and b/.venv/lib/python3.11/site-packages/openai/lib/streaming/chat/__pycache__/_completions.cpython-311.pyc differ
diff --git a/.venv/lib/python3.11/site-packages/openai/lib/streaming/chat/__pycache__/_events.cpython-311.pyc b/.venv/lib/python3.11/site-packages/openai/lib/streaming/chat/__pycache__/_events.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..6b3beaefe698011e366f2fbca78184ae4db605dc
Binary files /dev/null and b/.venv/lib/python3.11/site-packages/openai/lib/streaming/chat/__pycache__/_events.cpython-311.pyc differ
diff --git a/.venv/lib/python3.11/site-packages/openai/lib/streaming/chat/_completions.py b/.venv/lib/python3.11/site-packages/openai/lib/streaming/chat/_completions.py
new file mode 100644
index 0000000000000000000000000000000000000000..214609135447173ab048eef435712b0691b433cd
--- /dev/null
+++ b/.venv/lib/python3.11/site-packages/openai/lib/streaming/chat/_completions.py
@@ -0,0 +1,755 @@
+from __future__ import annotations
+
+import inspect
+from types import TracebackType
+from typing import TYPE_CHECKING, Any, Generic, Callable, Iterable, Awaitable, AsyncIterator, cast
+from typing_extensions import Self, Iterator, assert_never
+
+from jiter import from_json
+
+from ._types import ParsedChoiceSnapshot, ParsedChatCompletionSnapshot, ParsedChatCompletionMessageSnapshot
+from ._events import (
+    ChunkEvent,
+    ContentDoneEvent,
+    RefusalDoneEvent,
+    ContentDeltaEvent,
+    RefusalDeltaEvent,
+    LogprobsContentDoneEvent,
+    LogprobsRefusalDoneEvent,
+    ChatCompletionStreamEvent,
+    LogprobsContentDeltaEvent,
+    LogprobsRefusalDeltaEvent,
+    FunctionToolCallArgumentsDoneEvent,
+    FunctionToolCallArgumentsDeltaEvent,
+)
+from .._deltas import accumulate_delta
+from ...._types import NOT_GIVEN, IncEx, NotGiven
+from ...._utils import is_given, consume_sync_iterator, consume_async_iterator
+from ...._compat import model_dump
+from ...._models import build, construct_type
+from ..._parsing import (
+    ResponseFormatT,
+    has_parseable_input,
+    maybe_parse_content,
+    parse_chat_completion,
+    get_input_tool_by_name,
+    solve_response_format_t,
+    parse_function_tool_arguments,
+)
+from ...._streaming import Stream, AsyncStream
+from ....types.chat import ChatCompletionChunk, ParsedChatCompletion, ChatCompletionToolParam
+from ...._exceptions import LengthFinishReasonError, ContentFilterFinishReasonError
+from ....types.chat.chat_completion import ChoiceLogprobs
+from ....types.chat.chat_completion_chunk import Choice as ChoiceChunk
+from ....types.chat.completion_create_params import ResponseFormat as ResponseFormatParam
+
+
+class ChatCompletionStream(Generic[ResponseFormatT]):
+    """Wrapper over the Chat Completions streaming API that adds helpful
+    events such as `content.done`, supports automatically parsing
+    responses & tool calls and accumulates a `ChatCompletion` object
+    from each individual chunk.
+
+    https://platform.openai.com/docs/api-reference/streaming
+    """
+
+    def __init__(
+        self,
+        *,
+        raw_stream: Stream[ChatCompletionChunk],
+        response_format: type[ResponseFormatT] | ResponseFormatParam | NotGiven,
+        input_tools: Iterable[ChatCompletionToolParam] | NotGiven,
+    ) -> None:
+        self._raw_stream = raw_stream
+        self._response = raw_stream.response
+        self._iterator = self.__stream__()
+        self._state = ChatCompletionStreamState(response_format=response_format, input_tools=input_tools)
+
+    def __next__(self) -> ChatCompletionStreamEvent[ResponseFormatT]:
+        return self._iterator.__next__()
+
+    def __iter__(self) -> Iterator[ChatCompletionStreamEvent[ResponseFormatT]]:
+        for item in self._iterator:
+            yield item
+
+    def __enter__(self) -> Self:
+        return self
+
+    def __exit__(
+        self,
+        exc_type: type[BaseException] | None,
+        exc: BaseException | None,
+        exc_tb: TracebackType | None,
+    ) -> None:
+        self.close()
+
+    def close(self) -> None:
+        """
+        Close the response and release the connection.
+
+        Automatically called if the response body is read to completion.
+        """
+        self._response.close()
+
+    def get_final_completion(self) -> ParsedChatCompletion[ResponseFormatT]:
+        """Waits until the stream has been read to completion and returns
+        the accumulated `ParsedChatCompletion` object.
+
+        If you passed a class type to `.stream()`, the `completion.choices[0].message.parsed`
+        property will be the content deserialised into that class, if there was any content returned
+        by the API.
+        """
+        self.until_done()
+        return self._state.get_final_completion()
+
+    def until_done(self) -> Self:
+        """Blocks until the stream has been consumed."""
+        consume_sync_iterator(self)
+        return self
+
+    @property
+    def current_completion_snapshot(self) -> ParsedChatCompletionSnapshot:
+        return self._state.current_completion_snapshot
+
+    def __stream__(self) -> Iterator[ChatCompletionStreamEvent[ResponseFormatT]]:
+        for sse_event in self._raw_stream:
+            events_to_fire = self._state.handle_chunk(sse_event)
+            for event in events_to_fire:
+                yield event
+
+
+class ChatCompletionStreamManager(Generic[ResponseFormatT]):
+    """Context manager over a `ChatCompletionStream` that is returned by `.stream()`.
+
+    This context manager ensures the response cannot be leaked if you don't read
+    the stream to completion.
+
+    Usage:
+    ```py
+    with client.beta.chat.completions.stream(...) as stream:
+        for event in stream:
+            ...
+    ```
+    """
+
+    def __init__(
+        self,
+        api_request: Callable[[], Stream[ChatCompletionChunk]],
+        *,
+        response_format: type[ResponseFormatT] | ResponseFormatParam | NotGiven,
+        input_tools: Iterable[ChatCompletionToolParam] | NotGiven,
+    ) -> None:
+        self.__stream: ChatCompletionStream[ResponseFormatT] | None = None
+        self.__api_request = api_request
+        self.__response_format = response_format
+        self.__input_tools = input_tools
+
+    def __enter__(self) -> ChatCompletionStream[ResponseFormatT]:
+        raw_stream = self.__api_request()
+
+        self.__stream = ChatCompletionStream(
+            raw_stream=raw_stream,
+            response_format=self.__response_format,
+            input_tools=self.__input_tools,
+        )
+
+        return self.__stream
+
+    def __exit__(
+        self,
+        exc_type: type[BaseException] | None,
+        exc: BaseException | None,
+        exc_tb: TracebackType | None,
+    ) -> None:
+        if self.__stream is not None:
+            self.__stream.close()
+
+
+class AsyncChatCompletionStream(Generic[ResponseFormatT]):
+    """Wrapper over the Chat Completions streaming API that adds helpful
+    events such as `content.done`, supports automatically parsing
+    responses & tool calls and accumulates a `ChatCompletion` object
+    from each individual chunk.
+
+    https://platform.openai.com/docs/api-reference/streaming
+    """
+
+    def __init__(
+        self,
+        *,
+        raw_stream: AsyncStream[ChatCompletionChunk],
+        response_format: type[ResponseFormatT] | ResponseFormatParam | NotGiven,
+        input_tools: Iterable[ChatCompletionToolParam] | NotGiven,
+    ) -> None:
+        self._raw_stream = raw_stream
+        self._response = raw_stream.response
+        self._iterator = self.__stream__()
+        self._state = ChatCompletionStreamState(response_format=response_format, input_tools=input_tools)
+
+    async def __anext__(self) -> ChatCompletionStreamEvent[ResponseFormatT]:
+        return await self._iterator.__anext__()
+
+    async def __aiter__(self) -> AsyncIterator[ChatCompletionStreamEvent[ResponseFormatT]]:
+        async for item in self._iterator:
+            yield item
+
+    async def __aenter__(self) -> Self:
+        return self
+
+    async def __aexit__(
+        self,
+        exc_type: type[BaseException] | None,
+        exc: BaseException | None,
+        exc_tb: TracebackType | None,
+    ) -> None:
+        await self.close()
+
+    async def close(self) -> None:
+        """
+        Close the response and release the connection.
+
+        Automatically called if the response body is read to completion.
+        """
+        await self._response.aclose()
+
+    async def get_final_completion(self) -> ParsedChatCompletion[ResponseFormatT]:
+        """Waits until the stream has been read to completion and returns
+        the accumulated `ParsedChatCompletion` object.
+
+        If you passed a class type to `.stream()`, the `completion.choices[0].message.parsed`
+        property will be the content deserialised into that class, if there was any content returned
+        by the API.
+        """
+        await self.until_done()
+        return self._state.get_final_completion()
+
+    async def until_done(self) -> Self:
+        """Blocks until the stream has been consumed."""
+        await consume_async_iterator(self)
+        return self
+
+    @property
+    def current_completion_snapshot(self) -> ParsedChatCompletionSnapshot:
+        return self._state.current_completion_snapshot
+
+    async def __stream__(self) -> AsyncIterator[ChatCompletionStreamEvent[ResponseFormatT]]:
+        async for sse_event in self._raw_stream:
+            events_to_fire = self._state.handle_chunk(sse_event)
+            for event in events_to_fire:
+                yield event
+
+
+class AsyncChatCompletionStreamManager(Generic[ResponseFormatT]):
+    """Context manager over a `AsyncChatCompletionStream` that is returned by `.stream()`.
+
+    This context manager ensures the response cannot be leaked if you don't read
+    the stream to completion.
+
+    Usage:
+    ```py
+    async with client.beta.chat.completions.stream(...) as stream:
+        for event in stream:
+            ...
+    ```
+    """
+
+    def __init__(
+        self,
+        api_request: Awaitable[AsyncStream[ChatCompletionChunk]],
+        *,
+        response_format: type[ResponseFormatT] | ResponseFormatParam | NotGiven,
+        input_tools: Iterable[ChatCompletionToolParam] | NotGiven,
+    ) -> None:
+        self.__stream: AsyncChatCompletionStream[ResponseFormatT] | None = None
+        self.__api_request = api_request
+        self.__response_format = response_format
+        self.__input_tools = input_tools
+
+    async def __aenter__(self) -> AsyncChatCompletionStream[ResponseFormatT]:
+        raw_stream = await self.__api_request
+
+        self.__stream = AsyncChatCompletionStream(
+            raw_stream=raw_stream,
+            response_format=self.__response_format,
+            input_tools=self.__input_tools,
+        )
+
+        return self.__stream
+
+    async def __aexit__(
+        self,
+        exc_type: type[BaseException] | None,
+        exc: BaseException | None,
+        exc_tb: TracebackType | None,
+    ) -> None:
+        if self.__stream is not None:
+            await self.__stream.close()
+
+
+class ChatCompletionStreamState(Generic[ResponseFormatT]):
+    """Helper class for manually accumulating `ChatCompletionChunk`s into a final `ChatCompletion` object.
+
+    This is useful in cases where you can't always use the `.stream()` method, e.g.
+
+    ```py
+    from openai.lib.streaming.chat import ChatCompletionStreamState
+
+    state = ChatCompletionStreamState()
+
+    stream = client.chat.completions.create(..., stream=True)
+    for chunk in response:
+        state.handle_chunk(chunk)
+
+        # can also access the accumulated `ChatCompletion` mid-stream
+        state.current_completion_snapshot
+
+    print(state.get_final_completion())
+    ```
+    """
+
+    def __init__(
+        self,
+        *,
+        input_tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN,
+        response_format: type[ResponseFormatT] | ResponseFormatParam | NotGiven = NOT_GIVEN,
+    ) -> None:
+        self.__current_completion_snapshot: ParsedChatCompletionSnapshot | None = None
+        self.__choice_event_states: list[ChoiceEventState] = []
+
+        self._input_tools = [tool for tool in input_tools] if is_given(input_tools) else []
+        self._response_format = response_format
+        self._rich_response_format: type | NotGiven = response_format if inspect.isclass(response_format) else NOT_GIVEN
+
+    def get_final_completion(self) -> ParsedChatCompletion[ResponseFormatT]:
+        """Parse the final completion object.
+
+        Note this does not provide any guarantees that the stream has actually finished, you must
+        only call this method when the stream is finished.
+        """
+        return parse_chat_completion(
+            chat_completion=self.current_completion_snapshot,
+            response_format=self._rich_response_format,
+            input_tools=self._input_tools,
+        )
+
+    @property
+    def current_completion_snapshot(self) -> ParsedChatCompletionSnapshot:
+        assert self.__current_completion_snapshot is not None
+        return self.__current_completion_snapshot
+
+    def handle_chunk(self, chunk: ChatCompletionChunk) -> Iterable[ChatCompletionStreamEvent[ResponseFormatT]]:
+        """Accumulate a new chunk into the snapshot and returns an iterable of events to yield."""
+        self.__current_completion_snapshot = self._accumulate_chunk(chunk)
+
+        return self._build_events(
+            chunk=chunk,
+            completion_snapshot=self.__current_completion_snapshot,
+        )
+
+    def _get_choice_state(self, choice: ChoiceChunk) -> ChoiceEventState:
+        try:
+            return self.__choice_event_states[choice.index]
+        except IndexError:
+            choice_state = ChoiceEventState(input_tools=self._input_tools)
+            self.__choice_event_states.append(choice_state)
+            return choice_state
+
+    def _accumulate_chunk(self, chunk: ChatCompletionChunk) -> ParsedChatCompletionSnapshot:
+        completion_snapshot = self.__current_completion_snapshot
+
+        if completion_snapshot is None:
+            return _convert_initial_chunk_into_snapshot(chunk)
+
+        for choice in chunk.choices:
+            try:
+                choice_snapshot = completion_snapshot.choices[choice.index]
+                previous_tool_calls = choice_snapshot.message.tool_calls or []
+
+                choice_snapshot.message = cast(
+                    ParsedChatCompletionMessageSnapshot,
+                    construct_type(
+                        type_=ParsedChatCompletionMessageSnapshot,
+                        value=accumulate_delta(
+                            cast(
+                                "dict[object, object]",
+                                model_dump(
+                                    choice_snapshot.message,
+                                    # we don't want to serialise / deserialise our custom properties
+                                    # as they won't appear in the delta and we don't want to have to
+                                    # continuosly reparse the content
+                                    exclude=cast(
+                                        # cast required as mypy isn't smart enough to infer `True` here to `Literal[True]`
+                                        IncEx,
+                                        {
+                                            "parsed": True,
+                                            "tool_calls": {
+                                                idx: {"function": {"parsed_arguments": True}}
+                                                for idx, _ in enumerate(choice_snapshot.message.tool_calls or [])
+                                            },
+                                        },
+                                    ),
+                                ),
+                            ),
+                            cast("dict[object, object]", choice.delta.to_dict()),
+                        ),
+                    ),
+                )
+
+                # ensure tools that have already been parsed are added back into the newly
+                # constructed message snapshot
+                for tool_index, prev_tool in enumerate(previous_tool_calls):
+                    new_tool = (choice_snapshot.message.tool_calls or [])[tool_index]
+
+                    if prev_tool.type == "function":
+                        assert new_tool.type == "function"
+                        new_tool.function.parsed_arguments = prev_tool.function.parsed_arguments
+                    elif TYPE_CHECKING:  # type: ignore[unreachable]
+                        assert_never(prev_tool)
+            except IndexError:
+                choice_snapshot = cast(
+                    ParsedChoiceSnapshot,
+                    construct_type(
+                        type_=ParsedChoiceSnapshot,
+                        value={
+                            **choice.model_dump(exclude_unset=True, exclude={"delta"}),
+                            "message": choice.delta.to_dict(),
+                        },
+                    ),
+                )
+                completion_snapshot.choices.append(choice_snapshot)
+
+            if choice.finish_reason:
+                choice_snapshot.finish_reason = choice.finish_reason
+
+                if has_parseable_input(response_format=self._response_format, input_tools=self._input_tools):
+                    if choice.finish_reason == "length":
+                        # at the time of writing, `.usage` will always be `None` but
+                        # we include it here in case that is changed in the future
+                        raise LengthFinishReasonError(completion=completion_snapshot)
+
+                    if choice.finish_reason == "content_filter":
+                        raise ContentFilterFinishReasonError()
+
+            if (
+                choice_snapshot.message.content
+                and not choice_snapshot.message.refusal
+                and is_given(self._rich_response_format)
+            ):
+                choice_snapshot.message.parsed = from_json(
+                    bytes(choice_snapshot.message.content, "utf-8"),
+                    partial_mode=True,
+                )
+
+            for tool_call_chunk in choice.delta.tool_calls or []:
+                tool_call_snapshot = (choice_snapshot.message.tool_calls or [])[tool_call_chunk.index]
+
+                if tool_call_snapshot.type == "function":
+                    input_tool = get_input_tool_by_name(
+                        input_tools=self._input_tools, name=tool_call_snapshot.function.name
+                    )
+
+                    if (
+                        input_tool
+                        and input_tool.get("function", {}).get("strict")
+                        and tool_call_snapshot.function.arguments
+                    ):
+                        tool_call_snapshot.function.parsed_arguments = from_json(
+                            bytes(tool_call_snapshot.function.arguments, "utf-8"),
+                            partial_mode=True,
+                        )
+                elif TYPE_CHECKING:  # type: ignore[unreachable]
+                    assert_never(tool_call_snapshot)
+
+            if choice.logprobs is not None:
+                if choice_snapshot.logprobs is None:
+                    choice_snapshot.logprobs = build(
+                        ChoiceLogprobs,
+                        content=choice.logprobs.content,
+                        refusal=choice.logprobs.refusal,
+                    )
+                else:
+                    if choice.logprobs.content:
+                        if choice_snapshot.logprobs.content is None:
+                            choice_snapshot.logprobs.content = []
+
+                        choice_snapshot.logprobs.content.extend(choice.logprobs.content)
+
+                    if choice.logprobs.refusal:
+                        if choice_snapshot.logprobs.refusal is None:
+                            choice_snapshot.logprobs.refusal = []
+
+                        choice_snapshot.logprobs.refusal.extend(choice.logprobs.refusal)
+
+        completion_snapshot.usage = chunk.usage
+        completion_snapshot.system_fingerprint = chunk.system_fingerprint
+
+        return completion_snapshot
+
+    def _build_events(
+        self,
+        *,
+        chunk: ChatCompletionChunk,
+        completion_snapshot: ParsedChatCompletionSnapshot,
+    ) -> list[ChatCompletionStreamEvent[ResponseFormatT]]:
+        events_to_fire: list[ChatCompletionStreamEvent[ResponseFormatT]] = []
+
+        events_to_fire.append(
+            build(ChunkEvent, type="chunk", chunk=chunk, snapshot=completion_snapshot),
+        )
+
+        for choice in chunk.choices:
+            choice_state = self._get_choice_state(choice)
+            choice_snapshot = completion_snapshot.choices[choice.index]
+
+            if choice.delta.content is not None and choice_snapshot.message.content is not None:
+                events_to_fire.append(
+                    build(
+                        ContentDeltaEvent,
+                        type="content.delta",
+                        delta=choice.delta.content,
+                        snapshot=choice_snapshot.message.content,
+                        parsed=choice_snapshot.message.parsed,
+                    )
+                )
+
+            if choice.delta.refusal is not None and choice_snapshot.message.refusal is not None:
+                events_to_fire.append(
+                    build(
+                        RefusalDeltaEvent,
+                        type="refusal.delta",
+                        delta=choice.delta.refusal,
+                        snapshot=choice_snapshot.message.refusal,
+                    )
+                )
+
+            if choice.delta.tool_calls:
+                tool_calls = choice_snapshot.message.tool_calls
+                assert tool_calls is not None
+
+                for tool_call_delta in choice.delta.tool_calls:
+                    tool_call = tool_calls[tool_call_delta.index]
+
+                    if tool_call.type == "function":
+                        assert tool_call_delta.function is not None
+                        events_to_fire.append(
+                            build(
+                                FunctionToolCallArgumentsDeltaEvent,
+                                type="tool_calls.function.arguments.delta",
+                                name=tool_call.function.name,
+                                index=tool_call_delta.index,
+                                arguments=tool_call.function.arguments,
+                                parsed_arguments=tool_call.function.parsed_arguments,
+                                arguments_delta=tool_call_delta.function.arguments or "",
+                            )
+                        )
+                    elif TYPE_CHECKING:  # type: ignore[unreachable]
+                        assert_never(tool_call)
+
+            if choice.logprobs is not None and choice_snapshot.logprobs is not None:
+                if choice.logprobs.content and choice_snapshot.logprobs.content:
+                    events_to_fire.append(
+                        build(
+                            LogprobsContentDeltaEvent,
+                            type="logprobs.content.delta",
+                            content=choice.logprobs.content,
+                            snapshot=choice_snapshot.logprobs.content,
+                        ),
+                    )
+
+                if choice.logprobs.refusal and choice_snapshot.logprobs.refusal:
+                    events_to_fire.append(
+                        build(
+                            LogprobsRefusalDeltaEvent,
+                            type="logprobs.refusal.delta",
+                            refusal=choice.logprobs.refusal,
+                            snapshot=choice_snapshot.logprobs.refusal,
+                        ),
+                    )
+
+            events_to_fire.extend(
+                choice_state.get_done_events(
+                    choice_chunk=choice,
+                    choice_snapshot=choice_snapshot,
+                    response_format=self._response_format,
+                )
+            )
+
+        return events_to_fire
+
+
+class ChoiceEventState:
+    def __init__(self, *, input_tools: list[ChatCompletionToolParam]) -> None:
+        self._input_tools = input_tools
+
+        self._content_done = False
+        self._refusal_done = False
+        self._logprobs_content_done = False
+        self._logprobs_refusal_done = False
+        self._done_tool_calls: set[int] = set()
+        self.__current_tool_call_index: int | None = None
+
+    def get_done_events(
+        self,
+        *,
+        choice_chunk: ChoiceChunk,
+        choice_snapshot: ParsedChoiceSnapshot,
+        response_format: type[ResponseFormatT] | ResponseFormatParam | NotGiven,
+    ) -> list[ChatCompletionStreamEvent[ResponseFormatT]]:
+        events_to_fire: list[ChatCompletionStreamEvent[ResponseFormatT]] = []
+
+        if choice_snapshot.finish_reason:
+            events_to_fire.extend(
+                self._content_done_events(choice_snapshot=choice_snapshot, response_format=response_format)
+            )
+
+            if (
+                self.__current_tool_call_index is not None
+                and self.__current_tool_call_index not in self._done_tool_calls
+            ):
+                self._add_tool_done_event(
+                    events_to_fire=events_to_fire,
+                    choice_snapshot=choice_snapshot,
+                    tool_index=self.__current_tool_call_index,
+                )
+
+        for tool_call in choice_chunk.delta.tool_calls or []:
+            if self.__current_tool_call_index != tool_call.index:
+                events_to_fire.extend(
+                    self._content_done_events(choice_snapshot=choice_snapshot, response_format=response_format)
+                )
+
+                if self.__current_tool_call_index is not None:
+                    self._add_tool_done_event(
+                        events_to_fire=events_to_fire,
+                        choice_snapshot=choice_snapshot,
+                        tool_index=self.__current_tool_call_index,
+                    )
+
+            self.__current_tool_call_index = tool_call.index
+
+        return events_to_fire
+
+    def _content_done_events(
+        self,
+        *,
+        choice_snapshot: ParsedChoiceSnapshot,
+        response_format: type[ResponseFormatT] | ResponseFormatParam | NotGiven,
+    ) -> list[ChatCompletionStreamEvent[ResponseFormatT]]:
+        events_to_fire: list[ChatCompletionStreamEvent[ResponseFormatT]] = []
+
+        if choice_snapshot.message.content and not self._content_done:
+            self._content_done = True
+
+            parsed = maybe_parse_content(
+                response_format=response_format,
+                message=choice_snapshot.message,
+            )
+
+            # update the parsed content to now use the richer `response_format`
+            # as opposed to the raw JSON-parsed object as the content is now
+            # complete and can be fully validated.
+            choice_snapshot.message.parsed = parsed
+
+            events_to_fire.append(
+                build(
+                    # we do this dance so that when the `ContentDoneEvent` instance
+                    # is printed at runtime the class name will include the solved
+                    # type variable, e.g. `ContentDoneEvent[MyModelType]`
+                    cast(  # pyright: ignore[reportUnnecessaryCast]
+                        "type[ContentDoneEvent[ResponseFormatT]]",
+                        cast(Any, ContentDoneEvent)[solve_response_format_t(response_format)],
+                    ),
+                    type="content.done",
+                    content=choice_snapshot.message.content,
+                    parsed=parsed,
+                ),
+            )
+
+        if choice_snapshot.message.refusal is not None and not self._refusal_done:
+            self._refusal_done = True
+            events_to_fire.append(
+                build(RefusalDoneEvent, type="refusal.done", refusal=choice_snapshot.message.refusal),
+            )
+
+        if (
+            choice_snapshot.logprobs is not None
+            and choice_snapshot.logprobs.content is not None
+            and not self._logprobs_content_done
+        ):
+            self._logprobs_content_done = True
+            events_to_fire.append(
+                build(LogprobsContentDoneEvent, type="logprobs.content.done", content=choice_snapshot.logprobs.content),
+            )
+
+        if (
+            choice_snapshot.logprobs is not None
+            and choice_snapshot.logprobs.refusal is not None
+            and not self._logprobs_refusal_done
+        ):
+            self._logprobs_refusal_done = True
+            events_to_fire.append(
+                build(LogprobsRefusalDoneEvent, type="logprobs.refusal.done", refusal=choice_snapshot.logprobs.refusal),
+            )
+
+        return events_to_fire
+
+    def _add_tool_done_event(
+        self,
+        *,
+        events_to_fire: list[ChatCompletionStreamEvent[ResponseFormatT]],
+        choice_snapshot: ParsedChoiceSnapshot,
+        tool_index: int,
+    ) -> None:
+        if tool_index in self._done_tool_calls:
+            return
+
+        self._done_tool_calls.add(tool_index)
+
+        assert choice_snapshot.message.tool_calls is not None
+        tool_call_snapshot = choice_snapshot.message.tool_calls[tool_index]
+
+        if tool_call_snapshot.type == "function":
+            parsed_arguments = parse_function_tool_arguments(
+                input_tools=self._input_tools, function=tool_call_snapshot.function
+            )
+
+            # update the parsed content to potentially use a richer type
+            # as opposed to the raw JSON-parsed object as the content is now
+            # complete and can be fully validated.
+            tool_call_snapshot.function.parsed_arguments = parsed_arguments
+
+            events_to_fire.append(
+                build(
+                    FunctionToolCallArgumentsDoneEvent,
+                    type="tool_calls.function.arguments.done",
+                    index=tool_index,
+                    name=tool_call_snapshot.function.name,
+                    arguments=tool_call_snapshot.function.arguments,
+                    parsed_arguments=parsed_arguments,
+                )
+            )
+        elif TYPE_CHECKING:  # type: ignore[unreachable]
+            assert_never(tool_call_snapshot)
+
+
+def _convert_initial_chunk_into_snapshot(chunk: ChatCompletionChunk) -> ParsedChatCompletionSnapshot:
+    data = chunk.to_dict()
+    choices = cast("list[object]", data["choices"])
+
+    for choice in chunk.choices:
+        choices[choice.index] = {
+            **choice.model_dump(exclude_unset=True, exclude={"delta"}),
+            "message": choice.delta.to_dict(),
+        }
+
+    return cast(
+        ParsedChatCompletionSnapshot,
+        construct_type(
+            type_=ParsedChatCompletionSnapshot,
+            value={
+                "system_fingerprint": None,
+                **data,
+                "object": "chat.completion",
+            },
+        ),
+    )
diff --git a/.venv/lib/python3.11/site-packages/openai/lib/streaming/chat/_events.py b/.venv/lib/python3.11/site-packages/openai/lib/streaming/chat/_events.py
new file mode 100644
index 0000000000000000000000000000000000000000..d4c1f28300b4f63da95a87613e78c6f4a4e897b7
--- /dev/null
+++ b/.venv/lib/python3.11/site-packages/openai/lib/streaming/chat/_events.py
@@ -0,0 +1,123 @@
+from typing import List, Union, Generic, Optional
+from typing_extensions import Literal
+
+from ._types import ParsedChatCompletionSnapshot
+from ...._models import BaseModel, GenericModel
+from ..._parsing import ResponseFormatT
+from ....types.chat import ChatCompletionChunk, ChatCompletionTokenLogprob
+
+
+class ChunkEvent(BaseModel):
+    type: Literal["chunk"]
+
+    chunk: ChatCompletionChunk
+
+    snapshot: ParsedChatCompletionSnapshot
+
+
+class ContentDeltaEvent(BaseModel):
+    """This event is yielded for every chunk with `choice.delta.content` data."""
+
+    type: Literal["content.delta"]
+
+    delta: str
+
+    snapshot: str
+
+    parsed: Optional[object] = None
+
+
+class ContentDoneEvent(GenericModel, Generic[ResponseFormatT]):
+    type: Literal["content.done"]
+
+    content: str
+
+    parsed: Optional[ResponseFormatT] = None
+
+
+class RefusalDeltaEvent(BaseModel):
+    type: Literal["refusal.delta"]
+
+    delta: str
+
+    snapshot: str
+
+
+class RefusalDoneEvent(BaseModel):
+    type: Literal["refusal.done"]
+
+    refusal: str
+
+
+class FunctionToolCallArgumentsDeltaEvent(BaseModel):
+    type: Literal["tool_calls.function.arguments.delta"]
+
+    name: str
+
+    index: int
+
+    arguments: str
+    """Accumulated raw JSON string"""
+
+    parsed_arguments: object
+    """The parsed arguments so far"""
+
+    arguments_delta: str
+    """The JSON string delta"""
+
+
+class FunctionToolCallArgumentsDoneEvent(BaseModel):
+    type: Literal["tool_calls.function.arguments.done"]
+
+    name: str
+
+    index: int
+
+    arguments: str
+    """Accumulated raw JSON string"""
+
+    parsed_arguments: object
+    """The parsed arguments"""
+
+
+class LogprobsContentDeltaEvent(BaseModel):
+    type: Literal["logprobs.content.delta"]
+
+    content: List[ChatCompletionTokenLogprob]
+
+    snapshot: List[ChatCompletionTokenLogprob]
+
+
+class LogprobsContentDoneEvent(BaseModel):
+    type: Literal["logprobs.content.done"]
+
+    content: List[ChatCompletionTokenLogprob]
+
+
+class LogprobsRefusalDeltaEvent(BaseModel):
+    type: Literal["logprobs.refusal.delta"]
+
+    refusal: List[ChatCompletionTokenLogprob]
+
+    snapshot: List[ChatCompletionTokenLogprob]
+
+
+class LogprobsRefusalDoneEvent(BaseModel):
+    type: Literal["logprobs.refusal.done"]
+
+    refusal: List[ChatCompletionTokenLogprob]
+
+
+ChatCompletionStreamEvent = Union[
+    ChunkEvent,
+    ContentDeltaEvent,
+    ContentDoneEvent[ResponseFormatT],
+    RefusalDeltaEvent,
+    RefusalDoneEvent,
+    FunctionToolCallArgumentsDeltaEvent,
+    FunctionToolCallArgumentsDoneEvent,
+    LogprobsContentDeltaEvent,
+    LogprobsContentDoneEvent,
+    LogprobsRefusalDeltaEvent,
+    LogprobsRefusalDoneEvent,
+]
diff --git a/.venv/lib/python3.11/site-packages/openai/lib/streaming/chat/_types.py b/.venv/lib/python3.11/site-packages/openai/lib/streaming/chat/_types.py
new file mode 100644
index 0000000000000000000000000000000000000000..42552893a0566c4d9821c57fc6bfea01479f23eb
--- /dev/null
+++ b/.venv/lib/python3.11/site-packages/openai/lib/streaming/chat/_types.py
@@ -0,0 +1,20 @@
+from __future__ import annotations
+
+from typing_extensions import TypeAlias
+
+from ....types.chat import ParsedChoice, ParsedChatCompletion, ParsedChatCompletionMessage
+
+ParsedChatCompletionSnapshot: TypeAlias = ParsedChatCompletion[object]
+"""Snapshot type representing an in-progress accumulation of
+a `ParsedChatCompletion` object.
+"""
+
+ParsedChatCompletionMessageSnapshot: TypeAlias = ParsedChatCompletionMessage[object]
+"""Snapshot type representing an in-progress accumulation of
+a `ParsedChatCompletionMessage` object.
+
+If the content has been fully accumulated, the `.parsed` content will be
+the `response_format` instance, otherwise it'll be the raw JSON parsed version.
+"""
+
+ParsedChoiceSnapshot: TypeAlias = ParsedChoice[object]
diff --git a/.venv/lib/python3.11/site-packages/openai/resources/__init__.py b/.venv/lib/python3.11/site-packages/openai/resources/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e2cc1c4b0cf6b0cc113a0f4684e095512ce76ebe
--- /dev/null
+++ b/.venv/lib/python3.11/site-packages/openai/resources/__init__.py
@@ -0,0 +1,173 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from .beta import (
+    Beta,
+    AsyncBeta,
+    BetaWithRawResponse,
+    AsyncBetaWithRawResponse,
+    BetaWithStreamingResponse,
+    AsyncBetaWithStreamingResponse,
+)
+from .chat import (
+    Chat,
+    AsyncChat,
+    ChatWithRawResponse,
+    AsyncChatWithRawResponse,
+    ChatWithStreamingResponse,
+    AsyncChatWithStreamingResponse,
+)
+from .audio import (
+    Audio,
+    AsyncAudio,
+    AudioWithRawResponse,
+    AsyncAudioWithRawResponse,
+    AudioWithStreamingResponse,
+    AsyncAudioWithStreamingResponse,
+)
+from .files import (
+    Files,
+    AsyncFiles,
+    FilesWithRawResponse,
+    AsyncFilesWithRawResponse,
+    FilesWithStreamingResponse,
+    AsyncFilesWithStreamingResponse,
+)
+from .images import (
+    Images,
+    AsyncImages,
+    ImagesWithRawResponse,
+    AsyncImagesWithRawResponse,
+    ImagesWithStreamingResponse,
+    AsyncImagesWithStreamingResponse,
+)
+from .models import (
+    Models,
+    AsyncModels,
+    ModelsWithRawResponse,
+    AsyncModelsWithRawResponse,
+    ModelsWithStreamingResponse,
+    AsyncModelsWithStreamingResponse,
+)
+from .batches import (
+    Batches,
+    AsyncBatches,
+    BatchesWithRawResponse,
+    AsyncBatchesWithRawResponse,
+    BatchesWithStreamingResponse,
+    AsyncBatchesWithStreamingResponse,
+)
+from .uploads import (
+    Uploads,
+    AsyncUploads,
+    UploadsWithRawResponse,
+    AsyncUploadsWithRawResponse,
+    UploadsWithStreamingResponse,
+    AsyncUploadsWithStreamingResponse,
+)
+from .embeddings import (
+    Embeddings,
+    AsyncEmbeddings,
+    EmbeddingsWithRawResponse,
+    AsyncEmbeddingsWithRawResponse,
+    EmbeddingsWithStreamingResponse,
+    AsyncEmbeddingsWithStreamingResponse,
+)
+from .completions import (
+    Completions,
+    AsyncCompletions,
+    CompletionsWithRawResponse,
+    AsyncCompletionsWithRawResponse,
+    CompletionsWithStreamingResponse,
+    AsyncCompletionsWithStreamingResponse,
+)
+from .fine_tuning import (
+    FineTuning,
+    AsyncFineTuning,
+    FineTuningWithRawResponse,
+    AsyncFineTuningWithRawResponse,
+    FineTuningWithStreamingResponse,
+    AsyncFineTuningWithStreamingResponse,
+)
+from .moderations import (
+    Moderations,
+    AsyncModerations,
+    ModerationsWithRawResponse,
+    AsyncModerationsWithRawResponse,
+    ModerationsWithStreamingResponse,
+    AsyncModerationsWithStreamingResponse,
+)
+
+__all__ = [
+    "Completions",
+    "AsyncCompletions",
+    "CompletionsWithRawResponse",
+    "AsyncCompletionsWithRawResponse",
+    "CompletionsWithStreamingResponse",
+    "AsyncCompletionsWithStreamingResponse",
+    "Chat",
+    "AsyncChat",
+    "ChatWithRawResponse",
+    "AsyncChatWithRawResponse",
+    "ChatWithStreamingResponse",
+    "AsyncChatWithStreamingResponse",
+    "Embeddings",
+    "AsyncEmbeddings",
+    "EmbeddingsWithRawResponse",
+    "AsyncEmbeddingsWithRawResponse",
+    "EmbeddingsWithStreamingResponse",
+    "AsyncEmbeddingsWithStreamingResponse",
+    "Files",
+    "AsyncFiles",
+    "FilesWithRawResponse",
+    "AsyncFilesWithRawResponse",
+    "FilesWithStreamingResponse",
+    "AsyncFilesWithStreamingResponse",
+    "Images",
+    "AsyncImages",
+    "ImagesWithRawResponse",
+    "AsyncImagesWithRawResponse",
+    "ImagesWithStreamingResponse",
+    "AsyncImagesWithStreamingResponse",
+    "Audio",
+    "AsyncAudio",
+    "AudioWithRawResponse",
+    "AsyncAudioWithRawResponse",
+    "AudioWithStreamingResponse",
+    "AsyncAudioWithStreamingResponse",
+    "Moderations",
+    "AsyncModerations",
+    "ModerationsWithRawResponse",
+    "AsyncModerationsWithRawResponse",
+    "ModerationsWithStreamingResponse",
+    "AsyncModerationsWithStreamingResponse",
+    "Models",
+    "AsyncModels",
+    "ModelsWithRawResponse",
+    "AsyncModelsWithRawResponse",
+    "ModelsWithStreamingResponse",
+    "AsyncModelsWithStreamingResponse",
+    "FineTuning",
+    "AsyncFineTuning",
+    "FineTuningWithRawResponse",
+    "AsyncFineTuningWithRawResponse",
+    "FineTuningWithStreamingResponse",
+    "AsyncFineTuningWithStreamingResponse",
+    "Beta",
+    "AsyncBeta",
+    "BetaWithRawResponse",
+    "AsyncBetaWithRawResponse",
+    "BetaWithStreamingResponse",
+    "AsyncBetaWithStreamingResponse",
+    "Batches",
+    "AsyncBatches",
+    "BatchesWithRawResponse",
+    "AsyncBatchesWithRawResponse",
+    "BatchesWithStreamingResponse",
+    "AsyncBatchesWithStreamingResponse",
+    "Uploads",
+    "AsyncUploads",
+    "UploadsWithRawResponse",
+    "AsyncUploadsWithRawResponse",
+    "UploadsWithStreamingResponse",
+    "AsyncUploadsWithStreamingResponse",
+]
diff --git a/.venv/lib/python3.11/site-packages/openai/resources/audio/__init__.py b/.venv/lib/python3.11/site-packages/openai/resources/audio/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..7da1d2dbde84b8726c40ee18e808238f786c932b
--- /dev/null
+++ b/.venv/lib/python3.11/site-packages/openai/resources/audio/__init__.py
@@ -0,0 +1,61 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from .audio import (
+    Audio,
+    AsyncAudio,
+    AudioWithRawResponse,
+    AsyncAudioWithRawResponse,
+    AudioWithStreamingResponse,
+    AsyncAudioWithStreamingResponse,
+)
+from .speech import (
+    Speech,
+    AsyncSpeech,
+    SpeechWithRawResponse,
+    AsyncSpeechWithRawResponse,
+    SpeechWithStreamingResponse,
+    AsyncSpeechWithStreamingResponse,
+)
+from .translations import (
+    Translations,
+    AsyncTranslations,
+    TranslationsWithRawResponse,
+    AsyncTranslationsWithRawResponse,
+    TranslationsWithStreamingResponse,
+    AsyncTranslationsWithStreamingResponse,
+)
+from .transcriptions import (
+    Transcriptions,
+    AsyncTranscriptions,
+    TranscriptionsWithRawResponse,
+    AsyncTranscriptionsWithRawResponse,
+    TranscriptionsWithStreamingResponse,
+    AsyncTranscriptionsWithStreamingResponse,
+)
+
+__all__ = [
+    "Transcriptions",
+    "AsyncTranscriptions",
+    "TranscriptionsWithRawResponse",
+    "AsyncTranscriptionsWithRawResponse",
+    "TranscriptionsWithStreamingResponse",
+    "AsyncTranscriptionsWithStreamingResponse",
+    "Translations",
+    "AsyncTranslations",
+    "TranslationsWithRawResponse",
+    "AsyncTranslationsWithRawResponse",
+    "TranslationsWithStreamingResponse",
+    "AsyncTranslationsWithStreamingResponse",
+    "Speech",
+    "AsyncSpeech",
+    "SpeechWithRawResponse",
+    "AsyncSpeechWithRawResponse",
+    "SpeechWithStreamingResponse",
+    "AsyncSpeechWithStreamingResponse",
+    "Audio",
+    "AsyncAudio",
+    "AudioWithRawResponse",
+    "AsyncAudioWithRawResponse",
+    "AudioWithStreamingResponse",
+    "AsyncAudioWithStreamingResponse",
+]
diff --git a/.venv/lib/python3.11/site-packages/openai/resources/audio/__pycache__/__init__.cpython-311.pyc b/.venv/lib/python3.11/site-packages/openai/resources/audio/__pycache__/__init__.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..73fd4908ad35dff85f2c4faddc7f5b314d63db89
Binary files /dev/null and b/.venv/lib/python3.11/site-packages/openai/resources/audio/__pycache__/__init__.cpython-311.pyc differ
diff --git a/.venv/lib/python3.11/site-packages/openai/resources/audio/__pycache__/audio.cpython-311.pyc b/.venv/lib/python3.11/site-packages/openai/resources/audio/__pycache__/audio.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..d5731f23a2b38752becf112b39f1bf5d9281d61b
Binary files /dev/null and b/.venv/lib/python3.11/site-packages/openai/resources/audio/__pycache__/audio.cpython-311.pyc differ
diff --git a/.venv/lib/python3.11/site-packages/openai/resources/audio/__pycache__/speech.cpython-311.pyc b/.venv/lib/python3.11/site-packages/openai/resources/audio/__pycache__/speech.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..31a770b2d1cde30bfdbedd4d39cd6b5a37c5da0e
Binary files /dev/null and b/.venv/lib/python3.11/site-packages/openai/resources/audio/__pycache__/speech.cpython-311.pyc differ
diff --git a/.venv/lib/python3.11/site-packages/openai/resources/audio/__pycache__/transcriptions.cpython-311.pyc b/.venv/lib/python3.11/site-packages/openai/resources/audio/__pycache__/transcriptions.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..3fe3ccc58a4b3c6a45fb4a8aac17166585daef70
Binary files /dev/null and b/.venv/lib/python3.11/site-packages/openai/resources/audio/__pycache__/transcriptions.cpython-311.pyc differ
diff --git a/.venv/lib/python3.11/site-packages/openai/resources/audio/__pycache__/translations.cpython-311.pyc b/.venv/lib/python3.11/site-packages/openai/resources/audio/__pycache__/translations.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..d36c4c9e8cb9b405de23f0c77321faf25c2a824a
Binary files /dev/null and b/.venv/lib/python3.11/site-packages/openai/resources/audio/__pycache__/translations.cpython-311.pyc differ
diff --git a/.venv/lib/python3.11/site-packages/openai/resources/audio/audio.py b/.venv/lib/python3.11/site-packages/openai/resources/audio/audio.py
new file mode 100644
index 0000000000000000000000000000000000000000..383b7073bf0d354a356341ecdff6de7994bf6f2e
--- /dev/null
+++ b/.venv/lib/python3.11/site-packages/openai/resources/audio/audio.py
@@ -0,0 +1,166 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from .speech import (
+    Speech,
+    AsyncSpeech,
+    SpeechWithRawResponse,
+    AsyncSpeechWithRawResponse,
+    SpeechWithStreamingResponse,
+    AsyncSpeechWithStreamingResponse,
+)
+from ..._compat import cached_property
+from ..._resource import SyncAPIResource, AsyncAPIResource
+from .translations import (
+    Translations,
+    AsyncTranslations,
+    TranslationsWithRawResponse,
+    AsyncTranslationsWithRawResponse,
+    TranslationsWithStreamingResponse,
+    AsyncTranslationsWithStreamingResponse,
+)
+from .transcriptions import (
+    Transcriptions,
+    AsyncTranscriptions,
+    TranscriptionsWithRawResponse,
+    AsyncTranscriptionsWithRawResponse,
+    TranscriptionsWithStreamingResponse,
+    AsyncTranscriptionsWithStreamingResponse,
+)
+
+__all__ = ["Audio", "AsyncAudio"]
+
+
+class Audio(SyncAPIResource):
+    @cached_property
+    def transcriptions(self) -> Transcriptions:
+        return Transcriptions(self._client)
+
+    @cached_property
+    def translations(self) -> Translations:
+        return Translations(self._client)
+
+    @cached_property
+    def speech(self) -> Speech:
+        return Speech(self._client)
+
+    @cached_property
+    def with_raw_response(self) -> AudioWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return AudioWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> AudioWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return AudioWithStreamingResponse(self)
+
+
+class AsyncAudio(AsyncAPIResource):
+    @cached_property
+    def transcriptions(self) -> AsyncTranscriptions:
+        return AsyncTranscriptions(self._client)
+
+    @cached_property
+    def translations(self) -> AsyncTranslations:
+        return AsyncTranslations(self._client)
+
+    @cached_property
+    def speech(self) -> AsyncSpeech:
+        return AsyncSpeech(self._client)
+
+    @cached_property
+    def with_raw_response(self) -> AsyncAudioWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return AsyncAudioWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> AsyncAudioWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return AsyncAudioWithStreamingResponse(self)
+
+
+class AudioWithRawResponse:
+    def __init__(self, audio: Audio) -> None:
+        self._audio = audio
+
+    @cached_property
+    def transcriptions(self) -> TranscriptionsWithRawResponse:
+        return TranscriptionsWithRawResponse(self._audio.transcriptions)
+
+    @cached_property
+    def translations(self) -> TranslationsWithRawResponse:
+        return TranslationsWithRawResponse(self._audio.translations)
+
+    @cached_property
+    def speech(self) -> SpeechWithRawResponse:
+        return SpeechWithRawResponse(self._audio.speech)
+
+
+class AsyncAudioWithRawResponse:
+    def __init__(self, audio: AsyncAudio) -> None:
+        self._audio = audio
+
+    @cached_property
+    def transcriptions(self) -> AsyncTranscriptionsWithRawResponse:
+        return AsyncTranscriptionsWithRawResponse(self._audio.transcriptions)
+
+    @cached_property
+    def translations(self) -> AsyncTranslationsWithRawResponse:
+        return AsyncTranslationsWithRawResponse(self._audio.translations)
+
+    @cached_property
+    def speech(self) -> AsyncSpeechWithRawResponse:
+        return AsyncSpeechWithRawResponse(self._audio.speech)
+
+
+class AudioWithStreamingResponse:
+    def __init__(self, audio: Audio) -> None:
+        self._audio = audio
+
+    @cached_property
+    def transcriptions(self) -> TranscriptionsWithStreamingResponse:
+        return TranscriptionsWithStreamingResponse(self._audio.transcriptions)
+
+    @cached_property
+    def translations(self) -> TranslationsWithStreamingResponse:
+        return TranslationsWithStreamingResponse(self._audio.translations)
+
+    @cached_property
+    def speech(self) -> SpeechWithStreamingResponse:
+        return SpeechWithStreamingResponse(self._audio.speech)
+
+
+class AsyncAudioWithStreamingResponse:
+    def __init__(self, audio: AsyncAudio) -> None:
+        self._audio = audio
+
+    @cached_property
+    def transcriptions(self) -> AsyncTranscriptionsWithStreamingResponse:
+        return AsyncTranscriptionsWithStreamingResponse(self._audio.transcriptions)
+
+    @cached_property
+    def translations(self) -> AsyncTranslationsWithStreamingResponse:
+        return AsyncTranslationsWithStreamingResponse(self._audio.translations)
+
+    @cached_property
+    def speech(self) -> AsyncSpeechWithStreamingResponse:
+        return AsyncSpeechWithStreamingResponse(self._audio.speech)
diff --git a/.venv/lib/python3.11/site-packages/openai/resources/audio/speech.py b/.venv/lib/python3.11/site-packages/openai/resources/audio/speech.py
new file mode 100644
index 0000000000000000000000000000000000000000..ad011181614593e7a4df48458c5de0e194bb2067
--- /dev/null
+++ b/.venv/lib/python3.11/site-packages/openai/resources/audio/speech.py
@@ -0,0 +1,234 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Union
+from typing_extensions import Literal
+
+import httpx
+
+from ... import _legacy_response
+from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven
+from ..._utils import (
+    maybe_transform,
+    async_maybe_transform,
+)
+from ..._compat import cached_property
+from ..._resource import SyncAPIResource, AsyncAPIResource
+from ..._response import (
+    StreamedBinaryAPIResponse,
+    AsyncStreamedBinaryAPIResponse,
+    to_custom_streamed_response_wrapper,
+    async_to_custom_streamed_response_wrapper,
+)
+from ...types.audio import speech_create_params
+from ..._base_client import make_request_options
+from ...types.audio.speech_model import SpeechModel
+
+__all__ = ["Speech", "AsyncSpeech"]
+
+
+class Speech(SyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> SpeechWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return SpeechWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> SpeechWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return SpeechWithStreamingResponse(self)
+
+    def create(
+        self,
+        *,
+        input: str,
+        model: Union[str, SpeechModel],
+        voice: Literal["alloy", "ash", "coral", "echo", "fable", "onyx", "nova", "sage", "shimmer"],
+        response_format: Literal["mp3", "opus", "aac", "flac", "wav", "pcm"] | NotGiven = NOT_GIVEN,
+        speed: float | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> _legacy_response.HttpxBinaryResponseContent:
+        """
+        Generates audio from the input text.
+
+        Args:
+          input: The text to generate audio for. The maximum length is 4096 characters.
+
+          model:
+              One of the available [TTS models](https://platform.openai.com/docs/models#tts):
+              `tts-1` or `tts-1-hd`
+
+          voice: The voice to use when generating the audio. Supported voices are `alloy`, `ash`,
+              `coral`, `echo`, `fable`, `onyx`, `nova`, `sage` and `shimmer`. Previews of the
+              voices are available in the
+              [Text to speech guide](https://platform.openai.com/docs/guides/text-to-speech#voice-options).
+
+          response_format: The format to audio in. Supported formats are `mp3`, `opus`, `aac`, `flac`,
+              `wav`, and `pcm`.
+
+          speed: The speed of the generated audio. Select a value from `0.25` to `4.0`. `1.0` is
+              the default.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        extra_headers = {"Accept": "application/octet-stream", **(extra_headers or {})}
+        return self._post(
+            "/audio/speech",
+            body=maybe_transform(
+                {
+                    "input": input,
+                    "model": model,
+                    "voice": voice,
+                    "response_format": response_format,
+                    "speed": speed,
+                },
+                speech_create_params.SpeechCreateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=_legacy_response.HttpxBinaryResponseContent,
+        )
+
+
+class AsyncSpeech(AsyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> AsyncSpeechWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return AsyncSpeechWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> AsyncSpeechWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return AsyncSpeechWithStreamingResponse(self)
+
+    async def create(
+        self,
+        *,
+        input: str,
+        model: Union[str, SpeechModel],
+        voice: Literal["alloy", "ash", "coral", "echo", "fable", "onyx", "nova", "sage", "shimmer"],
+        response_format: Literal["mp3", "opus", "aac", "flac", "wav", "pcm"] | NotGiven = NOT_GIVEN,
+        speed: float | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> _legacy_response.HttpxBinaryResponseContent:
+        """
+        Generates audio from the input text.
+
+        Args:
+          input: The text to generate audio for. The maximum length is 4096 characters.
+
+          model:
+              One of the available [TTS models](https://platform.openai.com/docs/models#tts):
+              `tts-1` or `tts-1-hd`
+
+          voice: The voice to use when generating the audio. Supported voices are `alloy`, `ash`,
+              `coral`, `echo`, `fable`, `onyx`, `nova`, `sage` and `shimmer`. Previews of the
+              voices are available in the
+              [Text to speech guide](https://platform.openai.com/docs/guides/text-to-speech#voice-options).
+
+          response_format: The format to audio in. Supported formats are `mp3`, `opus`, `aac`, `flac`,
+              `wav`, and `pcm`.
+
+          speed: The speed of the generated audio. Select a value from `0.25` to `4.0`. `1.0` is
+              the default.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        extra_headers = {"Accept": "application/octet-stream", **(extra_headers or {})}
+        return await self._post(
+            "/audio/speech",
+            body=await async_maybe_transform(
+                {
+                    "input": input,
+                    "model": model,
+                    "voice": voice,
+                    "response_format": response_format,
+                    "speed": speed,
+                },
+                speech_create_params.SpeechCreateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=_legacy_response.HttpxBinaryResponseContent,
+        )
+
+
+class SpeechWithRawResponse:
+    def __init__(self, speech: Speech) -> None:
+        self._speech = speech
+
+        self.create = _legacy_response.to_raw_response_wrapper(
+            speech.create,
+        )
+
+
+class AsyncSpeechWithRawResponse:
+    def __init__(self, speech: AsyncSpeech) -> None:
+        self._speech = speech
+
+        self.create = _legacy_response.async_to_raw_response_wrapper(
+            speech.create,
+        )
+
+
+class SpeechWithStreamingResponse:
+    def __init__(self, speech: Speech) -> None:
+        self._speech = speech
+
+        self.create = to_custom_streamed_response_wrapper(
+            speech.create,
+            StreamedBinaryAPIResponse,
+        )
+
+
+class AsyncSpeechWithStreamingResponse:
+    def __init__(self, speech: AsyncSpeech) -> None:
+        self._speech = speech
+
+        self.create = async_to_custom_streamed_response_wrapper(
+            speech.create,
+            AsyncStreamedBinaryAPIResponse,
+        )
diff --git a/.venv/lib/python3.11/site-packages/openai/resources/audio/transcriptions.py b/.venv/lib/python3.11/site-packages/openai/resources/audio/transcriptions.py
new file mode 100644
index 0000000000000000000000000000000000000000..f338ad067d96dc9dd1f3bc0553b9e6b5ac3f2506
--- /dev/null
+++ b/.venv/lib/python3.11/site-packages/openai/resources/audio/transcriptions.py
@@ -0,0 +1,415 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import logging
+from typing import TYPE_CHECKING, List, Union, Mapping, cast
+from typing_extensions import Literal, overload, assert_never
+
+import httpx
+
+from ... import _legacy_response
+from ...types import AudioResponseFormat
+from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven, FileTypes
+from ..._utils import (
+    extract_files,
+    maybe_transform,
+    deepcopy_minimal,
+    async_maybe_transform,
+)
+from ..._compat import cached_property
+from ..._resource import SyncAPIResource, AsyncAPIResource
+from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
+from ...types.audio import transcription_create_params
+from ..._base_client import make_request_options
+from ...types.audio_model import AudioModel
+from ...types.audio.transcription import Transcription
+from ...types.audio_response_format import AudioResponseFormat
+from ...types.audio.transcription_verbose import TranscriptionVerbose
+
+__all__ = ["Transcriptions", "AsyncTranscriptions"]
+
+log: logging.Logger = logging.getLogger("openai.audio.transcriptions")
+
+
+class Transcriptions(SyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> TranscriptionsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return TranscriptionsWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> TranscriptionsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return TranscriptionsWithStreamingResponse(self)
+
+    @overload
+    def create(
+        self,
+        *,
+        file: FileTypes,
+        model: Union[str, AudioModel],
+        response_format: Union[Literal["json"], NotGiven] = NOT_GIVEN,
+        language: str | NotGiven = NOT_GIVEN,
+        prompt: str | NotGiven = NOT_GIVEN,
+        temperature: float | NotGiven = NOT_GIVEN,
+        timestamp_granularities: List[Literal["word", "segment"]] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Transcription: ...
+
+    @overload
+    def create(
+        self,
+        *,
+        file: FileTypes,
+        model: Union[str, AudioModel],
+        response_format: Literal["verbose_json"],
+        language: str | NotGiven = NOT_GIVEN,
+        prompt: str | NotGiven = NOT_GIVEN,
+        temperature: float | NotGiven = NOT_GIVEN,
+        timestamp_granularities: List[Literal["word", "segment"]] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> TranscriptionVerbose: ...
+
+    @overload
+    def create(
+        self,
+        *,
+        file: FileTypes,
+        model: Union[str, AudioModel],
+        response_format: Literal["text", "srt", "vtt"],
+        language: str | NotGiven = NOT_GIVEN,
+        prompt: str | NotGiven = NOT_GIVEN,
+        temperature: float | NotGiven = NOT_GIVEN,
+        timestamp_granularities: List[Literal["word", "segment"]] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> str: ...
+
+    def create(
+        self,
+        *,
+        file: FileTypes,
+        model: Union[str, AudioModel],
+        language: str | NotGiven = NOT_GIVEN,
+        prompt: str | NotGiven = NOT_GIVEN,
+        response_format: Union[AudioResponseFormat, NotGiven] = NOT_GIVEN,
+        temperature: float | NotGiven = NOT_GIVEN,
+        timestamp_granularities: List[Literal["word", "segment"]] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Transcription | TranscriptionVerbose | str:
+        """
+        Transcribes audio into the input language.
+
+        Args:
+          file:
+              The audio file object (not file name) to transcribe, in one of these formats:
+              flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm.
+
+          model: ID of the model to use. Only `whisper-1` (which is powered by our open source
+              Whisper V2 model) is currently available.
+
+          language: The language of the input audio. Supplying the input language in
+              [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`)
+              format will improve accuracy and latency.
+
+          prompt: An optional text to guide the model's style or continue a previous audio
+              segment. The
+              [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting)
+              should match the audio language.
+
+          response_format: The format of the output, in one of these options: `json`, `text`, `srt`,
+              `verbose_json`, or `vtt`.
+
+          temperature: The sampling temperature, between 0 and 1. Higher values like 0.8 will make the
+              output more random, while lower values like 0.2 will make it more focused and
+              deterministic. If set to 0, the model will use
+              [log probability](https://en.wikipedia.org/wiki/Log_probability) to
+              automatically increase the temperature until certain thresholds are hit.
+
+          timestamp_granularities: The timestamp granularities to populate for this transcription.
+              `response_format` must be set `verbose_json` to use timestamp granularities.
+              Either or both of these options are supported: `word`, or `segment`. Note: There
+              is no additional latency for segment timestamps, but generating word timestamps
+              incurs additional latency.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        body = deepcopy_minimal(
+            {
+                "file": file,
+                "model": model,
+                "language": language,
+                "prompt": prompt,
+                "response_format": response_format,
+                "temperature": temperature,
+                "timestamp_granularities": timestamp_granularities,
+            }
+        )
+        files = extract_files(cast(Mapping[str, object], body), paths=[["file"]])
+        # It should be noted that the actual Content-Type header that will be
+        # sent to the server will contain a `boundary` parameter, e.g.
+        # multipart/form-data; boundary=---abc--
+        extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})}
+        return self._post(  # type: ignore[return-value]
+            "/audio/transcriptions",
+            body=maybe_transform(body, transcription_create_params.TranscriptionCreateParams),
+            files=files,
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=_get_response_format_type(response_format),
+        )
+
+
+class AsyncTranscriptions(AsyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> AsyncTranscriptionsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return AsyncTranscriptionsWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> AsyncTranscriptionsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return AsyncTranscriptionsWithStreamingResponse(self)
+
+    @overload
+    async def create(
+        self,
+        *,
+        file: FileTypes,
+        model: Union[str, AudioModel],
+        response_format: Union[Literal["json"], NotGiven] = NOT_GIVEN,
+        language: str | NotGiven = NOT_GIVEN,
+        prompt: str | NotGiven = NOT_GIVEN,
+        temperature: float | NotGiven = NOT_GIVEN,
+        timestamp_granularities: List[Literal["word", "segment"]] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Transcription: ...
+
+    @overload
+    async def create(
+        self,
+        *,
+        file: FileTypes,
+        model: Union[str, AudioModel],
+        response_format: Literal["verbose_json"],
+        language: str | NotGiven = NOT_GIVEN,
+        prompt: str | NotGiven = NOT_GIVEN,
+        temperature: float | NotGiven = NOT_GIVEN,
+        timestamp_granularities: List[Literal["word", "segment"]] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> TranscriptionVerbose: ...
+
+    @overload
+    async def create(
+        self,
+        *,
+        file: FileTypes,
+        model: Union[str, AudioModel],
+        response_format: Literal["text", "srt", "vtt"],
+        language: str | NotGiven = NOT_GIVEN,
+        prompt: str | NotGiven = NOT_GIVEN,
+        temperature: float | NotGiven = NOT_GIVEN,
+        timestamp_granularities: List[Literal["word", "segment"]] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> str: ...
+
+    async def create(
+        self,
+        *,
+        file: FileTypes,
+        model: Union[str, AudioModel],
+        language: str | NotGiven = NOT_GIVEN,
+        prompt: str | NotGiven = NOT_GIVEN,
+        response_format: Union[AudioResponseFormat, NotGiven] = NOT_GIVEN,
+        temperature: float | NotGiven = NOT_GIVEN,
+        timestamp_granularities: List[Literal["word", "segment"]] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Transcription | TranscriptionVerbose | str:
+        """
+        Transcribes audio into the input language.
+
+        Args:
+          file:
+              The audio file object (not file name) to transcribe, in one of these formats:
+              flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm.
+
+          model: ID of the model to use. Only `whisper-1` (which is powered by our open source
+              Whisper V2 model) is currently available.
+
+          language: The language of the input audio. Supplying the input language in
+              [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`)
+              format will improve accuracy and latency.
+
+          prompt: An optional text to guide the model's style or continue a previous audio
+              segment. The
+              [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting)
+              should match the audio language.
+
+          response_format: The format of the output, in one of these options: `json`, `text`, `srt`,
+              `verbose_json`, or `vtt`.
+
+          temperature: The sampling temperature, between 0 and 1. Higher values like 0.8 will make the
+              output more random, while lower values like 0.2 will make it more focused and
+              deterministic. If set to 0, the model will use
+              [log probability](https://en.wikipedia.org/wiki/Log_probability) to
+              automatically increase the temperature until certain thresholds are hit.
+
+          timestamp_granularities: The timestamp granularities to populate for this transcription.
+              `response_format` must be set `verbose_json` to use timestamp granularities.
+              Either or both of these options are supported: `word`, or `segment`. Note: There
+              is no additional latency for segment timestamps, but generating word timestamps
+              incurs additional latency.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        body = deepcopy_minimal(
+            {
+                "file": file,
+                "model": model,
+                "language": language,
+                "prompt": prompt,
+                "response_format": response_format,
+                "temperature": temperature,
+                "timestamp_granularities": timestamp_granularities,
+            }
+        )
+        files = extract_files(cast(Mapping[str, object], body), paths=[["file"]])
+        # It should be noted that the actual Content-Type header that will be
+        # sent to the server will contain a `boundary` parameter, e.g.
+        # multipart/form-data; boundary=---abc--
+        extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})}
+        return await self._post(
+            "/audio/transcriptions",
+            body=await async_maybe_transform(body, transcription_create_params.TranscriptionCreateParams),
+            files=files,
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=_get_response_format_type(response_format),
+        )
+
+
+class TranscriptionsWithRawResponse:
+    def __init__(self, transcriptions: Transcriptions) -> None:
+        self._transcriptions = transcriptions
+
+        self.create = _legacy_response.to_raw_response_wrapper(
+            transcriptions.create,
+        )
+
+
+class AsyncTranscriptionsWithRawResponse:
+    def __init__(self, transcriptions: AsyncTranscriptions) -> None:
+        self._transcriptions = transcriptions
+
+        self.create = _legacy_response.async_to_raw_response_wrapper(
+            transcriptions.create,
+        )
+
+
+class TranscriptionsWithStreamingResponse:
+    def __init__(self, transcriptions: Transcriptions) -> None:
+        self._transcriptions = transcriptions
+
+        self.create = to_streamed_response_wrapper(
+            transcriptions.create,
+        )
+
+
+class AsyncTranscriptionsWithStreamingResponse:
+    def __init__(self, transcriptions: AsyncTranscriptions) -> None:
+        self._transcriptions = transcriptions
+
+        self.create = async_to_streamed_response_wrapper(
+            transcriptions.create,
+        )
+
+
+def _get_response_format_type(
+    response_format: Literal["json", "text", "srt", "verbose_json", "vtt"] | NotGiven,
+) -> type[Transcription | TranscriptionVerbose | str]:
+    if isinstance(response_format, NotGiven) or response_format is None:  # pyright: ignore[reportUnnecessaryComparison]
+        return Transcription
+
+    if response_format == "json":
+        return Transcription
+    elif response_format == "verbose_json":
+        return TranscriptionVerbose
+    elif response_format == "srt" or response_format == "text" or response_format == "vtt":
+        return str
+    elif TYPE_CHECKING:  # type: ignore[unreachable]
+        assert_never(response_format)
+    else:
+        log.warn("Unexpected audio response format: %s", response_format)
+        return Transcription
diff --git a/.venv/lib/python3.11/site-packages/openai/resources/audio/translations.py b/.venv/lib/python3.11/site-packages/openai/resources/audio/translations.py
new file mode 100644
index 0000000000000000000000000000000000000000..cd3132dc579b7f8e43d526212e7d7c71ced4f174
--- /dev/null
+++ b/.venv/lib/python3.11/site-packages/openai/resources/audio/translations.py
@@ -0,0 +1,373 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import logging
+from typing import TYPE_CHECKING, Union, Mapping, cast
+from typing_extensions import Literal, overload, assert_never
+
+import httpx
+
+from ... import _legacy_response
+from ...types import AudioResponseFormat
+from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven, FileTypes
+from ..._utils import (
+    extract_files,
+    maybe_transform,
+    deepcopy_minimal,
+    async_maybe_transform,
+)
+from ..._compat import cached_property
+from ..._resource import SyncAPIResource, AsyncAPIResource
+from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
+from ...types.audio import translation_create_params
+from ..._base_client import make_request_options
+from ...types.audio_model import AudioModel
+from ...types.audio.translation import Translation
+from ...types.audio_response_format import AudioResponseFormat
+from ...types.audio.translation_verbose import TranslationVerbose
+
+__all__ = ["Translations", "AsyncTranslations"]
+
+log: logging.Logger = logging.getLogger("openai.audio.transcriptions")
+
+
+class Translations(SyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> TranslationsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return TranslationsWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> TranslationsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return TranslationsWithStreamingResponse(self)
+
+    @overload
+    def create(
+        self,
+        *,
+        file: FileTypes,
+        model: Union[str, AudioModel],
+        response_format: Union[Literal["json"], NotGiven] = NOT_GIVEN,
+        prompt: str | NotGiven = NOT_GIVEN,
+        temperature: float | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Translation: ...
+
+    @overload
+    def create(
+        self,
+        *,
+        file: FileTypes,
+        model: Union[str, AudioModel],
+        response_format: Literal["verbose_json"],
+        prompt: str | NotGiven = NOT_GIVEN,
+        temperature: float | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> TranslationVerbose: ...
+
+    @overload
+    def create(
+        self,
+        *,
+        file: FileTypes,
+        model: Union[str, AudioModel],
+        response_format: Literal["text", "srt", "vtt"],
+        prompt: str | NotGiven = NOT_GIVEN,
+        temperature: float | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> str: ...
+
+    def create(
+        self,
+        *,
+        file: FileTypes,
+        model: Union[str, AudioModel],
+        prompt: str | NotGiven = NOT_GIVEN,
+        response_format: Union[AudioResponseFormat, NotGiven] = NOT_GIVEN,
+        temperature: float | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Translation | TranslationVerbose | str:
+        """
+        Translates audio into English.
+
+        Args:
+          file: The audio file object (not file name) translate, in one of these formats: flac,
+              mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm.
+
+          model: ID of the model to use. Only `whisper-1` (which is powered by our open source
+              Whisper V2 model) is currently available.
+
+          prompt: An optional text to guide the model's style or continue a previous audio
+              segment. The
+              [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting)
+              should be in English.
+
+          response_format: The format of the output, in one of these options: `json`, `text`, `srt`,
+              `verbose_json`, or `vtt`.
+
+          temperature: The sampling temperature, between 0 and 1. Higher values like 0.8 will make the
+              output more random, while lower values like 0.2 will make it more focused and
+              deterministic. If set to 0, the model will use
+              [log probability](https://en.wikipedia.org/wiki/Log_probability) to
+              automatically increase the temperature until certain thresholds are hit.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        body = deepcopy_minimal(
+            {
+                "file": file,
+                "model": model,
+                "prompt": prompt,
+                "response_format": response_format,
+                "temperature": temperature,
+            }
+        )
+        files = extract_files(cast(Mapping[str, object], body), paths=[["file"]])
+        # It should be noted that the actual Content-Type header that will be
+        # sent to the server will contain a `boundary` parameter, e.g.
+        # multipart/form-data; boundary=---abc--
+        extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})}
+        return self._post(  # type: ignore[return-value]
+            "/audio/translations",
+            body=maybe_transform(body, translation_create_params.TranslationCreateParams),
+            files=files,
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=_get_response_format_type(response_format),
+        )
+
+
+class AsyncTranslations(AsyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> AsyncTranslationsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return AsyncTranslationsWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> AsyncTranslationsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return AsyncTranslationsWithStreamingResponse(self)
+
+    @overload
+    async def create(
+        self,
+        *,
+        file: FileTypes,
+        model: Union[str, AudioModel],
+        response_format: Union[Literal["json"], NotGiven] = NOT_GIVEN,
+        prompt: str | NotGiven = NOT_GIVEN,
+        temperature: float | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Translation: ...
+
+    @overload
+    async def create(
+        self,
+        *,
+        file: FileTypes,
+        model: Union[str, AudioModel],
+        response_format: Literal["verbose_json"],
+        prompt: str | NotGiven = NOT_GIVEN,
+        temperature: float | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> TranslationVerbose: ...
+
+    @overload
+    async def create(
+        self,
+        *,
+        file: FileTypes,
+        model: Union[str, AudioModel],
+        response_format: Literal["text", "srt", "vtt"],
+        prompt: str | NotGiven = NOT_GIVEN,
+        temperature: float | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> str: ...
+
+    async def create(
+        self,
+        *,
+        file: FileTypes,
+        model: Union[str, AudioModel],
+        prompt: str | NotGiven = NOT_GIVEN,
+        response_format: Union[AudioResponseFormat, NotGiven] = NOT_GIVEN,
+        temperature: float | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Translation | TranslationVerbose | str:
+        """
+        Translates audio into English.
+
+        Args:
+          file: The audio file object (not file name) translate, in one of these formats: flac,
+              mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm.
+
+          model: ID of the model to use. Only `whisper-1` (which is powered by our open source
+              Whisper V2 model) is currently available.
+
+          prompt: An optional text to guide the model's style or continue a previous audio
+              segment. The
+              [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting)
+              should be in English.
+
+          response_format: The format of the output, in one of these options: `json`, `text`, `srt`,
+              `verbose_json`, or `vtt`.
+
+          temperature: The sampling temperature, between 0 and 1. Higher values like 0.8 will make the
+              output more random, while lower values like 0.2 will make it more focused and
+              deterministic. If set to 0, the model will use
+              [log probability](https://en.wikipedia.org/wiki/Log_probability) to
+              automatically increase the temperature until certain thresholds are hit.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        body = deepcopy_minimal(
+            {
+                "file": file,
+                "model": model,
+                "prompt": prompt,
+                "response_format": response_format,
+                "temperature": temperature,
+            }
+        )
+        files = extract_files(cast(Mapping[str, object], body), paths=[["file"]])
+        # It should be noted that the actual Content-Type header that will be
+        # sent to the server will contain a `boundary` parameter, e.g.
+        # multipart/form-data; boundary=---abc--
+        extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})}
+        return await self._post(
+            "/audio/translations",
+            body=await async_maybe_transform(body, translation_create_params.TranslationCreateParams),
+            files=files,
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=_get_response_format_type(response_format),
+        )
+
+
+class TranslationsWithRawResponse:
+    def __init__(self, translations: Translations) -> None:
+        self._translations = translations
+
+        self.create = _legacy_response.to_raw_response_wrapper(
+            translations.create,
+        )
+
+
+class AsyncTranslationsWithRawResponse:
+    def __init__(self, translations: AsyncTranslations) -> None:
+        self._translations = translations
+
+        self.create = _legacy_response.async_to_raw_response_wrapper(
+            translations.create,
+        )
+
+
+class TranslationsWithStreamingResponse:
+    def __init__(self, translations: Translations) -> None:
+        self._translations = translations
+
+        self.create = to_streamed_response_wrapper(
+            translations.create,
+        )
+
+
+class AsyncTranslationsWithStreamingResponse:
+    def __init__(self, translations: AsyncTranslations) -> None:
+        self._translations = translations
+
+        self.create = async_to_streamed_response_wrapper(
+            translations.create,
+        )
+
+
+def _get_response_format_type(
+    response_format: Literal["json", "text", "srt", "verbose_json", "vtt"] | NotGiven,
+) -> type[Translation | TranslationVerbose | str]:
+    if isinstance(response_format, NotGiven) or response_format is None:  # pyright: ignore[reportUnnecessaryComparison]
+        return Translation
+
+    if response_format == "json":
+        return Translation
+    elif response_format == "verbose_json":
+        return TranslationVerbose
+    elif response_format == "srt" or response_format == "text" or response_format == "vtt":
+        return str
+    elif TYPE_CHECKING:  # type: ignore[unreachable]
+        assert_never(response_format)
+    else:
+        log.warn("Unexpected audio response format: %s", response_format)
+        return Transcription
diff --git a/.venv/lib/python3.11/site-packages/openai/resources/batches.py b/.venv/lib/python3.11/site-packages/openai/resources/batches.py
new file mode 100644
index 0000000000000000000000000000000000000000..7e7ec19ec23b2425d15d8912a0c52726776d1456
--- /dev/null
+++ b/.venv/lib/python3.11/site-packages/openai/resources/batches.py
@@ -0,0 +1,517 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Optional
+from typing_extensions import Literal
+
+import httpx
+
+from .. import _legacy_response
+from ..types import batch_list_params, batch_create_params
+from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven
+from .._utils import (
+    maybe_transform,
+    async_maybe_transform,
+)
+from .._compat import cached_property
+from .._resource import SyncAPIResource, AsyncAPIResource
+from .._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
+from ..pagination import SyncCursorPage, AsyncCursorPage
+from ..types.batch import Batch
+from .._base_client import AsyncPaginator, make_request_options
+from ..types.shared_params.metadata import Metadata
+
+__all__ = ["Batches", "AsyncBatches"]
+
+
+class Batches(SyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> BatchesWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return BatchesWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> BatchesWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return BatchesWithStreamingResponse(self)
+
+    def create(
+        self,
+        *,
+        completion_window: Literal["24h"],
+        endpoint: Literal["/v1/chat/completions", "/v1/embeddings", "/v1/completions"],
+        input_file_id: str,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Batch:
+        """
+        Creates and executes a batch from an uploaded file of requests
+
+        Args:
+          completion_window: The time frame within which the batch should be processed. Currently only `24h`
+              is supported.
+
+          endpoint: The endpoint to be used for all requests in the batch. Currently
+              `/v1/chat/completions`, `/v1/embeddings`, and `/v1/completions` are supported.
+              Note that `/v1/embeddings` batches are also restricted to a maximum of 50,000
+              embedding inputs across all requests in the batch.
+
+          input_file_id: The ID of an uploaded file that contains requests for the new batch.
+
+              See [upload file](https://platform.openai.com/docs/api-reference/files/create)
+              for how to upload a file.
+
+              Your input file must be formatted as a
+              [JSONL file](https://platform.openai.com/docs/api-reference/batch/request-input),
+              and must be uploaded with the purpose `batch`. The file can contain up to 50,000
+              requests, and can be up to 200 MB in size.
+
+          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+              for storing additional information about the object in a structured format, and
+              querying for objects via API or the dashboard.
+
+              Keys are strings with a maximum length of 64 characters. Values are strings with
+              a maximum length of 512 characters.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        return self._post(
+            "/batches",
+            body=maybe_transform(
+                {
+                    "completion_window": completion_window,
+                    "endpoint": endpoint,
+                    "input_file_id": input_file_id,
+                    "metadata": metadata,
+                },
+                batch_create_params.BatchCreateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Batch,
+        )
+
+    def retrieve(
+        self,
+        batch_id: str,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Batch:
+        """
+        Retrieves a batch.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not batch_id:
+            raise ValueError(f"Expected a non-empty value for `batch_id` but received {batch_id!r}")
+        return self._get(
+            f"/batches/{batch_id}",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Batch,
+        )
+
+    def list(
+        self,
+        *,
+        after: str | NotGiven = NOT_GIVEN,
+        limit: int | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> SyncCursorPage[Batch]:
+        """List your organization's batches.
+
+        Args:
+          after: A cursor for use in pagination.
+
+        `after` is an object ID that defines your place
+              in the list. For instance, if you make a list request and receive 100 objects,
+              ending with obj_foo, your subsequent call can include after=obj_foo in order to
+              fetch the next page of the list.
+
+          limit: A limit on the number of objects to be returned. Limit can range between 1 and
+              100, and the default is 20.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        return self._get_api_list(
+            "/batches",
+            page=SyncCursorPage[Batch],
+            options=make_request_options(
+                extra_headers=extra_headers,
+                extra_query=extra_query,
+                extra_body=extra_body,
+                timeout=timeout,
+                query=maybe_transform(
+                    {
+                        "after": after,
+                        "limit": limit,
+                    },
+                    batch_list_params.BatchListParams,
+                ),
+            ),
+            model=Batch,
+        )
+
+    def cancel(
+        self,
+        batch_id: str,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Batch:
+        """Cancels an in-progress batch.
+
+        The batch will be in status `cancelling` for up to
+        10 minutes, before changing to `cancelled`, where it will have partial results
+        (if any) available in the output file.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not batch_id:
+            raise ValueError(f"Expected a non-empty value for `batch_id` but received {batch_id!r}")
+        return self._post(
+            f"/batches/{batch_id}/cancel",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Batch,
+        )
+
+
+class AsyncBatches(AsyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> AsyncBatchesWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return AsyncBatchesWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> AsyncBatchesWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return AsyncBatchesWithStreamingResponse(self)
+
+    async def create(
+        self,
+        *,
+        completion_window: Literal["24h"],
+        endpoint: Literal["/v1/chat/completions", "/v1/embeddings", "/v1/completions"],
+        input_file_id: str,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Batch:
+        """
+        Creates and executes a batch from an uploaded file of requests
+
+        Args:
+          completion_window: The time frame within which the batch should be processed. Currently only `24h`
+              is supported.
+
+          endpoint: The endpoint to be used for all requests in the batch. Currently
+              `/v1/chat/completions`, `/v1/embeddings`, and `/v1/completions` are supported.
+              Note that `/v1/embeddings` batches are also restricted to a maximum of 50,000
+              embedding inputs across all requests in the batch.
+
+          input_file_id: The ID of an uploaded file that contains requests for the new batch.
+
+              See [upload file](https://platform.openai.com/docs/api-reference/files/create)
+              for how to upload a file.
+
+              Your input file must be formatted as a
+              [JSONL file](https://platform.openai.com/docs/api-reference/batch/request-input),
+              and must be uploaded with the purpose `batch`. The file can contain up to 50,000
+              requests, and can be up to 200 MB in size.
+
+          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+              for storing additional information about the object in a structured format, and
+              querying for objects via API or the dashboard.
+
+              Keys are strings with a maximum length of 64 characters. Values are strings with
+              a maximum length of 512 characters.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        return await self._post(
+            "/batches",
+            body=await async_maybe_transform(
+                {
+                    "completion_window": completion_window,
+                    "endpoint": endpoint,
+                    "input_file_id": input_file_id,
+                    "metadata": metadata,
+                },
+                batch_create_params.BatchCreateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Batch,
+        )
+
+    async def retrieve(
+        self,
+        batch_id: str,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Batch:
+        """
+        Retrieves a batch.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not batch_id:
+            raise ValueError(f"Expected a non-empty value for `batch_id` but received {batch_id!r}")
+        return await self._get(
+            f"/batches/{batch_id}",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Batch,
+        )
+
+    def list(
+        self,
+        *,
+        after: str | NotGiven = NOT_GIVEN,
+        limit: int | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AsyncPaginator[Batch, AsyncCursorPage[Batch]]:
+        """List your organization's batches.
+
+        Args:
+          after: A cursor for use in pagination.
+
+        `after` is an object ID that defines your place
+              in the list. For instance, if you make a list request and receive 100 objects,
+              ending with obj_foo, your subsequent call can include after=obj_foo in order to
+              fetch the next page of the list.
+
+          limit: A limit on the number of objects to be returned. Limit can range between 1 and
+              100, and the default is 20.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        return self._get_api_list(
+            "/batches",
+            page=AsyncCursorPage[Batch],
+            options=make_request_options(
+                extra_headers=extra_headers,
+                extra_query=extra_query,
+                extra_body=extra_body,
+                timeout=timeout,
+                query=maybe_transform(
+                    {
+                        "after": after,
+                        "limit": limit,
+                    },
+                    batch_list_params.BatchListParams,
+                ),
+            ),
+            model=Batch,
+        )
+
+    async def cancel(
+        self,
+        batch_id: str,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Batch:
+        """Cancels an in-progress batch.
+
+        The batch will be in status `cancelling` for up to
+        10 minutes, before changing to `cancelled`, where it will have partial results
+        (if any) available in the output file.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not batch_id:
+            raise ValueError(f"Expected a non-empty value for `batch_id` but received {batch_id!r}")
+        return await self._post(
+            f"/batches/{batch_id}/cancel",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Batch,
+        )
+
+
+class BatchesWithRawResponse:
+    def __init__(self, batches: Batches) -> None:
+        self._batches = batches
+
+        self.create = _legacy_response.to_raw_response_wrapper(
+            batches.create,
+        )
+        self.retrieve = _legacy_response.to_raw_response_wrapper(
+            batches.retrieve,
+        )
+        self.list = _legacy_response.to_raw_response_wrapper(
+            batches.list,
+        )
+        self.cancel = _legacy_response.to_raw_response_wrapper(
+            batches.cancel,
+        )
+
+
+class AsyncBatchesWithRawResponse:
+    def __init__(self, batches: AsyncBatches) -> None:
+        self._batches = batches
+
+        self.create = _legacy_response.async_to_raw_response_wrapper(
+            batches.create,
+        )
+        self.retrieve = _legacy_response.async_to_raw_response_wrapper(
+            batches.retrieve,
+        )
+        self.list = _legacy_response.async_to_raw_response_wrapper(
+            batches.list,
+        )
+        self.cancel = _legacy_response.async_to_raw_response_wrapper(
+            batches.cancel,
+        )
+
+
+class BatchesWithStreamingResponse:
+    def __init__(self, batches: Batches) -> None:
+        self._batches = batches
+
+        self.create = to_streamed_response_wrapper(
+            batches.create,
+        )
+        self.retrieve = to_streamed_response_wrapper(
+            batches.retrieve,
+        )
+        self.list = to_streamed_response_wrapper(
+            batches.list,
+        )
+        self.cancel = to_streamed_response_wrapper(
+            batches.cancel,
+        )
+
+
+class AsyncBatchesWithStreamingResponse:
+    def __init__(self, batches: AsyncBatches) -> None:
+        self._batches = batches
+
+        self.create = async_to_streamed_response_wrapper(
+            batches.create,
+        )
+        self.retrieve = async_to_streamed_response_wrapper(
+            batches.retrieve,
+        )
+        self.list = async_to_streamed_response_wrapper(
+            batches.list,
+        )
+        self.cancel = async_to_streamed_response_wrapper(
+            batches.cancel,
+        )
diff --git a/.venv/lib/python3.11/site-packages/openai/resources/beta/__init__.py b/.venv/lib/python3.11/site-packages/openai/resources/beta/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..01f5338757e5c0cd2101d74782f7401e6521e8de
--- /dev/null
+++ b/.venv/lib/python3.11/site-packages/openai/resources/beta/__init__.py
@@ -0,0 +1,61 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from .beta import (
+    Beta,
+    AsyncBeta,
+    BetaWithRawResponse,
+    AsyncBetaWithRawResponse,
+    BetaWithStreamingResponse,
+    AsyncBetaWithStreamingResponse,
+)
+from .threads import (
+    Threads,
+    AsyncThreads,
+    ThreadsWithRawResponse,
+    AsyncThreadsWithRawResponse,
+    ThreadsWithStreamingResponse,
+    AsyncThreadsWithStreamingResponse,
+)
+from .assistants import (
+    Assistants,
+    AsyncAssistants,
+    AssistantsWithRawResponse,
+    AsyncAssistantsWithRawResponse,
+    AssistantsWithStreamingResponse,
+    AsyncAssistantsWithStreamingResponse,
+)
+from .vector_stores import (
+    VectorStores,
+    AsyncVectorStores,
+    VectorStoresWithRawResponse,
+    AsyncVectorStoresWithRawResponse,
+    VectorStoresWithStreamingResponse,
+    AsyncVectorStoresWithStreamingResponse,
+)
+
+__all__ = [
+    "VectorStores",
+    "AsyncVectorStores",
+    "VectorStoresWithRawResponse",
+    "AsyncVectorStoresWithRawResponse",
+    "VectorStoresWithStreamingResponse",
+    "AsyncVectorStoresWithStreamingResponse",
+    "Assistants",
+    "AsyncAssistants",
+    "AssistantsWithRawResponse",
+    "AsyncAssistantsWithRawResponse",
+    "AssistantsWithStreamingResponse",
+    "AsyncAssistantsWithStreamingResponse",
+    "Threads",
+    "AsyncThreads",
+    "ThreadsWithRawResponse",
+    "AsyncThreadsWithRawResponse",
+    "ThreadsWithStreamingResponse",
+    "AsyncThreadsWithStreamingResponse",
+    "Beta",
+    "AsyncBeta",
+    "BetaWithRawResponse",
+    "AsyncBetaWithRawResponse",
+    "BetaWithStreamingResponse",
+    "AsyncBetaWithStreamingResponse",
+]
diff --git a/.venv/lib/python3.11/site-packages/openai/resources/beta/__pycache__/__init__.cpython-311.pyc b/.venv/lib/python3.11/site-packages/openai/resources/beta/__pycache__/__init__.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..d2c7dc9f3f2181d40e5c1a289f70ec764d56820e
Binary files /dev/null and b/.venv/lib/python3.11/site-packages/openai/resources/beta/__pycache__/__init__.cpython-311.pyc differ
diff --git a/.venv/lib/python3.11/site-packages/openai/resources/beta/__pycache__/assistants.cpython-311.pyc b/.venv/lib/python3.11/site-packages/openai/resources/beta/__pycache__/assistants.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..c311dfa979c302742bbcacbfe539a501e3e12014
Binary files /dev/null and b/.venv/lib/python3.11/site-packages/openai/resources/beta/__pycache__/assistants.cpython-311.pyc differ
diff --git a/.venv/lib/python3.11/site-packages/openai/resources/beta/__pycache__/beta.cpython-311.pyc b/.venv/lib/python3.11/site-packages/openai/resources/beta/__pycache__/beta.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..11ed7f523f3eac74390426d6d6fd66d8894e1de4
Binary files /dev/null and b/.venv/lib/python3.11/site-packages/openai/resources/beta/__pycache__/beta.cpython-311.pyc differ
diff --git a/.venv/lib/python3.11/site-packages/openai/resources/beta/assistants.py b/.venv/lib/python3.11/site-packages/openai/resources/beta/assistants.py
new file mode 100644
index 0000000000000000000000000000000000000000..65b7c9cfc2dd5b3ae9655f2c1461d467c816c7aa
--- /dev/null
+++ b/.venv/lib/python3.11/site-packages/openai/resources/beta/assistants.py
@@ -0,0 +1,897 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Union, Iterable, Optional
+from typing_extensions import Literal
+
+import httpx
+
+from ... import _legacy_response
+from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven
+from ..._utils import (
+    maybe_transform,
+    async_maybe_transform,
+)
+from ..._compat import cached_property
+from ..._resource import SyncAPIResource, AsyncAPIResource
+from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
+from ...pagination import SyncCursorPage, AsyncCursorPage
+from ...types.beta import (
+    assistant_list_params,
+    assistant_create_params,
+    assistant_update_params,
+)
+from ..._base_client import AsyncPaginator, make_request_options
+from ...types.chat_model import ChatModel
+from ...types.beta.assistant import Assistant
+from ...types.beta.assistant_deleted import AssistantDeleted
+from ...types.shared_params.metadata import Metadata
+from ...types.beta.assistant_tool_param import AssistantToolParam
+from ...types.beta.assistant_response_format_option_param import AssistantResponseFormatOptionParam
+
+__all__ = ["Assistants", "AsyncAssistants"]
+
+
+class Assistants(SyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> AssistantsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return AssistantsWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> AssistantsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return AssistantsWithStreamingResponse(self)
+
+    def create(
+        self,
+        *,
+        model: Union[str, ChatModel],
+        description: Optional[str] | NotGiven = NOT_GIVEN,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        name: Optional[str] | NotGiven = NOT_GIVEN,
+        response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        tool_resources: Optional[assistant_create_params.ToolResources] | NotGiven = NOT_GIVEN,
+        tools: Iterable[AssistantToolParam] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Assistant:
+        """
+        Create an assistant with a model and instructions.
+
+        Args:
+          model: ID of the model to use. You can use the
+              [List models](https://platform.openai.com/docs/api-reference/models/list) API to
+              see all of your available models, or see our
+              [Model overview](https://platform.openai.com/docs/models) for descriptions of
+              them.
+
+          description: The description of the assistant. The maximum length is 512 characters.
+
+          instructions: The system instructions that the assistant uses. The maximum length is 256,000
+              characters.
+
+          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+              for storing additional information about the object in a structured format, and
+              querying for objects via API or the dashboard.
+
+              Keys are strings with a maximum length of 64 characters. Values are strings with
+              a maximum length of 512 characters.
+
+          name: The name of the assistant. The maximum length is 256 characters.
+
+          response_format: Specifies the format that the model must output. Compatible with
+              [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
+              [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4),
+              and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
+
+              Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
+              Outputs which ensures the model will match your supplied JSON schema. Learn more
+              in the
+              [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
+
+              Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
+              message the model generates is valid JSON.
+
+              **Important:** when using JSON mode, you **must** also instruct the model to
+              produce JSON yourself via a system or user message. Without this, the model may
+              generate an unending stream of whitespace until the generation reaches the token
+              limit, resulting in a long-running and seemingly "stuck" request. Also note that
+              the message content may be partially cut off if `finish_reason="length"`, which
+              indicates the generation exceeded `max_tokens` or the conversation exceeded the
+              max context length.
+
+          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
+              make the output more random, while lower values like 0.2 will make it more
+              focused and deterministic.
+
+          tool_resources: A set of resources that are used by the assistant's tools. The resources are
+              specific to the type of tool. For example, the `code_interpreter` tool requires
+              a list of file IDs, while the `file_search` tool requires a list of vector store
+              IDs.
+
+          tools: A list of tool enabled on the assistant. There can be a maximum of 128 tools per
+              assistant. Tools can be of types `code_interpreter`, `file_search`, or
+              `function`.
+
+          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
+              model considers the results of the tokens with top_p probability mass. So 0.1
+              means only the tokens comprising the top 10% probability mass are considered.
+
+              We generally recommend altering this or temperature but not both.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return self._post(
+            "/assistants",
+            body=maybe_transform(
+                {
+                    "model": model,
+                    "description": description,
+                    "instructions": instructions,
+                    "metadata": metadata,
+                    "name": name,
+                    "response_format": response_format,
+                    "temperature": temperature,
+                    "tool_resources": tool_resources,
+                    "tools": tools,
+                    "top_p": top_p,
+                },
+                assistant_create_params.AssistantCreateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Assistant,
+        )
+
+    def retrieve(
+        self,
+        assistant_id: str,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Assistant:
+        """
+        Retrieves an assistant.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not assistant_id:
+            raise ValueError(f"Expected a non-empty value for `assistant_id` but received {assistant_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return self._get(
+            f"/assistants/{assistant_id}",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Assistant,
+        )
+
+    def update(
+        self,
+        assistant_id: str,
+        *,
+        description: Optional[str] | NotGiven = NOT_GIVEN,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        model: str | NotGiven = NOT_GIVEN,
+        name: Optional[str] | NotGiven = NOT_GIVEN,
+        response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        tool_resources: Optional[assistant_update_params.ToolResources] | NotGiven = NOT_GIVEN,
+        tools: Iterable[AssistantToolParam] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Assistant:
+        """Modifies an assistant.
+
+        Args:
+          description: The description of the assistant.
+
+        The maximum length is 512 characters.
+
+          instructions: The system instructions that the assistant uses. The maximum length is 256,000
+              characters.
+
+          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+              for storing additional information about the object in a structured format, and
+              querying for objects via API or the dashboard.
+
+              Keys are strings with a maximum length of 64 characters. Values are strings with
+              a maximum length of 512 characters.
+
+          model: ID of the model to use. You can use the
+              [List models](https://platform.openai.com/docs/api-reference/models/list) API to
+              see all of your available models, or see our
+              [Model overview](https://platform.openai.com/docs/models) for descriptions of
+              them.
+
+          name: The name of the assistant. The maximum length is 256 characters.
+
+          response_format: Specifies the format that the model must output. Compatible with
+              [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
+              [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4),
+              and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
+
+              Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
+              Outputs which ensures the model will match your supplied JSON schema. Learn more
+              in the
+              [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
+
+              Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
+              message the model generates is valid JSON.
+
+              **Important:** when using JSON mode, you **must** also instruct the model to
+              produce JSON yourself via a system or user message. Without this, the model may
+              generate an unending stream of whitespace until the generation reaches the token
+              limit, resulting in a long-running and seemingly "stuck" request. Also note that
+              the message content may be partially cut off if `finish_reason="length"`, which
+              indicates the generation exceeded `max_tokens` or the conversation exceeded the
+              max context length.
+
+          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
+              make the output more random, while lower values like 0.2 will make it more
+              focused and deterministic.
+
+          tool_resources: A set of resources that are used by the assistant's tools. The resources are
+              specific to the type of tool. For example, the `code_interpreter` tool requires
+              a list of file IDs, while the `file_search` tool requires a list of vector store
+              IDs.
+
+          tools: A list of tool enabled on the assistant. There can be a maximum of 128 tools per
+              assistant. Tools can be of types `code_interpreter`, `file_search`, or
+              `function`.
+
+          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
+              model considers the results of the tokens with top_p probability mass. So 0.1
+              means only the tokens comprising the top 10% probability mass are considered.
+
+              We generally recommend altering this or temperature but not both.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not assistant_id:
+            raise ValueError(f"Expected a non-empty value for `assistant_id` but received {assistant_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return self._post(
+            f"/assistants/{assistant_id}",
+            body=maybe_transform(
+                {
+                    "description": description,
+                    "instructions": instructions,
+                    "metadata": metadata,
+                    "model": model,
+                    "name": name,
+                    "response_format": response_format,
+                    "temperature": temperature,
+                    "tool_resources": tool_resources,
+                    "tools": tools,
+                    "top_p": top_p,
+                },
+                assistant_update_params.AssistantUpdateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Assistant,
+        )
+
+    def list(
+        self,
+        *,
+        after: str | NotGiven = NOT_GIVEN,
+        before: str | NotGiven = NOT_GIVEN,
+        limit: int | NotGiven = NOT_GIVEN,
+        order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> SyncCursorPage[Assistant]:
+        """Returns a list of assistants.
+
+        Args:
+          after: A cursor for use in pagination.
+
+        `after` is an object ID that defines your place
+              in the list. For instance, if you make a list request and receive 100 objects,
+              ending with obj_foo, your subsequent call can include after=obj_foo in order to
+              fetch the next page of the list.
+
+          before: A cursor for use in pagination. `before` is an object ID that defines your place
+              in the list. For instance, if you make a list request and receive 100 objects,
+              starting with obj_foo, your subsequent call can include before=obj_foo in order
+              to fetch the previous page of the list.
+
+          limit: A limit on the number of objects to be returned. Limit can range between 1 and
+              100, and the default is 20.
+
+          order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending
+              order and `desc` for descending order.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return self._get_api_list(
+            "/assistants",
+            page=SyncCursorPage[Assistant],
+            options=make_request_options(
+                extra_headers=extra_headers,
+                extra_query=extra_query,
+                extra_body=extra_body,
+                timeout=timeout,
+                query=maybe_transform(
+                    {
+                        "after": after,
+                        "before": before,
+                        "limit": limit,
+                        "order": order,
+                    },
+                    assistant_list_params.AssistantListParams,
+                ),
+            ),
+            model=Assistant,
+        )
+
+    def delete(
+        self,
+        assistant_id: str,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AssistantDeleted:
+        """
+        Delete an assistant.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not assistant_id:
+            raise ValueError(f"Expected a non-empty value for `assistant_id` but received {assistant_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return self._delete(
+            f"/assistants/{assistant_id}",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=AssistantDeleted,
+        )
+
+
+class AsyncAssistants(AsyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> AsyncAssistantsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return AsyncAssistantsWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> AsyncAssistantsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return AsyncAssistantsWithStreamingResponse(self)
+
+    async def create(
+        self,
+        *,
+        model: Union[str, ChatModel],
+        description: Optional[str] | NotGiven = NOT_GIVEN,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        name: Optional[str] | NotGiven = NOT_GIVEN,
+        response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        tool_resources: Optional[assistant_create_params.ToolResources] | NotGiven = NOT_GIVEN,
+        tools: Iterable[AssistantToolParam] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Assistant:
+        """
+        Create an assistant with a model and instructions.
+
+        Args:
+          model: ID of the model to use. You can use the
+              [List models](https://platform.openai.com/docs/api-reference/models/list) API to
+              see all of your available models, or see our
+              [Model overview](https://platform.openai.com/docs/models) for descriptions of
+              them.
+
+          description: The description of the assistant. The maximum length is 512 characters.
+
+          instructions: The system instructions that the assistant uses. The maximum length is 256,000
+              characters.
+
+          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+              for storing additional information about the object in a structured format, and
+              querying for objects via API or the dashboard.
+
+              Keys are strings with a maximum length of 64 characters. Values are strings with
+              a maximum length of 512 characters.
+
+          name: The name of the assistant. The maximum length is 256 characters.
+
+          response_format: Specifies the format that the model must output. Compatible with
+              [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
+              [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4),
+              and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
+
+              Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
+              Outputs which ensures the model will match your supplied JSON schema. Learn more
+              in the
+              [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
+
+              Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
+              message the model generates is valid JSON.
+
+              **Important:** when using JSON mode, you **must** also instruct the model to
+              produce JSON yourself via a system or user message. Without this, the model may
+              generate an unending stream of whitespace until the generation reaches the token
+              limit, resulting in a long-running and seemingly "stuck" request. Also note that
+              the message content may be partially cut off if `finish_reason="length"`, which
+              indicates the generation exceeded `max_tokens` or the conversation exceeded the
+              max context length.
+
+          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
+              make the output more random, while lower values like 0.2 will make it more
+              focused and deterministic.
+
+          tool_resources: A set of resources that are used by the assistant's tools. The resources are
+              specific to the type of tool. For example, the `code_interpreter` tool requires
+              a list of file IDs, while the `file_search` tool requires a list of vector store
+              IDs.
+
+          tools: A list of tool enabled on the assistant. There can be a maximum of 128 tools per
+              assistant. Tools can be of types `code_interpreter`, `file_search`, or
+              `function`.
+
+          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
+              model considers the results of the tokens with top_p probability mass. So 0.1
+              means only the tokens comprising the top 10% probability mass are considered.
+
+              We generally recommend altering this or temperature but not both.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return await self._post(
+            "/assistants",
+            body=await async_maybe_transform(
+                {
+                    "model": model,
+                    "description": description,
+                    "instructions": instructions,
+                    "metadata": metadata,
+                    "name": name,
+                    "response_format": response_format,
+                    "temperature": temperature,
+                    "tool_resources": tool_resources,
+                    "tools": tools,
+                    "top_p": top_p,
+                },
+                assistant_create_params.AssistantCreateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Assistant,
+        )
+
+    async def retrieve(
+        self,
+        assistant_id: str,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Assistant:
+        """
+        Retrieves an assistant.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not assistant_id:
+            raise ValueError(f"Expected a non-empty value for `assistant_id` but received {assistant_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return await self._get(
+            f"/assistants/{assistant_id}",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Assistant,
+        )
+
+    async def update(
+        self,
+        assistant_id: str,
+        *,
+        description: Optional[str] | NotGiven = NOT_GIVEN,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        model: str | NotGiven = NOT_GIVEN,
+        name: Optional[str] | NotGiven = NOT_GIVEN,
+        response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        tool_resources: Optional[assistant_update_params.ToolResources] | NotGiven = NOT_GIVEN,
+        tools: Iterable[AssistantToolParam] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Assistant:
+        """Modifies an assistant.
+
+        Args:
+          description: The description of the assistant.
+
+        The maximum length is 512 characters.
+
+          instructions: The system instructions that the assistant uses. The maximum length is 256,000
+              characters.
+
+          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+              for storing additional information about the object in a structured format, and
+              querying for objects via API or the dashboard.
+
+              Keys are strings with a maximum length of 64 characters. Values are strings with
+              a maximum length of 512 characters.
+
+          model: ID of the model to use. You can use the
+              [List models](https://platform.openai.com/docs/api-reference/models/list) API to
+              see all of your available models, or see our
+              [Model overview](https://platform.openai.com/docs/models) for descriptions of
+              them.
+
+          name: The name of the assistant. The maximum length is 256 characters.
+
+          response_format: Specifies the format that the model must output. Compatible with
+              [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
+              [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4),
+              and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
+
+              Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
+              Outputs which ensures the model will match your supplied JSON schema. Learn more
+              in the
+              [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
+
+              Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
+              message the model generates is valid JSON.
+
+              **Important:** when using JSON mode, you **must** also instruct the model to
+              produce JSON yourself via a system or user message. Without this, the model may
+              generate an unending stream of whitespace until the generation reaches the token
+              limit, resulting in a long-running and seemingly "stuck" request. Also note that
+              the message content may be partially cut off if `finish_reason="length"`, which
+              indicates the generation exceeded `max_tokens` or the conversation exceeded the
+              max context length.
+
+          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
+              make the output more random, while lower values like 0.2 will make it more
+              focused and deterministic.
+
+          tool_resources: A set of resources that are used by the assistant's tools. The resources are
+              specific to the type of tool. For example, the `code_interpreter` tool requires
+              a list of file IDs, while the `file_search` tool requires a list of vector store
+              IDs.
+
+          tools: A list of tool enabled on the assistant. There can be a maximum of 128 tools per
+              assistant. Tools can be of types `code_interpreter`, `file_search`, or
+              `function`.
+
+          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
+              model considers the results of the tokens with top_p probability mass. So 0.1
+              means only the tokens comprising the top 10% probability mass are considered.
+
+              We generally recommend altering this or temperature but not both.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not assistant_id:
+            raise ValueError(f"Expected a non-empty value for `assistant_id` but received {assistant_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return await self._post(
+            f"/assistants/{assistant_id}",
+            body=await async_maybe_transform(
+                {
+                    "description": description,
+                    "instructions": instructions,
+                    "metadata": metadata,
+                    "model": model,
+                    "name": name,
+                    "response_format": response_format,
+                    "temperature": temperature,
+                    "tool_resources": tool_resources,
+                    "tools": tools,
+                    "top_p": top_p,
+                },
+                assistant_update_params.AssistantUpdateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Assistant,
+        )
+
+    def list(
+        self,
+        *,
+        after: str | NotGiven = NOT_GIVEN,
+        before: str | NotGiven = NOT_GIVEN,
+        limit: int | NotGiven = NOT_GIVEN,
+        order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AsyncPaginator[Assistant, AsyncCursorPage[Assistant]]:
+        """Returns a list of assistants.
+
+        Args:
+          after: A cursor for use in pagination.
+
+        `after` is an object ID that defines your place
+              in the list. For instance, if you make a list request and receive 100 objects,
+              ending with obj_foo, your subsequent call can include after=obj_foo in order to
+              fetch the next page of the list.
+
+          before: A cursor for use in pagination. `before` is an object ID that defines your place
+              in the list. For instance, if you make a list request and receive 100 objects,
+              starting with obj_foo, your subsequent call can include before=obj_foo in order
+              to fetch the previous page of the list.
+
+          limit: A limit on the number of objects to be returned. Limit can range between 1 and
+              100, and the default is 20.
+
+          order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending
+              order and `desc` for descending order.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return self._get_api_list(
+            "/assistants",
+            page=AsyncCursorPage[Assistant],
+            options=make_request_options(
+                extra_headers=extra_headers,
+                extra_query=extra_query,
+                extra_body=extra_body,
+                timeout=timeout,
+                query=maybe_transform(
+                    {
+                        "after": after,
+                        "before": before,
+                        "limit": limit,
+                        "order": order,
+                    },
+                    assistant_list_params.AssistantListParams,
+                ),
+            ),
+            model=Assistant,
+        )
+
+    async def delete(
+        self,
+        assistant_id: str,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AssistantDeleted:
+        """
+        Delete an assistant.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not assistant_id:
+            raise ValueError(f"Expected a non-empty value for `assistant_id` but received {assistant_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return await self._delete(
+            f"/assistants/{assistant_id}",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=AssistantDeleted,
+        )
+
+
+class AssistantsWithRawResponse:
+    def __init__(self, assistants: Assistants) -> None:
+        self._assistants = assistants
+
+        self.create = _legacy_response.to_raw_response_wrapper(
+            assistants.create,
+        )
+        self.retrieve = _legacy_response.to_raw_response_wrapper(
+            assistants.retrieve,
+        )
+        self.update = _legacy_response.to_raw_response_wrapper(
+            assistants.update,
+        )
+        self.list = _legacy_response.to_raw_response_wrapper(
+            assistants.list,
+        )
+        self.delete = _legacy_response.to_raw_response_wrapper(
+            assistants.delete,
+        )
+
+
+class AsyncAssistantsWithRawResponse:
+    def __init__(self, assistants: AsyncAssistants) -> None:
+        self._assistants = assistants
+
+        self.create = _legacy_response.async_to_raw_response_wrapper(
+            assistants.create,
+        )
+        self.retrieve = _legacy_response.async_to_raw_response_wrapper(
+            assistants.retrieve,
+        )
+        self.update = _legacy_response.async_to_raw_response_wrapper(
+            assistants.update,
+        )
+        self.list = _legacy_response.async_to_raw_response_wrapper(
+            assistants.list,
+        )
+        self.delete = _legacy_response.async_to_raw_response_wrapper(
+            assistants.delete,
+        )
+
+
+class AssistantsWithStreamingResponse:
+    def __init__(self, assistants: Assistants) -> None:
+        self._assistants = assistants
+
+        self.create = to_streamed_response_wrapper(
+            assistants.create,
+        )
+        self.retrieve = to_streamed_response_wrapper(
+            assistants.retrieve,
+        )
+        self.update = to_streamed_response_wrapper(
+            assistants.update,
+        )
+        self.list = to_streamed_response_wrapper(
+            assistants.list,
+        )
+        self.delete = to_streamed_response_wrapper(
+            assistants.delete,
+        )
+
+
+class AsyncAssistantsWithStreamingResponse:
+    def __init__(self, assistants: AsyncAssistants) -> None:
+        self._assistants = assistants
+
+        self.create = async_to_streamed_response_wrapper(
+            assistants.create,
+        )
+        self.retrieve = async_to_streamed_response_wrapper(
+            assistants.retrieve,
+        )
+        self.update = async_to_streamed_response_wrapper(
+            assistants.update,
+        )
+        self.list = async_to_streamed_response_wrapper(
+            assistants.list,
+        )
+        self.delete = async_to_streamed_response_wrapper(
+            assistants.delete,
+        )
diff --git a/.venv/lib/python3.11/site-packages/openai/resources/beta/beta.py b/.venv/lib/python3.11/site-packages/openai/resources/beta/beta.py
new file mode 100644
index 0000000000000000000000000000000000000000..5d71cff3f19cb4f4978afa12c25fab1c22cb50b5
--- /dev/null
+++ b/.venv/lib/python3.11/site-packages/openai/resources/beta/beta.py
@@ -0,0 +1,207 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from ..._compat import cached_property
+from .chat.chat import Chat, AsyncChat
+from .assistants import (
+    Assistants,
+    AsyncAssistants,
+    AssistantsWithRawResponse,
+    AsyncAssistantsWithRawResponse,
+    AssistantsWithStreamingResponse,
+    AsyncAssistantsWithStreamingResponse,
+)
+from ..._resource import SyncAPIResource, AsyncAPIResource
+from .threads.threads import (
+    Threads,
+    AsyncThreads,
+    ThreadsWithRawResponse,
+    AsyncThreadsWithRawResponse,
+    ThreadsWithStreamingResponse,
+    AsyncThreadsWithStreamingResponse,
+)
+from .realtime.realtime import (
+    Realtime,
+    AsyncRealtime,
+    RealtimeWithRawResponse,
+    AsyncRealtimeWithRawResponse,
+    RealtimeWithStreamingResponse,
+    AsyncRealtimeWithStreamingResponse,
+)
+from .vector_stores.vector_stores import (
+    VectorStores,
+    AsyncVectorStores,
+    VectorStoresWithRawResponse,
+    AsyncVectorStoresWithRawResponse,
+    VectorStoresWithStreamingResponse,
+    AsyncVectorStoresWithStreamingResponse,
+)
+
+__all__ = ["Beta", "AsyncBeta"]
+
+
+class Beta(SyncAPIResource):
+    @cached_property
+    def chat(self) -> Chat:
+        return Chat(self._client)
+
+    @cached_property
+    def realtime(self) -> Realtime:
+        return Realtime(self._client)
+
+    @cached_property
+    def vector_stores(self) -> VectorStores:
+        return VectorStores(self._client)
+
+    @cached_property
+    def assistants(self) -> Assistants:
+        return Assistants(self._client)
+
+    @cached_property
+    def threads(self) -> Threads:
+        return Threads(self._client)
+
+    @cached_property
+    def with_raw_response(self) -> BetaWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return BetaWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> BetaWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return BetaWithStreamingResponse(self)
+
+
+class AsyncBeta(AsyncAPIResource):
+    @cached_property
+    def chat(self) -> AsyncChat:
+        return AsyncChat(self._client)
+
+    @cached_property
+    def realtime(self) -> AsyncRealtime:
+        return AsyncRealtime(self._client)
+
+    @cached_property
+    def vector_stores(self) -> AsyncVectorStores:
+        return AsyncVectorStores(self._client)
+
+    @cached_property
+    def assistants(self) -> AsyncAssistants:
+        return AsyncAssistants(self._client)
+
+    @cached_property
+    def threads(self) -> AsyncThreads:
+        return AsyncThreads(self._client)
+
+    @cached_property
+    def with_raw_response(self) -> AsyncBetaWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return AsyncBetaWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> AsyncBetaWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return AsyncBetaWithStreamingResponse(self)
+
+
+class BetaWithRawResponse:
+    def __init__(self, beta: Beta) -> None:
+        self._beta = beta
+
+    @cached_property
+    def realtime(self) -> RealtimeWithRawResponse:
+        return RealtimeWithRawResponse(self._beta.realtime)
+
+    @cached_property
+    def vector_stores(self) -> VectorStoresWithRawResponse:
+        return VectorStoresWithRawResponse(self._beta.vector_stores)
+
+    @cached_property
+    def assistants(self) -> AssistantsWithRawResponse:
+        return AssistantsWithRawResponse(self._beta.assistants)
+
+    @cached_property
+    def threads(self) -> ThreadsWithRawResponse:
+        return ThreadsWithRawResponse(self._beta.threads)
+
+
+class AsyncBetaWithRawResponse:
+    def __init__(self, beta: AsyncBeta) -> None:
+        self._beta = beta
+
+    @cached_property
+    def realtime(self) -> AsyncRealtimeWithRawResponse:
+        return AsyncRealtimeWithRawResponse(self._beta.realtime)
+
+    @cached_property
+    def vector_stores(self) -> AsyncVectorStoresWithRawResponse:
+        return AsyncVectorStoresWithRawResponse(self._beta.vector_stores)
+
+    @cached_property
+    def assistants(self) -> AsyncAssistantsWithRawResponse:
+        return AsyncAssistantsWithRawResponse(self._beta.assistants)
+
+    @cached_property
+    def threads(self) -> AsyncThreadsWithRawResponse:
+        return AsyncThreadsWithRawResponse(self._beta.threads)
+
+
+class BetaWithStreamingResponse:
+    def __init__(self, beta: Beta) -> None:
+        self._beta = beta
+
+    @cached_property
+    def realtime(self) -> RealtimeWithStreamingResponse:
+        return RealtimeWithStreamingResponse(self._beta.realtime)
+
+    @cached_property
+    def vector_stores(self) -> VectorStoresWithStreamingResponse:
+        return VectorStoresWithStreamingResponse(self._beta.vector_stores)
+
+    @cached_property
+    def assistants(self) -> AssistantsWithStreamingResponse:
+        return AssistantsWithStreamingResponse(self._beta.assistants)
+
+    @cached_property
+    def threads(self) -> ThreadsWithStreamingResponse:
+        return ThreadsWithStreamingResponse(self._beta.threads)
+
+
+class AsyncBetaWithStreamingResponse:
+    def __init__(self, beta: AsyncBeta) -> None:
+        self._beta = beta
+
+    @cached_property
+    def realtime(self) -> AsyncRealtimeWithStreamingResponse:
+        return AsyncRealtimeWithStreamingResponse(self._beta.realtime)
+
+    @cached_property
+    def vector_stores(self) -> AsyncVectorStoresWithStreamingResponse:
+        return AsyncVectorStoresWithStreamingResponse(self._beta.vector_stores)
+
+    @cached_property
+    def assistants(self) -> AsyncAssistantsWithStreamingResponse:
+        return AsyncAssistantsWithStreamingResponse(self._beta.assistants)
+
+    @cached_property
+    def threads(self) -> AsyncThreadsWithStreamingResponse:
+        return AsyncThreadsWithStreamingResponse(self._beta.threads)
diff --git a/.venv/lib/python3.11/site-packages/openai/resources/beta/threads/__init__.py b/.venv/lib/python3.11/site-packages/openai/resources/beta/threads/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..a66e445b1f3dfc6ffdbfb7c95031440ffc0f6994
--- /dev/null
+++ b/.venv/lib/python3.11/site-packages/openai/resources/beta/threads/__init__.py
@@ -0,0 +1,47 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from .runs import (
+    Runs,
+    AsyncRuns,
+    RunsWithRawResponse,
+    AsyncRunsWithRawResponse,
+    RunsWithStreamingResponse,
+    AsyncRunsWithStreamingResponse,
+)
+from .threads import (
+    Threads,
+    AsyncThreads,
+    ThreadsWithRawResponse,
+    AsyncThreadsWithRawResponse,
+    ThreadsWithStreamingResponse,
+    AsyncThreadsWithStreamingResponse,
+)
+from .messages import (
+    Messages,
+    AsyncMessages,
+    MessagesWithRawResponse,
+    AsyncMessagesWithRawResponse,
+    MessagesWithStreamingResponse,
+    AsyncMessagesWithStreamingResponse,
+)
+
+__all__ = [
+    "Runs",
+    "AsyncRuns",
+    "RunsWithRawResponse",
+    "AsyncRunsWithRawResponse",
+    "RunsWithStreamingResponse",
+    "AsyncRunsWithStreamingResponse",
+    "Messages",
+    "AsyncMessages",
+    "MessagesWithRawResponse",
+    "AsyncMessagesWithRawResponse",
+    "MessagesWithStreamingResponse",
+    "AsyncMessagesWithStreamingResponse",
+    "Threads",
+    "AsyncThreads",
+    "ThreadsWithRawResponse",
+    "AsyncThreadsWithRawResponse",
+    "ThreadsWithStreamingResponse",
+    "AsyncThreadsWithStreamingResponse",
+]
diff --git a/.venv/lib/python3.11/site-packages/openai/resources/beta/threads/__pycache__/__init__.cpython-311.pyc b/.venv/lib/python3.11/site-packages/openai/resources/beta/threads/__pycache__/__init__.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..1d565f09f8303f6ed7bda190454ee0539826011a
Binary files /dev/null and b/.venv/lib/python3.11/site-packages/openai/resources/beta/threads/__pycache__/__init__.cpython-311.pyc differ
diff --git a/.venv/lib/python3.11/site-packages/openai/resources/beta/threads/__pycache__/messages.cpython-311.pyc b/.venv/lib/python3.11/site-packages/openai/resources/beta/threads/__pycache__/messages.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..0c1e999774a58d5a1383ec6790009b4af47fab07
Binary files /dev/null and b/.venv/lib/python3.11/site-packages/openai/resources/beta/threads/__pycache__/messages.cpython-311.pyc differ
diff --git a/.venv/lib/python3.11/site-packages/openai/resources/beta/threads/__pycache__/threads.cpython-311.pyc b/.venv/lib/python3.11/site-packages/openai/resources/beta/threads/__pycache__/threads.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..d5a89f545e7a5c5b62cf045a450b4873b6712c4f
Binary files /dev/null and b/.venv/lib/python3.11/site-packages/openai/resources/beta/threads/__pycache__/threads.cpython-311.pyc differ
diff --git a/.venv/lib/python3.11/site-packages/openai/resources/beta/threads/messages.py b/.venv/lib/python3.11/site-packages/openai/resources/beta/threads/messages.py
new file mode 100644
index 0000000000000000000000000000000000000000..e3374aba371a68336a8788776126617d6e955e2f
--- /dev/null
+++ b/.venv/lib/python3.11/site-packages/openai/resources/beta/threads/messages.py
@@ -0,0 +1,670 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Union, Iterable, Optional
+from typing_extensions import Literal
+
+import httpx
+
+from .... import _legacy_response
+from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven
+from ...._utils import (
+    maybe_transform,
+    async_maybe_transform,
+)
+from ...._compat import cached_property
+from ...._resource import SyncAPIResource, AsyncAPIResource
+from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
+from ....pagination import SyncCursorPage, AsyncCursorPage
+from ...._base_client import (
+    AsyncPaginator,
+    make_request_options,
+)
+from ....types.beta.threads import message_list_params, message_create_params, message_update_params
+from ....types.beta.threads.message import Message
+from ....types.shared_params.metadata import Metadata
+from ....types.beta.threads.message_deleted import MessageDeleted
+from ....types.beta.threads.message_content_part_param import MessageContentPartParam
+
+__all__ = ["Messages", "AsyncMessages"]
+
+
+class Messages(SyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> MessagesWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return MessagesWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> MessagesWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return MessagesWithStreamingResponse(self)
+
+    def create(
+        self,
+        thread_id: str,
+        *,
+        content: Union[str, Iterable[MessageContentPartParam]],
+        role: Literal["user", "assistant"],
+        attachments: Optional[Iterable[message_create_params.Attachment]] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Message:
+        """
+        Create a message.
+
+        Args:
+          content: The text contents of the message.
+
+          role:
+              The role of the entity that is creating the message. Allowed values include:
+
+              - `user`: Indicates the message is sent by an actual user and should be used in
+                most cases to represent user-generated messages.
+              - `assistant`: Indicates the message is generated by the assistant. Use this
+                value to insert messages from the assistant into the conversation.
+
+          attachments: A list of files attached to the message, and the tools they should be added to.
+
+          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+              for storing additional information about the object in a structured format, and
+              querying for objects via API or the dashboard.
+
+              Keys are strings with a maximum length of 64 characters. Values are strings with
+              a maximum length of 512 characters.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not thread_id:
+            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return self._post(
+            f"/threads/{thread_id}/messages",
+            body=maybe_transform(
+                {
+                    "content": content,
+                    "role": role,
+                    "attachments": attachments,
+                    "metadata": metadata,
+                },
+                message_create_params.MessageCreateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Message,
+        )
+
+    def retrieve(
+        self,
+        message_id: str,
+        *,
+        thread_id: str,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Message:
+        """
+        Retrieve a message.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not thread_id:
+            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+        if not message_id:
+            raise ValueError(f"Expected a non-empty value for `message_id` but received {message_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return self._get(
+            f"/threads/{thread_id}/messages/{message_id}",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Message,
+        )
+
+    def update(
+        self,
+        message_id: str,
+        *,
+        thread_id: str,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Message:
+        """
+        Modifies a message.
+
+        Args:
+          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+              for storing additional information about the object in a structured format, and
+              querying for objects via API or the dashboard.
+
+              Keys are strings with a maximum length of 64 characters. Values are strings with
+              a maximum length of 512 characters.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not thread_id:
+            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+        if not message_id:
+            raise ValueError(f"Expected a non-empty value for `message_id` but received {message_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return self._post(
+            f"/threads/{thread_id}/messages/{message_id}",
+            body=maybe_transform({"metadata": metadata}, message_update_params.MessageUpdateParams),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Message,
+        )
+
+    def list(
+        self,
+        thread_id: str,
+        *,
+        after: str | NotGiven = NOT_GIVEN,
+        before: str | NotGiven = NOT_GIVEN,
+        limit: int | NotGiven = NOT_GIVEN,
+        order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN,
+        run_id: str | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> SyncCursorPage[Message]:
+        """
+        Returns a list of messages for a given thread.
+
+        Args:
+          after: A cursor for use in pagination. `after` is an object ID that defines your place
+              in the list. For instance, if you make a list request and receive 100 objects,
+              ending with obj_foo, your subsequent call can include after=obj_foo in order to
+              fetch the next page of the list.
+
+          before: A cursor for use in pagination. `before` is an object ID that defines your place
+              in the list. For instance, if you make a list request and receive 100 objects,
+              starting with obj_foo, your subsequent call can include before=obj_foo in order
+              to fetch the previous page of the list.
+
+          limit: A limit on the number of objects to be returned. Limit can range between 1 and
+              100, and the default is 20.
+
+          order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending
+              order and `desc` for descending order.
+
+          run_id: Filter messages by the run ID that generated them.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not thread_id:
+            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return self._get_api_list(
+            f"/threads/{thread_id}/messages",
+            page=SyncCursorPage[Message],
+            options=make_request_options(
+                extra_headers=extra_headers,
+                extra_query=extra_query,
+                extra_body=extra_body,
+                timeout=timeout,
+                query=maybe_transform(
+                    {
+                        "after": after,
+                        "before": before,
+                        "limit": limit,
+                        "order": order,
+                        "run_id": run_id,
+                    },
+                    message_list_params.MessageListParams,
+                ),
+            ),
+            model=Message,
+        )
+
+    def delete(
+        self,
+        message_id: str,
+        *,
+        thread_id: str,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> MessageDeleted:
+        """
+        Deletes a message.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not thread_id:
+            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+        if not message_id:
+            raise ValueError(f"Expected a non-empty value for `message_id` but received {message_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return self._delete(
+            f"/threads/{thread_id}/messages/{message_id}",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=MessageDeleted,
+        )
+
+
+class AsyncMessages(AsyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> AsyncMessagesWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return AsyncMessagesWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> AsyncMessagesWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return AsyncMessagesWithStreamingResponse(self)
+
+    async def create(
+        self,
+        thread_id: str,
+        *,
+        content: Union[str, Iterable[MessageContentPartParam]],
+        role: Literal["user", "assistant"],
+        attachments: Optional[Iterable[message_create_params.Attachment]] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Message:
+        """
+        Create a message.
+
+        Args:
+          content: The text contents of the message.
+
+          role:
+              The role of the entity that is creating the message. Allowed values include:
+
+              - `user`: Indicates the message is sent by an actual user and should be used in
+                most cases to represent user-generated messages.
+              - `assistant`: Indicates the message is generated by the assistant. Use this
+                value to insert messages from the assistant into the conversation.
+
+          attachments: A list of files attached to the message, and the tools they should be added to.
+
+          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+              for storing additional information about the object in a structured format, and
+              querying for objects via API or the dashboard.
+
+              Keys are strings with a maximum length of 64 characters. Values are strings with
+              a maximum length of 512 characters.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not thread_id:
+            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return await self._post(
+            f"/threads/{thread_id}/messages",
+            body=await async_maybe_transform(
+                {
+                    "content": content,
+                    "role": role,
+                    "attachments": attachments,
+                    "metadata": metadata,
+                },
+                message_create_params.MessageCreateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Message,
+        )
+
+    async def retrieve(
+        self,
+        message_id: str,
+        *,
+        thread_id: str,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Message:
+        """
+        Retrieve a message.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not thread_id:
+            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+        if not message_id:
+            raise ValueError(f"Expected a non-empty value for `message_id` but received {message_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return await self._get(
+            f"/threads/{thread_id}/messages/{message_id}",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Message,
+        )
+
+    async def update(
+        self,
+        message_id: str,
+        *,
+        thread_id: str,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Message:
+        """
+        Modifies a message.
+
+        Args:
+          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+              for storing additional information about the object in a structured format, and
+              querying for objects via API or the dashboard.
+
+              Keys are strings with a maximum length of 64 characters. Values are strings with
+              a maximum length of 512 characters.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not thread_id:
+            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+        if not message_id:
+            raise ValueError(f"Expected a non-empty value for `message_id` but received {message_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return await self._post(
+            f"/threads/{thread_id}/messages/{message_id}",
+            body=await async_maybe_transform({"metadata": metadata}, message_update_params.MessageUpdateParams),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Message,
+        )
+
+    def list(
+        self,
+        thread_id: str,
+        *,
+        after: str | NotGiven = NOT_GIVEN,
+        before: str | NotGiven = NOT_GIVEN,
+        limit: int | NotGiven = NOT_GIVEN,
+        order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN,
+        run_id: str | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AsyncPaginator[Message, AsyncCursorPage[Message]]:
+        """
+        Returns a list of messages for a given thread.
+
+        Args:
+          after: A cursor for use in pagination. `after` is an object ID that defines your place
+              in the list. For instance, if you make a list request and receive 100 objects,
+              ending with obj_foo, your subsequent call can include after=obj_foo in order to
+              fetch the next page of the list.
+
+          before: A cursor for use in pagination. `before` is an object ID that defines your place
+              in the list. For instance, if you make a list request and receive 100 objects,
+              starting with obj_foo, your subsequent call can include before=obj_foo in order
+              to fetch the previous page of the list.
+
+          limit: A limit on the number of objects to be returned. Limit can range between 1 and
+              100, and the default is 20.
+
+          order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending
+              order and `desc` for descending order.
+
+          run_id: Filter messages by the run ID that generated them.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not thread_id:
+            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return self._get_api_list(
+            f"/threads/{thread_id}/messages",
+            page=AsyncCursorPage[Message],
+            options=make_request_options(
+                extra_headers=extra_headers,
+                extra_query=extra_query,
+                extra_body=extra_body,
+                timeout=timeout,
+                query=maybe_transform(
+                    {
+                        "after": after,
+                        "before": before,
+                        "limit": limit,
+                        "order": order,
+                        "run_id": run_id,
+                    },
+                    message_list_params.MessageListParams,
+                ),
+            ),
+            model=Message,
+        )
+
+    async def delete(
+        self,
+        message_id: str,
+        *,
+        thread_id: str,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> MessageDeleted:
+        """
+        Deletes a message.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not thread_id:
+            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+        if not message_id:
+            raise ValueError(f"Expected a non-empty value for `message_id` but received {message_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return await self._delete(
+            f"/threads/{thread_id}/messages/{message_id}",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=MessageDeleted,
+        )
+
+
+class MessagesWithRawResponse:
+    def __init__(self, messages: Messages) -> None:
+        self._messages = messages
+
+        self.create = _legacy_response.to_raw_response_wrapper(
+            messages.create,
+        )
+        self.retrieve = _legacy_response.to_raw_response_wrapper(
+            messages.retrieve,
+        )
+        self.update = _legacy_response.to_raw_response_wrapper(
+            messages.update,
+        )
+        self.list = _legacy_response.to_raw_response_wrapper(
+            messages.list,
+        )
+        self.delete = _legacy_response.to_raw_response_wrapper(
+            messages.delete,
+        )
+
+
+class AsyncMessagesWithRawResponse:
+    def __init__(self, messages: AsyncMessages) -> None:
+        self._messages = messages
+
+        self.create = _legacy_response.async_to_raw_response_wrapper(
+            messages.create,
+        )
+        self.retrieve = _legacy_response.async_to_raw_response_wrapper(
+            messages.retrieve,
+        )
+        self.update = _legacy_response.async_to_raw_response_wrapper(
+            messages.update,
+        )
+        self.list = _legacy_response.async_to_raw_response_wrapper(
+            messages.list,
+        )
+        self.delete = _legacy_response.async_to_raw_response_wrapper(
+            messages.delete,
+        )
+
+
+class MessagesWithStreamingResponse:
+    def __init__(self, messages: Messages) -> None:
+        self._messages = messages
+
+        self.create = to_streamed_response_wrapper(
+            messages.create,
+        )
+        self.retrieve = to_streamed_response_wrapper(
+            messages.retrieve,
+        )
+        self.update = to_streamed_response_wrapper(
+            messages.update,
+        )
+        self.list = to_streamed_response_wrapper(
+            messages.list,
+        )
+        self.delete = to_streamed_response_wrapper(
+            messages.delete,
+        )
+
+
+class AsyncMessagesWithStreamingResponse:
+    def __init__(self, messages: AsyncMessages) -> None:
+        self._messages = messages
+
+        self.create = async_to_streamed_response_wrapper(
+            messages.create,
+        )
+        self.retrieve = async_to_streamed_response_wrapper(
+            messages.retrieve,
+        )
+        self.update = async_to_streamed_response_wrapper(
+            messages.update,
+        )
+        self.list = async_to_streamed_response_wrapper(
+            messages.list,
+        )
+        self.delete = async_to_streamed_response_wrapper(
+            messages.delete,
+        )
diff --git a/.venv/lib/python3.11/site-packages/openai/resources/beta/threads/runs/__init__.py b/.venv/lib/python3.11/site-packages/openai/resources/beta/threads/runs/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..50aa9fae60c7f32a60e11ff376c63cec58a5ff34
--- /dev/null
+++ b/.venv/lib/python3.11/site-packages/openai/resources/beta/threads/runs/__init__.py
@@ -0,0 +1,33 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from .runs import (
+    Runs,
+    AsyncRuns,
+    RunsWithRawResponse,
+    AsyncRunsWithRawResponse,
+    RunsWithStreamingResponse,
+    AsyncRunsWithStreamingResponse,
+)
+from .steps import (
+    Steps,
+    AsyncSteps,
+    StepsWithRawResponse,
+    AsyncStepsWithRawResponse,
+    StepsWithStreamingResponse,
+    AsyncStepsWithStreamingResponse,
+)
+
+__all__ = [
+    "Steps",
+    "AsyncSteps",
+    "StepsWithRawResponse",
+    "AsyncStepsWithRawResponse",
+    "StepsWithStreamingResponse",
+    "AsyncStepsWithStreamingResponse",
+    "Runs",
+    "AsyncRuns",
+    "RunsWithRawResponse",
+    "AsyncRunsWithRawResponse",
+    "RunsWithStreamingResponse",
+    "AsyncRunsWithStreamingResponse",
+]
diff --git a/.venv/lib/python3.11/site-packages/openai/resources/beta/threads/runs/__pycache__/__init__.cpython-311.pyc b/.venv/lib/python3.11/site-packages/openai/resources/beta/threads/runs/__pycache__/__init__.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..6ac13cdf456f0d687b452133f80ddb5e7098df97
Binary files /dev/null and b/.venv/lib/python3.11/site-packages/openai/resources/beta/threads/runs/__pycache__/__init__.cpython-311.pyc differ
diff --git a/.venv/lib/python3.11/site-packages/openai/resources/beta/threads/runs/__pycache__/runs.cpython-311.pyc b/.venv/lib/python3.11/site-packages/openai/resources/beta/threads/runs/__pycache__/runs.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..49e0b12bf20189c1bc5d578cdf4627da8c76384b
Binary files /dev/null and b/.venv/lib/python3.11/site-packages/openai/resources/beta/threads/runs/__pycache__/runs.cpython-311.pyc differ
diff --git a/.venv/lib/python3.11/site-packages/openai/resources/beta/threads/runs/__pycache__/steps.cpython-311.pyc b/.venv/lib/python3.11/site-packages/openai/resources/beta/threads/runs/__pycache__/steps.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..5474590cea13f3834952c37a6d0c851a5732ee17
Binary files /dev/null and b/.venv/lib/python3.11/site-packages/openai/resources/beta/threads/runs/__pycache__/steps.cpython-311.pyc differ
diff --git a/.venv/lib/python3.11/site-packages/openai/resources/beta/threads/runs/runs.py b/.venv/lib/python3.11/site-packages/openai/resources/beta/threads/runs/runs.py
new file mode 100644
index 0000000000000000000000000000000000000000..13301ad507e19eaf33af55bdf1a36f0b70cb4847
--- /dev/null
+++ b/.venv/lib/python3.11/site-packages/openai/resources/beta/threads/runs/runs.py
@@ -0,0 +1,2920 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import typing_extensions
+from typing import List, Union, Iterable, Optional
+from functools import partial
+from typing_extensions import Literal, overload
+
+import httpx
+
+from ..... import _legacy_response
+from .steps import (
+    Steps,
+    AsyncSteps,
+    StepsWithRawResponse,
+    AsyncStepsWithRawResponse,
+    StepsWithStreamingResponse,
+    AsyncStepsWithStreamingResponse,
+)
+from ....._types import NOT_GIVEN, Body, Query, Headers, NotGiven
+from ....._utils import (
+    is_given,
+    required_args,
+    maybe_transform,
+    async_maybe_transform,
+)
+from ....._compat import cached_property
+from ....._resource import SyncAPIResource, AsyncAPIResource
+from ....._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
+from ....._streaming import Stream, AsyncStream
+from .....pagination import SyncCursorPage, AsyncCursorPage
+from ....._base_client import AsyncPaginator, make_request_options
+from .....lib.streaming import (
+    AssistantEventHandler,
+    AssistantEventHandlerT,
+    AssistantStreamManager,
+    AsyncAssistantEventHandler,
+    AsyncAssistantEventHandlerT,
+    AsyncAssistantStreamManager,
+)
+from .....types.chat_model import ChatModel
+from .....types.beta.threads import (
+    run_list_params,
+    run_create_params,
+    run_update_params,
+    run_submit_tool_outputs_params,
+)
+from .....types.beta.threads.run import Run
+from .....types.shared_params.metadata import Metadata
+from .....types.beta.assistant_tool_param import AssistantToolParam
+from .....types.beta.assistant_stream_event import AssistantStreamEvent
+from .....types.beta.threads.runs.run_step_include import RunStepInclude
+from .....types.beta.assistant_tool_choice_option_param import AssistantToolChoiceOptionParam
+from .....types.beta.assistant_response_format_option_param import AssistantResponseFormatOptionParam
+
+__all__ = ["Runs", "AsyncRuns"]
+
+
+class Runs(SyncAPIResource):
+    @cached_property
+    def steps(self) -> Steps:
+        return Steps(self._client)
+
+    @cached_property
+    def with_raw_response(self) -> RunsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return RunsWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> RunsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return RunsWithStreamingResponse(self)
+
+    @overload
+    def create(
+        self,
+        thread_id: str,
+        *,
+        assistant_id: str,
+        include: List[RunStepInclude] | NotGiven = NOT_GIVEN,
+        additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+        response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+        stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Run:
+        """
+        Create a run.
+
+        Args:
+          assistant_id: The ID of the
+              [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to
+              execute this run.
+
+          include: A list of additional fields to include in the response. Currently the only
+              supported value is `step_details.tool_calls[*].file_search.results[*].content`
+              to fetch the file search result content.
+
+              See the
+              [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search#customizing-file-search-settings)
+              for more information.
+
+          additional_instructions: Appends additional instructions at the end of the instructions for the run. This
+              is useful for modifying the behavior on a per-run basis without overriding other
+              instructions.
+
+          additional_messages: Adds additional messages to the thread before creating the run.
+
+          instructions: Overrides the
+              [instructions](https://platform.openai.com/docs/api-reference/assistants/createAssistant)
+              of the assistant. This is useful for modifying the behavior on a per-run basis.
+
+          max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
+              run. The run will make a best effort to use only the number of completion tokens
+              specified, across multiple turns of the run. If the run exceeds the number of
+              completion tokens specified, the run will end with status `incomplete`. See
+              `incomplete_details` for more info.
+
+          max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
+              The run will make a best effort to use only the number of prompt tokens
+              specified, across multiple turns of the run. If the run exceeds the number of
+              prompt tokens specified, the run will end with status `incomplete`. See
+              `incomplete_details` for more info.
+
+          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+              for storing additional information about the object in a structured format, and
+              querying for objects via API or the dashboard.
+
+              Keys are strings with a maximum length of 64 characters. Values are strings with
+              a maximum length of 512 characters.
+
+          model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to
+              be used to execute this run. If a value is provided here, it will override the
+              model associated with the assistant. If not, the model associated with the
+              assistant will be used.
+
+          parallel_tool_calls: Whether to enable
+              [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling)
+              during tool use.
+
+          response_format: Specifies the format that the model must output. Compatible with
+              [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
+              [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4),
+              and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
+
+              Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
+              Outputs which ensures the model will match your supplied JSON schema. Learn more
+              in the
+              [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
+
+              Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
+              message the model generates is valid JSON.
+
+              **Important:** when using JSON mode, you **must** also instruct the model to
+              produce JSON yourself via a system or user message. Without this, the model may
+              generate an unending stream of whitespace until the generation reaches the token
+              limit, resulting in a long-running and seemingly "stuck" request. Also note that
+              the message content may be partially cut off if `finish_reason="length"`, which
+              indicates the generation exceeded `max_tokens` or the conversation exceeded the
+              max context length.
+
+          stream: If `true`, returns a stream of events that happen during the Run as server-sent
+              events, terminating when the Run enters a terminal state with a `data: [DONE]`
+              message.
+
+          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
+              make the output more random, while lower values like 0.2 will make it more
+              focused and deterministic.
+
+          tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
+              not call any tools and instead generates a message. `auto` is the default value
+              and means the model can pick between generating a message or calling one or more
+              tools. `required` means the model must call one or more tools before responding
+              to the user. Specifying a particular tool like `{"type": "file_search"}` or
+              `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+              call that tool.
+
+          tools: Override the tools the assistant can use for this run. This is useful for
+              modifying the behavior on a per-run basis.
+
+          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
+              model considers the results of the tokens with top_p probability mass. So 0.1
+              means only the tokens comprising the top 10% probability mass are considered.
+
+              We generally recommend altering this or temperature but not both.
+
+          truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to
+              control the intial context window of the run.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
+
+    @overload
+    def create(
+        self,
+        thread_id: str,
+        *,
+        assistant_id: str,
+        stream: Literal[True],
+        include: List[RunStepInclude] | NotGiven = NOT_GIVEN,
+        additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+        response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Stream[AssistantStreamEvent]:
+        """
+        Create a run.
+
+        Args:
+          assistant_id: The ID of the
+              [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to
+              execute this run.
+
+          stream: If `true`, returns a stream of events that happen during the Run as server-sent
+              events, terminating when the Run enters a terminal state with a `data: [DONE]`
+              message.
+
+          include: A list of additional fields to include in the response. Currently the only
+              supported value is `step_details.tool_calls[*].file_search.results[*].content`
+              to fetch the file search result content.
+
+              See the
+              [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search#customizing-file-search-settings)
+              for more information.
+
+          additional_instructions: Appends additional instructions at the end of the instructions for the run. This
+              is useful for modifying the behavior on a per-run basis without overriding other
+              instructions.
+
+          additional_messages: Adds additional messages to the thread before creating the run.
+
+          instructions: Overrides the
+              [instructions](https://platform.openai.com/docs/api-reference/assistants/createAssistant)
+              of the assistant. This is useful for modifying the behavior on a per-run basis.
+
+          max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
+              run. The run will make a best effort to use only the number of completion tokens
+              specified, across multiple turns of the run. If the run exceeds the number of
+              completion tokens specified, the run will end with status `incomplete`. See
+              `incomplete_details` for more info.
+
+          max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
+              The run will make a best effort to use only the number of prompt tokens
+              specified, across multiple turns of the run. If the run exceeds the number of
+              prompt tokens specified, the run will end with status `incomplete`. See
+              `incomplete_details` for more info.
+
+          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+              for storing additional information about the object in a structured format, and
+              querying for objects via API or the dashboard.
+
+              Keys are strings with a maximum length of 64 characters. Values are strings with
+              a maximum length of 512 characters.
+
+          model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to
+              be used to execute this run. If a value is provided here, it will override the
+              model associated with the assistant. If not, the model associated with the
+              assistant will be used.
+
+          parallel_tool_calls: Whether to enable
+              [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling)
+              during tool use.
+
+          response_format: Specifies the format that the model must output. Compatible with
+              [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
+              [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4),
+              and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
+
+              Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
+              Outputs which ensures the model will match your supplied JSON schema. Learn more
+              in the
+              [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
+
+              Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
+              message the model generates is valid JSON.
+
+              **Important:** when using JSON mode, you **must** also instruct the model to
+              produce JSON yourself via a system or user message. Without this, the model may
+              generate an unending stream of whitespace until the generation reaches the token
+              limit, resulting in a long-running and seemingly "stuck" request. Also note that
+              the message content may be partially cut off if `finish_reason="length"`, which
+              indicates the generation exceeded `max_tokens` or the conversation exceeded the
+              max context length.
+
+          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
+              make the output more random, while lower values like 0.2 will make it more
+              focused and deterministic.
+
+          tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
+              not call any tools and instead generates a message. `auto` is the default value
+              and means the model can pick between generating a message or calling one or more
+              tools. `required` means the model must call one or more tools before responding
+              to the user. Specifying a particular tool like `{"type": "file_search"}` or
+              `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+              call that tool.
+
+          tools: Override the tools the assistant can use for this run. This is useful for
+              modifying the behavior on a per-run basis.
+
+          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
+              model considers the results of the tokens with top_p probability mass. So 0.1
+              means only the tokens comprising the top 10% probability mass are considered.
+
+              We generally recommend altering this or temperature but not both.
+
+          truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to
+              control the intial context window of the run.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
+
+    @overload
+    def create(
+        self,
+        thread_id: str,
+        *,
+        assistant_id: str,
+        stream: bool,
+        include: List[RunStepInclude] | NotGiven = NOT_GIVEN,
+        additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+        response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Run | Stream[AssistantStreamEvent]:
+        """
+        Create a run.
+
+        Args:
+          assistant_id: The ID of the
+              [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to
+              execute this run.
+
+          stream: If `true`, returns a stream of events that happen during the Run as server-sent
+              events, terminating when the Run enters a terminal state with a `data: [DONE]`
+              message.
+
+          include: A list of additional fields to include in the response. Currently the only
+              supported value is `step_details.tool_calls[*].file_search.results[*].content`
+              to fetch the file search result content.
+
+              See the
+              [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search#customizing-file-search-settings)
+              for more information.
+
+          additional_instructions: Appends additional instructions at the end of the instructions for the run. This
+              is useful for modifying the behavior on a per-run basis without overriding other
+              instructions.
+
+          additional_messages: Adds additional messages to the thread before creating the run.
+
+          instructions: Overrides the
+              [instructions](https://platform.openai.com/docs/api-reference/assistants/createAssistant)
+              of the assistant. This is useful for modifying the behavior on a per-run basis.
+
+          max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
+              run. The run will make a best effort to use only the number of completion tokens
+              specified, across multiple turns of the run. If the run exceeds the number of
+              completion tokens specified, the run will end with status `incomplete`. See
+              `incomplete_details` for more info.
+
+          max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
+              The run will make a best effort to use only the number of prompt tokens
+              specified, across multiple turns of the run. If the run exceeds the number of
+              prompt tokens specified, the run will end with status `incomplete`. See
+              `incomplete_details` for more info.
+
+          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+              for storing additional information about the object in a structured format, and
+              querying for objects via API or the dashboard.
+
+              Keys are strings with a maximum length of 64 characters. Values are strings with
+              a maximum length of 512 characters.
+
+          model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to
+              be used to execute this run. If a value is provided here, it will override the
+              model associated with the assistant. If not, the model associated with the
+              assistant will be used.
+
+          parallel_tool_calls: Whether to enable
+              [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling)
+              during tool use.
+
+          response_format: Specifies the format that the model must output. Compatible with
+              [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
+              [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4),
+              and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
+
+              Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
+              Outputs which ensures the model will match your supplied JSON schema. Learn more
+              in the
+              [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
+
+              Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
+              message the model generates is valid JSON.
+
+              **Important:** when using JSON mode, you **must** also instruct the model to
+              produce JSON yourself via a system or user message. Without this, the model may
+              generate an unending stream of whitespace until the generation reaches the token
+              limit, resulting in a long-running and seemingly "stuck" request. Also note that
+              the message content may be partially cut off if `finish_reason="length"`, which
+              indicates the generation exceeded `max_tokens` or the conversation exceeded the
+              max context length.
+
+          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
+              make the output more random, while lower values like 0.2 will make it more
+              focused and deterministic.
+
+          tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
+              not call any tools and instead generates a message. `auto` is the default value
+              and means the model can pick between generating a message or calling one or more
+              tools. `required` means the model must call one or more tools before responding
+              to the user. Specifying a particular tool like `{"type": "file_search"}` or
+              `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+              call that tool.
+
+          tools: Override the tools the assistant can use for this run. This is useful for
+              modifying the behavior on a per-run basis.
+
+          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
+              model considers the results of the tokens with top_p probability mass. So 0.1
+              means only the tokens comprising the top 10% probability mass are considered.
+
+              We generally recommend altering this or temperature but not both.
+
+          truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to
+              control the intial context window of the run.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
+
+    @required_args(["assistant_id"], ["assistant_id", "stream"])
+    def create(
+        self,
+        thread_id: str,
+        *,
+        assistant_id: str,
+        include: List[RunStepInclude] | NotGiven = NOT_GIVEN,
+        additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+        response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+        stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Run | Stream[AssistantStreamEvent]:
+        if not thread_id:
+            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return self._post(
+            f"/threads/{thread_id}/runs",
+            body=maybe_transform(
+                {
+                    "assistant_id": assistant_id,
+                    "additional_instructions": additional_instructions,
+                    "additional_messages": additional_messages,
+                    "instructions": instructions,
+                    "max_completion_tokens": max_completion_tokens,
+                    "max_prompt_tokens": max_prompt_tokens,
+                    "metadata": metadata,
+                    "model": model,
+                    "parallel_tool_calls": parallel_tool_calls,
+                    "response_format": response_format,
+                    "stream": stream,
+                    "temperature": temperature,
+                    "tool_choice": tool_choice,
+                    "tools": tools,
+                    "top_p": top_p,
+                    "truncation_strategy": truncation_strategy,
+                },
+                run_create_params.RunCreateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers,
+                extra_query=extra_query,
+                extra_body=extra_body,
+                timeout=timeout,
+                query=maybe_transform({"include": include}, run_create_params.RunCreateParams),
+            ),
+            cast_to=Run,
+            stream=stream or False,
+            stream_cls=Stream[AssistantStreamEvent],
+        )
+
+    def retrieve(
+        self,
+        run_id: str,
+        *,
+        thread_id: str,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Run:
+        """
+        Retrieves a run.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not thread_id:
+            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+        if not run_id:
+            raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return self._get(
+            f"/threads/{thread_id}/runs/{run_id}",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Run,
+        )
+
+    def update(
+        self,
+        run_id: str,
+        *,
+        thread_id: str,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Run:
+        """
+        Modifies a run.
+
+        Args:
+          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+              for storing additional information about the object in a structured format, and
+              querying for objects via API or the dashboard.
+
+              Keys are strings with a maximum length of 64 characters. Values are strings with
+              a maximum length of 512 characters.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not thread_id:
+            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+        if not run_id:
+            raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return self._post(
+            f"/threads/{thread_id}/runs/{run_id}",
+            body=maybe_transform({"metadata": metadata}, run_update_params.RunUpdateParams),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Run,
+        )
+
+    def list(
+        self,
+        thread_id: str,
+        *,
+        after: str | NotGiven = NOT_GIVEN,
+        before: str | NotGiven = NOT_GIVEN,
+        limit: int | NotGiven = NOT_GIVEN,
+        order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> SyncCursorPage[Run]:
+        """
+        Returns a list of runs belonging to a thread.
+
+        Args:
+          after: A cursor for use in pagination. `after` is an object ID that defines your place
+              in the list. For instance, if you make a list request and receive 100 objects,
+              ending with obj_foo, your subsequent call can include after=obj_foo in order to
+              fetch the next page of the list.
+
+          before: A cursor for use in pagination. `before` is an object ID that defines your place
+              in the list. For instance, if you make a list request and receive 100 objects,
+              starting with obj_foo, your subsequent call can include before=obj_foo in order
+              to fetch the previous page of the list.
+
+          limit: A limit on the number of objects to be returned. Limit can range between 1 and
+              100, and the default is 20.
+
+          order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending
+              order and `desc` for descending order.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not thread_id:
+            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return self._get_api_list(
+            f"/threads/{thread_id}/runs",
+            page=SyncCursorPage[Run],
+            options=make_request_options(
+                extra_headers=extra_headers,
+                extra_query=extra_query,
+                extra_body=extra_body,
+                timeout=timeout,
+                query=maybe_transform(
+                    {
+                        "after": after,
+                        "before": before,
+                        "limit": limit,
+                        "order": order,
+                    },
+                    run_list_params.RunListParams,
+                ),
+            ),
+            model=Run,
+        )
+
+    def cancel(
+        self,
+        run_id: str,
+        *,
+        thread_id: str,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Run:
+        """
+        Cancels a run that is `in_progress`.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not thread_id:
+            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+        if not run_id:
+            raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return self._post(
+            f"/threads/{thread_id}/runs/{run_id}/cancel",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Run,
+        )
+
+    def create_and_poll(
+        self,
+        *,
+        assistant_id: str,
+        include: List[RunStepInclude] | NotGiven = NOT_GIVEN,
+        additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+        response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+        poll_interval_ms: int | NotGiven = NOT_GIVEN,
+        thread_id: str,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Run:
+        """
+        A helper to create a run an poll for a terminal state. More information on Run
+        lifecycles can be found here:
+        https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps
+        """
+        run = self.create(
+            thread_id=thread_id,
+            assistant_id=assistant_id,
+            include=include,
+            additional_instructions=additional_instructions,
+            additional_messages=additional_messages,
+            instructions=instructions,
+            max_completion_tokens=max_completion_tokens,
+            max_prompt_tokens=max_prompt_tokens,
+            metadata=metadata,
+            model=model,
+            response_format=response_format,
+            temperature=temperature,
+            tool_choice=tool_choice,
+            parallel_tool_calls=parallel_tool_calls,
+            # We assume we are not streaming when polling
+            stream=False,
+            tools=tools,
+            truncation_strategy=truncation_strategy,
+            top_p=top_p,
+            extra_headers=extra_headers,
+            extra_query=extra_query,
+            extra_body=extra_body,
+            timeout=timeout,
+        )
+        return self.poll(
+            run.id,
+            thread_id=thread_id,
+            extra_headers=extra_headers,
+            extra_query=extra_query,
+            extra_body=extra_body,
+            poll_interval_ms=poll_interval_ms,
+            timeout=timeout,
+        )
+
+    @overload
+    @typing_extensions.deprecated("use `stream` instead")
+    def create_and_stream(
+        self,
+        *,
+        assistant_id: str,
+        additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+        response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+        thread_id: str,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AssistantStreamManager[AssistantEventHandler]:
+        """Create a Run stream"""
+        ...
+
+    @overload
+    @typing_extensions.deprecated("use `stream` instead")
+    def create_and_stream(
+        self,
+        *,
+        assistant_id: str,
+        additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+        response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+        thread_id: str,
+        event_handler: AssistantEventHandlerT,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AssistantStreamManager[AssistantEventHandlerT]:
+        """Create a Run stream"""
+        ...
+
+    @typing_extensions.deprecated("use `stream` instead")
+    def create_and_stream(
+        self,
+        *,
+        assistant_id: str,
+        additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+        response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+        thread_id: str,
+        event_handler: AssistantEventHandlerT | None = None,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AssistantStreamManager[AssistantEventHandler] | AssistantStreamManager[AssistantEventHandlerT]:
+        """Create a Run stream"""
+        if not thread_id:
+            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+
+        extra_headers = {
+            "OpenAI-Beta": "assistants=v2",
+            "X-Stainless-Stream-Helper": "threads.runs.create_and_stream",
+            "X-Stainless-Custom-Event-Handler": "true" if event_handler else "false",
+            **(extra_headers or {}),
+        }
+        make_request = partial(
+            self._post,
+            f"/threads/{thread_id}/runs",
+            body=maybe_transform(
+                {
+                    "assistant_id": assistant_id,
+                    "additional_instructions": additional_instructions,
+                    "additional_messages": additional_messages,
+                    "instructions": instructions,
+                    "max_completion_tokens": max_completion_tokens,
+                    "max_prompt_tokens": max_prompt_tokens,
+                    "metadata": metadata,
+                    "model": model,
+                    "response_format": response_format,
+                    "temperature": temperature,
+                    "tool_choice": tool_choice,
+                    "stream": True,
+                    "tools": tools,
+                    "truncation_strategy": truncation_strategy,
+                    "parallel_tool_calls": parallel_tool_calls,
+                    "top_p": top_p,
+                },
+                run_create_params.RunCreateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Run,
+            stream=True,
+            stream_cls=Stream[AssistantStreamEvent],
+        )
+        return AssistantStreamManager(make_request, event_handler=event_handler or AssistantEventHandler())
+
+    def poll(
+        self,
+        run_id: str,
+        thread_id: str,
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+        poll_interval_ms: int | NotGiven = NOT_GIVEN,
+    ) -> Run:
+        """
+        A helper to poll a run status until it reaches a terminal state. More
+        information on Run lifecycles can be found here:
+        https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps
+        """
+        extra_headers = {"X-Stainless-Poll-Helper": "true", **(extra_headers or {})}
+
+        if is_given(poll_interval_ms):
+            extra_headers["X-Stainless-Custom-Poll-Interval"] = str(poll_interval_ms)
+
+        terminal_states = {"requires_action", "cancelled", "completed", "failed", "expired", "incomplete"}
+        while True:
+            response = self.with_raw_response.retrieve(
+                thread_id=thread_id,
+                run_id=run_id,
+                extra_headers=extra_headers,
+                extra_body=extra_body,
+                extra_query=extra_query,
+                timeout=timeout,
+            )
+
+            run = response.parse()
+            # Return if we reached a terminal state
+            if run.status in terminal_states:
+                return run
+
+            if not is_given(poll_interval_ms):
+                from_header = response.headers.get("openai-poll-after-ms")
+                if from_header is not None:
+                    poll_interval_ms = int(from_header)
+                else:
+                    poll_interval_ms = 1000
+
+            self._sleep(poll_interval_ms / 1000)
+
+    @overload
+    def stream(
+        self,
+        *,
+        assistant_id: str,
+        include: List[RunStepInclude] | NotGiven = NOT_GIVEN,
+        additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+        response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+        thread_id: str,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AssistantStreamManager[AssistantEventHandler]:
+        """Create a Run stream"""
+        ...
+
+    @overload
+    def stream(
+        self,
+        *,
+        assistant_id: str,
+        include: List[RunStepInclude] | NotGiven = NOT_GIVEN,
+        additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+        response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+        thread_id: str,
+        event_handler: AssistantEventHandlerT,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AssistantStreamManager[AssistantEventHandlerT]:
+        """Create a Run stream"""
+        ...
+
+    def stream(
+        self,
+        *,
+        assistant_id: str,
+        include: List[RunStepInclude] | NotGiven = NOT_GIVEN,
+        additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+        response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+        thread_id: str,
+        event_handler: AssistantEventHandlerT | None = None,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AssistantStreamManager[AssistantEventHandler] | AssistantStreamManager[AssistantEventHandlerT]:
+        """Create a Run stream"""
+        if not thread_id:
+            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+
+        extra_headers = {
+            "OpenAI-Beta": "assistants=v2",
+            "X-Stainless-Stream-Helper": "threads.runs.create_and_stream",
+            "X-Stainless-Custom-Event-Handler": "true" if event_handler else "false",
+            **(extra_headers or {}),
+        }
+        make_request = partial(
+            self._post,
+            f"/threads/{thread_id}/runs",
+            body=maybe_transform(
+                {
+                    "assistant_id": assistant_id,
+                    "additional_instructions": additional_instructions,
+                    "additional_messages": additional_messages,
+                    "instructions": instructions,
+                    "max_completion_tokens": max_completion_tokens,
+                    "max_prompt_tokens": max_prompt_tokens,
+                    "metadata": metadata,
+                    "model": model,
+                    "response_format": response_format,
+                    "temperature": temperature,
+                    "tool_choice": tool_choice,
+                    "stream": True,
+                    "tools": tools,
+                    "parallel_tool_calls": parallel_tool_calls,
+                    "truncation_strategy": truncation_strategy,
+                    "top_p": top_p,
+                },
+                run_create_params.RunCreateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers,
+                extra_query=extra_query,
+                extra_body=extra_body,
+                timeout=timeout,
+                query=maybe_transform({"include": include}, run_create_params.RunCreateParams),
+            ),
+            cast_to=Run,
+            stream=True,
+            stream_cls=Stream[AssistantStreamEvent],
+        )
+        return AssistantStreamManager(make_request, event_handler=event_handler or AssistantEventHandler())
+
+    @overload
+    def submit_tool_outputs(
+        self,
+        run_id: str,
+        *,
+        thread_id: str,
+        tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput],
+        stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Run:
+        """
+        When a run has the `status: "requires_action"` and `required_action.type` is
+        `submit_tool_outputs`, this endpoint can be used to submit the outputs from the
+        tool calls once they're all completed. All outputs must be submitted in a single
+        request.
+
+        Args:
+          tool_outputs: A list of tools for which the outputs are being submitted.
+
+          stream: If `true`, returns a stream of events that happen during the Run as server-sent
+              events, terminating when the Run enters a terminal state with a `data: [DONE]`
+              message.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
+
+    @overload
+    def submit_tool_outputs(
+        self,
+        run_id: str,
+        *,
+        thread_id: str,
+        stream: Literal[True],
+        tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput],
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Stream[AssistantStreamEvent]:
+        """
+        When a run has the `status: "requires_action"` and `required_action.type` is
+        `submit_tool_outputs`, this endpoint can be used to submit the outputs from the
+        tool calls once they're all completed. All outputs must be submitted in a single
+        request.
+
+        Args:
+          stream: If `true`, returns a stream of events that happen during the Run as server-sent
+              events, terminating when the Run enters a terminal state with a `data: [DONE]`
+              message.
+
+          tool_outputs: A list of tools for which the outputs are being submitted.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
+
+    @overload
+    def submit_tool_outputs(
+        self,
+        run_id: str,
+        *,
+        thread_id: str,
+        stream: bool,
+        tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput],
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Run | Stream[AssistantStreamEvent]:
+        """
+        When a run has the `status: "requires_action"` and `required_action.type` is
+        `submit_tool_outputs`, this endpoint can be used to submit the outputs from the
+        tool calls once they're all completed. All outputs must be submitted in a single
+        request.
+
+        Args:
+          stream: If `true`, returns a stream of events that happen during the Run as server-sent
+              events, terminating when the Run enters a terminal state with a `data: [DONE]`
+              message.
+
+          tool_outputs: A list of tools for which the outputs are being submitted.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
+
+    @required_args(["thread_id", "tool_outputs"], ["thread_id", "stream", "tool_outputs"])
+    def submit_tool_outputs(
+        self,
+        run_id: str,
+        *,
+        thread_id: str,
+        tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput],
+        stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Run | Stream[AssistantStreamEvent]:
+        if not thread_id:
+            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+        if not run_id:
+            raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return self._post(
+            f"/threads/{thread_id}/runs/{run_id}/submit_tool_outputs",
+            body=maybe_transform(
+                {
+                    "tool_outputs": tool_outputs,
+                    "stream": stream,
+                },
+                run_submit_tool_outputs_params.RunSubmitToolOutputsParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Run,
+            stream=stream or False,
+            stream_cls=Stream[AssistantStreamEvent],
+        )
+
+    def submit_tool_outputs_and_poll(
+        self,
+        *,
+        tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput],
+        run_id: str,
+        thread_id: str,
+        poll_interval_ms: int | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Run:
+        """
+        A helper to submit a tool output to a run and poll for a terminal run state.
+        More information on Run lifecycles can be found here:
+        https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps
+        """
+        run = self.submit_tool_outputs(
+            run_id=run_id,
+            thread_id=thread_id,
+            tool_outputs=tool_outputs,
+            stream=False,
+            extra_headers=extra_headers,
+            extra_query=extra_query,
+            extra_body=extra_body,
+            timeout=timeout,
+        )
+        return self.poll(
+            run_id=run.id,
+            thread_id=thread_id,
+            extra_headers=extra_headers,
+            extra_query=extra_query,
+            extra_body=extra_body,
+            timeout=timeout,
+            poll_interval_ms=poll_interval_ms,
+        )
+
+    @overload
+    def submit_tool_outputs_stream(
+        self,
+        *,
+        tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput],
+        run_id: str,
+        thread_id: str,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AssistantStreamManager[AssistantEventHandler]:
+        """
+        Submit the tool outputs from a previous run and stream the run to a terminal
+        state. More information on Run lifecycles can be found here:
+        https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps
+        """
+        ...
+
+    @overload
+    def submit_tool_outputs_stream(
+        self,
+        *,
+        tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput],
+        run_id: str,
+        thread_id: str,
+        event_handler: AssistantEventHandlerT,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AssistantStreamManager[AssistantEventHandlerT]:
+        """
+        Submit the tool outputs from a previous run and stream the run to a terminal
+        state. More information on Run lifecycles can be found here:
+        https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps
+        """
+        ...
+
+    def submit_tool_outputs_stream(
+        self,
+        *,
+        tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput],
+        run_id: str,
+        thread_id: str,
+        event_handler: AssistantEventHandlerT | None = None,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AssistantStreamManager[AssistantEventHandler] | AssistantStreamManager[AssistantEventHandlerT]:
+        """
+        Submit the tool outputs from a previous run and stream the run to a terminal
+        state. More information on Run lifecycles can be found here:
+        https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps
+        """
+        if not run_id:
+            raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}")
+
+        if not thread_id:
+            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+
+        extra_headers = {
+            "OpenAI-Beta": "assistants=v2",
+            "X-Stainless-Stream-Helper": "threads.runs.submit_tool_outputs_stream",
+            "X-Stainless-Custom-Event-Handler": "true" if event_handler else "false",
+            **(extra_headers or {}),
+        }
+        request = partial(
+            self._post,
+            f"/threads/{thread_id}/runs/{run_id}/submit_tool_outputs",
+            body=maybe_transform(
+                {
+                    "tool_outputs": tool_outputs,
+                    "stream": True,
+                },
+                run_submit_tool_outputs_params.RunSubmitToolOutputsParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Run,
+            stream=True,
+            stream_cls=Stream[AssistantStreamEvent],
+        )
+        return AssistantStreamManager(request, event_handler=event_handler or AssistantEventHandler())
+
+
+class AsyncRuns(AsyncAPIResource):
+    @cached_property
+    def steps(self) -> AsyncSteps:
+        return AsyncSteps(self._client)
+
+    @cached_property
+    def with_raw_response(self) -> AsyncRunsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return AsyncRunsWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> AsyncRunsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return AsyncRunsWithStreamingResponse(self)
+
+    @overload
+    async def create(
+        self,
+        thread_id: str,
+        *,
+        assistant_id: str,
+        include: List[RunStepInclude] | NotGiven = NOT_GIVEN,
+        additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+        response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+        stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Run:
+        """
+        Create a run.
+
+        Args:
+          assistant_id: The ID of the
+              [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to
+              execute this run.
+
+          include: A list of additional fields to include in the response. Currently the only
+              supported value is `step_details.tool_calls[*].file_search.results[*].content`
+              to fetch the file search result content.
+
+              See the
+              [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search#customizing-file-search-settings)
+              for more information.
+
+          additional_instructions: Appends additional instructions at the end of the instructions for the run. This
+              is useful for modifying the behavior on a per-run basis without overriding other
+              instructions.
+
+          additional_messages: Adds additional messages to the thread before creating the run.
+
+          instructions: Overrides the
+              [instructions](https://platform.openai.com/docs/api-reference/assistants/createAssistant)
+              of the assistant. This is useful for modifying the behavior on a per-run basis.
+
+          max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
+              run. The run will make a best effort to use only the number of completion tokens
+              specified, across multiple turns of the run. If the run exceeds the number of
+              completion tokens specified, the run will end with status `incomplete`. See
+              `incomplete_details` for more info.
+
+          max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
+              The run will make a best effort to use only the number of prompt tokens
+              specified, across multiple turns of the run. If the run exceeds the number of
+              prompt tokens specified, the run will end with status `incomplete`. See
+              `incomplete_details` for more info.
+
+          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+              for storing additional information about the object in a structured format, and
+              querying for objects via API or the dashboard.
+
+              Keys are strings with a maximum length of 64 characters. Values are strings with
+              a maximum length of 512 characters.
+
+          model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to
+              be used to execute this run. If a value is provided here, it will override the
+              model associated with the assistant. If not, the model associated with the
+              assistant will be used.
+
+          parallel_tool_calls: Whether to enable
+              [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling)
+              during tool use.
+
+          response_format: Specifies the format that the model must output. Compatible with
+              [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
+              [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4),
+              and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
+
+              Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
+              Outputs which ensures the model will match your supplied JSON schema. Learn more
+              in the
+              [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
+
+              Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
+              message the model generates is valid JSON.
+
+              **Important:** when using JSON mode, you **must** also instruct the model to
+              produce JSON yourself via a system or user message. Without this, the model may
+              generate an unending stream of whitespace until the generation reaches the token
+              limit, resulting in a long-running and seemingly "stuck" request. Also note that
+              the message content may be partially cut off if `finish_reason="length"`, which
+              indicates the generation exceeded `max_tokens` or the conversation exceeded the
+              max context length.
+
+          stream: If `true`, returns a stream of events that happen during the Run as server-sent
+              events, terminating when the Run enters a terminal state with a `data: [DONE]`
+              message.
+
+          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
+              make the output more random, while lower values like 0.2 will make it more
+              focused and deterministic.
+
+          tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
+              not call any tools and instead generates a message. `auto` is the default value
+              and means the model can pick between generating a message or calling one or more
+              tools. `required` means the model must call one or more tools before responding
+              to the user. Specifying a particular tool like `{"type": "file_search"}` or
+              `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+              call that tool.
+
+          tools: Override the tools the assistant can use for this run. This is useful for
+              modifying the behavior on a per-run basis.
+
+          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
+              model considers the results of the tokens with top_p probability mass. So 0.1
+              means only the tokens comprising the top 10% probability mass are considered.
+
+              We generally recommend altering this or temperature but not both.
+
+          truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to
+              control the intial context window of the run.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
+
+    @overload
+    async def create(
+        self,
+        thread_id: str,
+        *,
+        assistant_id: str,
+        stream: Literal[True],
+        include: List[RunStepInclude] | NotGiven = NOT_GIVEN,
+        additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+        response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AsyncStream[AssistantStreamEvent]:
+        """
+        Create a run.
+
+        Args:
+          assistant_id: The ID of the
+              [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to
+              execute this run.
+
+          stream: If `true`, returns a stream of events that happen during the Run as server-sent
+              events, terminating when the Run enters a terminal state with a `data: [DONE]`
+              message.
+
+          include: A list of additional fields to include in the response. Currently the only
+              supported value is `step_details.tool_calls[*].file_search.results[*].content`
+              to fetch the file search result content.
+
+              See the
+              [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search#customizing-file-search-settings)
+              for more information.
+
+          additional_instructions: Appends additional instructions at the end of the instructions for the run. This
+              is useful for modifying the behavior on a per-run basis without overriding other
+              instructions.
+
+          additional_messages: Adds additional messages to the thread before creating the run.
+
+          instructions: Overrides the
+              [instructions](https://platform.openai.com/docs/api-reference/assistants/createAssistant)
+              of the assistant. This is useful for modifying the behavior on a per-run basis.
+
+          max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
+              run. The run will make a best effort to use only the number of completion tokens
+              specified, across multiple turns of the run. If the run exceeds the number of
+              completion tokens specified, the run will end with status `incomplete`. See
+              `incomplete_details` for more info.
+
+          max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
+              The run will make a best effort to use only the number of prompt tokens
+              specified, across multiple turns of the run. If the run exceeds the number of
+              prompt tokens specified, the run will end with status `incomplete`. See
+              `incomplete_details` for more info.
+
+          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+              for storing additional information about the object in a structured format, and
+              querying for objects via API or the dashboard.
+
+              Keys are strings with a maximum length of 64 characters. Values are strings with
+              a maximum length of 512 characters.
+
+          model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to
+              be used to execute this run. If a value is provided here, it will override the
+              model associated with the assistant. If not, the model associated with the
+              assistant will be used.
+
+          parallel_tool_calls: Whether to enable
+              [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling)
+              during tool use.
+
+          response_format: Specifies the format that the model must output. Compatible with
+              [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
+              [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4),
+              and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
+
+              Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
+              Outputs which ensures the model will match your supplied JSON schema. Learn more
+              in the
+              [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
+
+              Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
+              message the model generates is valid JSON.
+
+              **Important:** when using JSON mode, you **must** also instruct the model to
+              produce JSON yourself via a system or user message. Without this, the model may
+              generate an unending stream of whitespace until the generation reaches the token
+              limit, resulting in a long-running and seemingly "stuck" request. Also note that
+              the message content may be partially cut off if `finish_reason="length"`, which
+              indicates the generation exceeded `max_tokens` or the conversation exceeded the
+              max context length.
+
+          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
+              make the output more random, while lower values like 0.2 will make it more
+              focused and deterministic.
+
+          tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
+              not call any tools and instead generates a message. `auto` is the default value
+              and means the model can pick between generating a message or calling one or more
+              tools. `required` means the model must call one or more tools before responding
+              to the user. Specifying a particular tool like `{"type": "file_search"}` or
+              `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+              call that tool.
+
+          tools: Override the tools the assistant can use for this run. This is useful for
+              modifying the behavior on a per-run basis.
+
+          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
+              model considers the results of the tokens with top_p probability mass. So 0.1
+              means only the tokens comprising the top 10% probability mass are considered.
+
+              We generally recommend altering this or temperature but not both.
+
+          truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to
+              control the intial context window of the run.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
+
+    @overload
+    async def create(
+        self,
+        thread_id: str,
+        *,
+        assistant_id: str,
+        stream: bool,
+        include: List[RunStepInclude] | NotGiven = NOT_GIVEN,
+        additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+        response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Run | AsyncStream[AssistantStreamEvent]:
+        """
+        Create a run.
+
+        Args:
+          assistant_id: The ID of the
+              [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to
+              execute this run.
+
+          stream: If `true`, returns a stream of events that happen during the Run as server-sent
+              events, terminating when the Run enters a terminal state with a `data: [DONE]`
+              message.
+
+          include: A list of additional fields to include in the response. Currently the only
+              supported value is `step_details.tool_calls[*].file_search.results[*].content`
+              to fetch the file search result content.
+
+              See the
+              [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search#customizing-file-search-settings)
+              for more information.
+
+          additional_instructions: Appends additional instructions at the end of the instructions for the run. This
+              is useful for modifying the behavior on a per-run basis without overriding other
+              instructions.
+
+          additional_messages: Adds additional messages to the thread before creating the run.
+
+          instructions: Overrides the
+              [instructions](https://platform.openai.com/docs/api-reference/assistants/createAssistant)
+              of the assistant. This is useful for modifying the behavior on a per-run basis.
+
+          max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
+              run. The run will make a best effort to use only the number of completion tokens
+              specified, across multiple turns of the run. If the run exceeds the number of
+              completion tokens specified, the run will end with status `incomplete`. See
+              `incomplete_details` for more info.
+
+          max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
+              The run will make a best effort to use only the number of prompt tokens
+              specified, across multiple turns of the run. If the run exceeds the number of
+              prompt tokens specified, the run will end with status `incomplete`. See
+              `incomplete_details` for more info.
+
+          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+              for storing additional information about the object in a structured format, and
+              querying for objects via API or the dashboard.
+
+              Keys are strings with a maximum length of 64 characters. Values are strings with
+              a maximum length of 512 characters.
+
+          model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to
+              be used to execute this run. If a value is provided here, it will override the
+              model associated with the assistant. If not, the model associated with the
+              assistant will be used.
+
+          parallel_tool_calls: Whether to enable
+              [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling)
+              during tool use.
+
+          response_format: Specifies the format that the model must output. Compatible with
+              [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
+              [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4),
+              and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
+
+              Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
+              Outputs which ensures the model will match your supplied JSON schema. Learn more
+              in the
+              [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
+
+              Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
+              message the model generates is valid JSON.
+
+              **Important:** when using JSON mode, you **must** also instruct the model to
+              produce JSON yourself via a system or user message. Without this, the model may
+              generate an unending stream of whitespace until the generation reaches the token
+              limit, resulting in a long-running and seemingly "stuck" request. Also note that
+              the message content may be partially cut off if `finish_reason="length"`, which
+              indicates the generation exceeded `max_tokens` or the conversation exceeded the
+              max context length.
+
+          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
+              make the output more random, while lower values like 0.2 will make it more
+              focused and deterministic.
+
+          tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
+              not call any tools and instead generates a message. `auto` is the default value
+              and means the model can pick between generating a message or calling one or more
+              tools. `required` means the model must call one or more tools before responding
+              to the user. Specifying a particular tool like `{"type": "file_search"}` or
+              `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+              call that tool.
+
+          tools: Override the tools the assistant can use for this run. This is useful for
+              modifying the behavior on a per-run basis.
+
+          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
+              model considers the results of the tokens with top_p probability mass. So 0.1
+              means only the tokens comprising the top 10% probability mass are considered.
+
+              We generally recommend altering this or temperature but not both.
+
+          truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to
+              control the intial context window of the run.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
+
+    @required_args(["assistant_id"], ["assistant_id", "stream"])
+    async def create(
+        self,
+        thread_id: str,
+        *,
+        assistant_id: str,
+        include: List[RunStepInclude] | NotGiven = NOT_GIVEN,
+        additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+        response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+        stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Run | AsyncStream[AssistantStreamEvent]:
+        if not thread_id:
+            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return await self._post(
+            f"/threads/{thread_id}/runs",
+            body=await async_maybe_transform(
+                {
+                    "assistant_id": assistant_id,
+                    "additional_instructions": additional_instructions,
+                    "additional_messages": additional_messages,
+                    "instructions": instructions,
+                    "max_completion_tokens": max_completion_tokens,
+                    "max_prompt_tokens": max_prompt_tokens,
+                    "metadata": metadata,
+                    "model": model,
+                    "parallel_tool_calls": parallel_tool_calls,
+                    "response_format": response_format,
+                    "stream": stream,
+                    "temperature": temperature,
+                    "tool_choice": tool_choice,
+                    "tools": tools,
+                    "top_p": top_p,
+                    "truncation_strategy": truncation_strategy,
+                },
+                run_create_params.RunCreateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers,
+                extra_query=extra_query,
+                extra_body=extra_body,
+                timeout=timeout,
+                query=await async_maybe_transform({"include": include}, run_create_params.RunCreateParams),
+            ),
+            cast_to=Run,
+            stream=stream or False,
+            stream_cls=AsyncStream[AssistantStreamEvent],
+        )
+
+    async def retrieve(
+        self,
+        run_id: str,
+        *,
+        thread_id: str,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Run:
+        """
+        Retrieves a run.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not thread_id:
+            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+        if not run_id:
+            raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return await self._get(
+            f"/threads/{thread_id}/runs/{run_id}",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Run,
+        )
+
+    async def update(
+        self,
+        run_id: str,
+        *,
+        thread_id: str,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Run:
+        """
+        Modifies a run.
+
+        Args:
+          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+              for storing additional information about the object in a structured format, and
+              querying for objects via API or the dashboard.
+
+              Keys are strings with a maximum length of 64 characters. Values are strings with
+              a maximum length of 512 characters.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not thread_id:
+            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+        if not run_id:
+            raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return await self._post(
+            f"/threads/{thread_id}/runs/{run_id}",
+            body=await async_maybe_transform({"metadata": metadata}, run_update_params.RunUpdateParams),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Run,
+        )
+
+    def list(
+        self,
+        thread_id: str,
+        *,
+        after: str | NotGiven = NOT_GIVEN,
+        before: str | NotGiven = NOT_GIVEN,
+        limit: int | NotGiven = NOT_GIVEN,
+        order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AsyncPaginator[Run, AsyncCursorPage[Run]]:
+        """
+        Returns a list of runs belonging to a thread.
+
+        Args:
+          after: A cursor for use in pagination. `after` is an object ID that defines your place
+              in the list. For instance, if you make a list request and receive 100 objects,
+              ending with obj_foo, your subsequent call can include after=obj_foo in order to
+              fetch the next page of the list.
+
+          before: A cursor for use in pagination. `before` is an object ID that defines your place
+              in the list. For instance, if you make a list request and receive 100 objects,
+              starting with obj_foo, your subsequent call can include before=obj_foo in order
+              to fetch the previous page of the list.
+
+          limit: A limit on the number of objects to be returned. Limit can range between 1 and
+              100, and the default is 20.
+
+          order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending
+              order and `desc` for descending order.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not thread_id:
+            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return self._get_api_list(
+            f"/threads/{thread_id}/runs",
+            page=AsyncCursorPage[Run],
+            options=make_request_options(
+                extra_headers=extra_headers,
+                extra_query=extra_query,
+                extra_body=extra_body,
+                timeout=timeout,
+                query=maybe_transform(
+                    {
+                        "after": after,
+                        "before": before,
+                        "limit": limit,
+                        "order": order,
+                    },
+                    run_list_params.RunListParams,
+                ),
+            ),
+            model=Run,
+        )
+
+    async def cancel(
+        self,
+        run_id: str,
+        *,
+        thread_id: str,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Run:
+        """
+        Cancels a run that is `in_progress`.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not thread_id:
+            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+        if not run_id:
+            raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return await self._post(
+            f"/threads/{thread_id}/runs/{run_id}/cancel",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Run,
+        )
+
+    async def create_and_poll(
+        self,
+        *,
+        assistant_id: str,
+        include: List[RunStepInclude] | NotGiven = NOT_GIVEN,
+        additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+        response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+        poll_interval_ms: int | NotGiven = NOT_GIVEN,
+        thread_id: str,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Run:
+        """
+        A helper to create a run an poll for a terminal state. More information on Run
+        lifecycles can be found here:
+        https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps
+        """
+        run = await self.create(
+            thread_id=thread_id,
+            assistant_id=assistant_id,
+            include=include,
+            additional_instructions=additional_instructions,
+            additional_messages=additional_messages,
+            instructions=instructions,
+            max_completion_tokens=max_completion_tokens,
+            max_prompt_tokens=max_prompt_tokens,
+            metadata=metadata,
+            model=model,
+            response_format=response_format,
+            temperature=temperature,
+            tool_choice=tool_choice,
+            parallel_tool_calls=parallel_tool_calls,
+            # We assume we are not streaming when polling
+            stream=False,
+            tools=tools,
+            truncation_strategy=truncation_strategy,
+            top_p=top_p,
+            extra_headers=extra_headers,
+            extra_query=extra_query,
+            extra_body=extra_body,
+            timeout=timeout,
+        )
+        return await self.poll(
+            run.id,
+            thread_id=thread_id,
+            extra_headers=extra_headers,
+            extra_query=extra_query,
+            extra_body=extra_body,
+            poll_interval_ms=poll_interval_ms,
+            timeout=timeout,
+        )
+
+    @overload
+    @typing_extensions.deprecated("use `stream` instead")
+    def create_and_stream(
+        self,
+        *,
+        assistant_id: str,
+        additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+        response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+        thread_id: str,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AsyncAssistantStreamManager[AsyncAssistantEventHandler]:
+        """Create a Run stream"""
+        ...
+
+    @overload
+    @typing_extensions.deprecated("use `stream` instead")
+    def create_and_stream(
+        self,
+        *,
+        assistant_id: str,
+        additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+        response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+        thread_id: str,
+        event_handler: AsyncAssistantEventHandlerT,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AsyncAssistantStreamManager[AsyncAssistantEventHandlerT]:
+        """Create a Run stream"""
+        ...
+
+    @typing_extensions.deprecated("use `stream` instead")
+    def create_and_stream(
+        self,
+        *,
+        assistant_id: str,
+        additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+        response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+        thread_id: str,
+        event_handler: AsyncAssistantEventHandlerT | None = None,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> (
+        AsyncAssistantStreamManager[AsyncAssistantEventHandler]
+        | AsyncAssistantStreamManager[AsyncAssistantEventHandlerT]
+    ):
+        """Create a Run stream"""
+        if not thread_id:
+            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+
+        extra_headers = {
+            "OpenAI-Beta": "assistants=v2",
+            "X-Stainless-Stream-Helper": "threads.runs.create_and_stream",
+            "X-Stainless-Custom-Event-Handler": "true" if event_handler else "false",
+            **(extra_headers or {}),
+        }
+        request = self._post(
+            f"/threads/{thread_id}/runs",
+            body=maybe_transform(
+                {
+                    "assistant_id": assistant_id,
+                    "additional_instructions": additional_instructions,
+                    "additional_messages": additional_messages,
+                    "instructions": instructions,
+                    "max_completion_tokens": max_completion_tokens,
+                    "max_prompt_tokens": max_prompt_tokens,
+                    "metadata": metadata,
+                    "model": model,
+                    "response_format": response_format,
+                    "temperature": temperature,
+                    "tool_choice": tool_choice,
+                    "stream": True,
+                    "tools": tools,
+                    "truncation_strategy": truncation_strategy,
+                    "top_p": top_p,
+                    "parallel_tool_calls": parallel_tool_calls,
+                },
+                run_create_params.RunCreateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Run,
+            stream=True,
+            stream_cls=AsyncStream[AssistantStreamEvent],
+        )
+        return AsyncAssistantStreamManager(request, event_handler=event_handler or AsyncAssistantEventHandler())
+
+    async def poll(
+        self,
+        run_id: str,
+        thread_id: str,
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+        poll_interval_ms: int | NotGiven = NOT_GIVEN,
+    ) -> Run:
+        """
+        A helper to poll a run status until it reaches a terminal state. More
+        information on Run lifecycles can be found here:
+        https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps
+        """
+        extra_headers = {"X-Stainless-Poll-Helper": "true", **(extra_headers or {})}
+
+        if is_given(poll_interval_ms):
+            extra_headers["X-Stainless-Custom-Poll-Interval"] = str(poll_interval_ms)
+
+        terminal_states = {"requires_action", "cancelled", "completed", "failed", "expired", "incomplete"}
+        while True:
+            response = await self.with_raw_response.retrieve(
+                thread_id=thread_id,
+                run_id=run_id,
+                extra_headers=extra_headers,
+                extra_body=extra_body,
+                extra_query=extra_query,
+                timeout=timeout,
+            )
+
+            run = response.parse()
+            # Return if we reached a terminal state
+            if run.status in terminal_states:
+                return run
+
+            if not is_given(poll_interval_ms):
+                from_header = response.headers.get("openai-poll-after-ms")
+                if from_header is not None:
+                    poll_interval_ms = int(from_header)
+                else:
+                    poll_interval_ms = 1000
+
+            await self._sleep(poll_interval_ms / 1000)
+
+    @overload
+    def stream(
+        self,
+        *,
+        assistant_id: str,
+        additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+        response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+        thread_id: str,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AsyncAssistantStreamManager[AsyncAssistantEventHandler]:
+        """Create a Run stream"""
+        ...
+
+    @overload
+    def stream(
+        self,
+        *,
+        assistant_id: str,
+        include: List[RunStepInclude] | NotGiven = NOT_GIVEN,
+        additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+        response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+        thread_id: str,
+        event_handler: AsyncAssistantEventHandlerT,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AsyncAssistantStreamManager[AsyncAssistantEventHandlerT]:
+        """Create a Run stream"""
+        ...
+
+    def stream(
+        self,
+        *,
+        assistant_id: str,
+        include: List[RunStepInclude] | NotGiven = NOT_GIVEN,
+        additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+        response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+        thread_id: str,
+        event_handler: AsyncAssistantEventHandlerT | None = None,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> (
+        AsyncAssistantStreamManager[AsyncAssistantEventHandler]
+        | AsyncAssistantStreamManager[AsyncAssistantEventHandlerT]
+    ):
+        """Create a Run stream"""
+        if not thread_id:
+            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+
+        extra_headers = {
+            "OpenAI-Beta": "assistants=v2",
+            "X-Stainless-Stream-Helper": "threads.runs.create_and_stream",
+            "X-Stainless-Custom-Event-Handler": "true" if event_handler else "false",
+            **(extra_headers or {}),
+        }
+        request = self._post(
+            f"/threads/{thread_id}/runs",
+            body=maybe_transform(
+                {
+                    "assistant_id": assistant_id,
+                    "additional_instructions": additional_instructions,
+                    "additional_messages": additional_messages,
+                    "instructions": instructions,
+                    "max_completion_tokens": max_completion_tokens,
+                    "max_prompt_tokens": max_prompt_tokens,
+                    "metadata": metadata,
+                    "model": model,
+                    "response_format": response_format,
+                    "temperature": temperature,
+                    "tool_choice": tool_choice,
+                    "stream": True,
+                    "tools": tools,
+                    "parallel_tool_calls": parallel_tool_calls,
+                    "truncation_strategy": truncation_strategy,
+                    "top_p": top_p,
+                },
+                run_create_params.RunCreateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers,
+                extra_query=extra_query,
+                extra_body=extra_body,
+                timeout=timeout,
+                query=maybe_transform({"include": include}, run_create_params.RunCreateParams),
+            ),
+            cast_to=Run,
+            stream=True,
+            stream_cls=AsyncStream[AssistantStreamEvent],
+        )
+        return AsyncAssistantStreamManager(request, event_handler=event_handler or AsyncAssistantEventHandler())
+
+    @overload
+    async def submit_tool_outputs(
+        self,
+        run_id: str,
+        *,
+        thread_id: str,
+        tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput],
+        stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Run:
+        """
+        When a run has the `status: "requires_action"` and `required_action.type` is
+        `submit_tool_outputs`, this endpoint can be used to submit the outputs from the
+        tool calls once they're all completed. All outputs must be submitted in a single
+        request.
+
+        Args:
+          tool_outputs: A list of tools for which the outputs are being submitted.
+
+          stream: If `true`, returns a stream of events that happen during the Run as server-sent
+              events, terminating when the Run enters a terminal state with a `data: [DONE]`
+              message.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
+
+    @overload
+    async def submit_tool_outputs(
+        self,
+        run_id: str,
+        *,
+        thread_id: str,
+        stream: Literal[True],
+        tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput],
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AsyncStream[AssistantStreamEvent]:
+        """
+        When a run has the `status: "requires_action"` and `required_action.type` is
+        `submit_tool_outputs`, this endpoint can be used to submit the outputs from the
+        tool calls once they're all completed. All outputs must be submitted in a single
+        request.
+
+        Args:
+          stream: If `true`, returns a stream of events that happen during the Run as server-sent
+              events, terminating when the Run enters a terminal state with a `data: [DONE]`
+              message.
+
+          tool_outputs: A list of tools for which the outputs are being submitted.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
+
+    @overload
+    async def submit_tool_outputs(
+        self,
+        run_id: str,
+        *,
+        thread_id: str,
+        stream: bool,
+        tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput],
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Run | AsyncStream[AssistantStreamEvent]:
+        """
+        When a run has the `status: "requires_action"` and `required_action.type` is
+        `submit_tool_outputs`, this endpoint can be used to submit the outputs from the
+        tool calls once they're all completed. All outputs must be submitted in a single
+        request.
+
+        Args:
+          stream: If `true`, returns a stream of events that happen during the Run as server-sent
+              events, terminating when the Run enters a terminal state with a `data: [DONE]`
+              message.
+
+          tool_outputs: A list of tools for which the outputs are being submitted.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
+
+    @required_args(["thread_id", "tool_outputs"], ["thread_id", "stream", "tool_outputs"])
+    async def submit_tool_outputs(
+        self,
+        run_id: str,
+        *,
+        thread_id: str,
+        tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput],
+        stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Run | AsyncStream[AssistantStreamEvent]:
+        if not thread_id:
+            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+        if not run_id:
+            raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return await self._post(
+            f"/threads/{thread_id}/runs/{run_id}/submit_tool_outputs",
+            body=await async_maybe_transform(
+                {
+                    "tool_outputs": tool_outputs,
+                    "stream": stream,
+                },
+                run_submit_tool_outputs_params.RunSubmitToolOutputsParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Run,
+            stream=stream or False,
+            stream_cls=AsyncStream[AssistantStreamEvent],
+        )
+
+    async def submit_tool_outputs_and_poll(
+        self,
+        *,
+        tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput],
+        run_id: str,
+        thread_id: str,
+        poll_interval_ms: int | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Run:
+        """
+        A helper to submit a tool output to a run and poll for a terminal run state.
+        More information on Run lifecycles can be found here:
+        https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps
+        """
+        run = await self.submit_tool_outputs(
+            run_id=run_id,
+            thread_id=thread_id,
+            tool_outputs=tool_outputs,
+            stream=False,
+            extra_headers=extra_headers,
+            extra_query=extra_query,
+            extra_body=extra_body,
+            timeout=timeout,
+        )
+        return await self.poll(
+            run_id=run.id,
+            thread_id=thread_id,
+            extra_headers=extra_headers,
+            extra_query=extra_query,
+            extra_body=extra_body,
+            timeout=timeout,
+            poll_interval_ms=poll_interval_ms,
+        )
+
+    @overload
+    def submit_tool_outputs_stream(
+        self,
+        *,
+        tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput],
+        run_id: str,
+        thread_id: str,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AsyncAssistantStreamManager[AsyncAssistantEventHandler]:
+        """
+        Submit the tool outputs from a previous run and stream the run to a terminal
+        state. More information on Run lifecycles can be found here:
+        https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps
+        """
+        ...
+
+    @overload
+    def submit_tool_outputs_stream(
+        self,
+        *,
+        tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput],
+        run_id: str,
+        thread_id: str,
+        event_handler: AsyncAssistantEventHandlerT,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AsyncAssistantStreamManager[AsyncAssistantEventHandlerT]:
+        """
+        Submit the tool outputs from a previous run and stream the run to a terminal
+        state. More information on Run lifecycles can be found here:
+        https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps
+        """
+        ...
+
+    def submit_tool_outputs_stream(
+        self,
+        *,
+        tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput],
+        run_id: str,
+        thread_id: str,
+        event_handler: AsyncAssistantEventHandlerT | None = None,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> (
+        AsyncAssistantStreamManager[AsyncAssistantEventHandler]
+        | AsyncAssistantStreamManager[AsyncAssistantEventHandlerT]
+    ):
+        """
+        Submit the tool outputs from a previous run and stream the run to a terminal
+        state. More information on Run lifecycles can be found here:
+        https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps
+        """
+        if not run_id:
+            raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}")
+
+        if not thread_id:
+            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+
+        extra_headers = {
+            "OpenAI-Beta": "assistants=v2",
+            "X-Stainless-Stream-Helper": "threads.runs.submit_tool_outputs_stream",
+            "X-Stainless-Custom-Event-Handler": "true" if event_handler else "false",
+            **(extra_headers or {}),
+        }
+        request = self._post(
+            f"/threads/{thread_id}/runs/{run_id}/submit_tool_outputs",
+            body=maybe_transform(
+                {
+                    "tool_outputs": tool_outputs,
+                    "stream": True,
+                },
+                run_submit_tool_outputs_params.RunSubmitToolOutputsParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Run,
+            stream=True,
+            stream_cls=AsyncStream[AssistantStreamEvent],
+        )
+        return AsyncAssistantStreamManager(request, event_handler=event_handler or AsyncAssistantEventHandler())
+
+
+class RunsWithRawResponse:
+    def __init__(self, runs: Runs) -> None:
+        self._runs = runs
+
+        self.create = _legacy_response.to_raw_response_wrapper(
+            runs.create,
+        )
+        self.retrieve = _legacy_response.to_raw_response_wrapper(
+            runs.retrieve,
+        )
+        self.update = _legacy_response.to_raw_response_wrapper(
+            runs.update,
+        )
+        self.list = _legacy_response.to_raw_response_wrapper(
+            runs.list,
+        )
+        self.cancel = _legacy_response.to_raw_response_wrapper(
+            runs.cancel,
+        )
+        self.submit_tool_outputs = _legacy_response.to_raw_response_wrapper(
+            runs.submit_tool_outputs,
+        )
+
+    @cached_property
+    def steps(self) -> StepsWithRawResponse:
+        return StepsWithRawResponse(self._runs.steps)
+
+
+class AsyncRunsWithRawResponse:
+    def __init__(self, runs: AsyncRuns) -> None:
+        self._runs = runs
+
+        self.create = _legacy_response.async_to_raw_response_wrapper(
+            runs.create,
+        )
+        self.retrieve = _legacy_response.async_to_raw_response_wrapper(
+            runs.retrieve,
+        )
+        self.update = _legacy_response.async_to_raw_response_wrapper(
+            runs.update,
+        )
+        self.list = _legacy_response.async_to_raw_response_wrapper(
+            runs.list,
+        )
+        self.cancel = _legacy_response.async_to_raw_response_wrapper(
+            runs.cancel,
+        )
+        self.submit_tool_outputs = _legacy_response.async_to_raw_response_wrapper(
+            runs.submit_tool_outputs,
+        )
+
+    @cached_property
+    def steps(self) -> AsyncStepsWithRawResponse:
+        return AsyncStepsWithRawResponse(self._runs.steps)
+
+
+class RunsWithStreamingResponse:
+    def __init__(self, runs: Runs) -> None:
+        self._runs = runs
+
+        self.create = to_streamed_response_wrapper(
+            runs.create,
+        )
+        self.retrieve = to_streamed_response_wrapper(
+            runs.retrieve,
+        )
+        self.update = to_streamed_response_wrapper(
+            runs.update,
+        )
+        self.list = to_streamed_response_wrapper(
+            runs.list,
+        )
+        self.cancel = to_streamed_response_wrapper(
+            runs.cancel,
+        )
+        self.submit_tool_outputs = to_streamed_response_wrapper(
+            runs.submit_tool_outputs,
+        )
+
+    @cached_property
+    def steps(self) -> StepsWithStreamingResponse:
+        return StepsWithStreamingResponse(self._runs.steps)
+
+
+class AsyncRunsWithStreamingResponse:
+    def __init__(self, runs: AsyncRuns) -> None:
+        self._runs = runs
+
+        self.create = async_to_streamed_response_wrapper(
+            runs.create,
+        )
+        self.retrieve = async_to_streamed_response_wrapper(
+            runs.retrieve,
+        )
+        self.update = async_to_streamed_response_wrapper(
+            runs.update,
+        )
+        self.list = async_to_streamed_response_wrapper(
+            runs.list,
+        )
+        self.cancel = async_to_streamed_response_wrapper(
+            runs.cancel,
+        )
+        self.submit_tool_outputs = async_to_streamed_response_wrapper(
+            runs.submit_tool_outputs,
+        )
+
+    @cached_property
+    def steps(self) -> AsyncStepsWithStreamingResponse:
+        return AsyncStepsWithStreamingResponse(self._runs.steps)
diff --git a/.venv/lib/python3.11/site-packages/openai/resources/beta/threads/runs/steps.py b/.venv/lib/python3.11/site-packages/openai/resources/beta/threads/runs/steps.py
new file mode 100644
index 0000000000000000000000000000000000000000..709c729d4540c4af1b480e93171d1370c7edfcf5
--- /dev/null
+++ b/.venv/lib/python3.11/site-packages/openai/resources/beta/threads/runs/steps.py
@@ -0,0 +1,381 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import List
+from typing_extensions import Literal
+
+import httpx
+
+from ..... import _legacy_response
+from ....._types import NOT_GIVEN, Body, Query, Headers, NotGiven
+from ....._utils import (
+    maybe_transform,
+    async_maybe_transform,
+)
+from ....._compat import cached_property
+from ....._resource import SyncAPIResource, AsyncAPIResource
+from ....._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
+from .....pagination import SyncCursorPage, AsyncCursorPage
+from ....._base_client import AsyncPaginator, make_request_options
+from .....types.beta.threads.runs import step_list_params, step_retrieve_params
+from .....types.beta.threads.runs.run_step import RunStep
+from .....types.beta.threads.runs.run_step_include import RunStepInclude
+
+__all__ = ["Steps", "AsyncSteps"]
+
+
+class Steps(SyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> StepsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return StepsWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> StepsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return StepsWithStreamingResponse(self)
+
+    def retrieve(
+        self,
+        step_id: str,
+        *,
+        thread_id: str,
+        run_id: str,
+        include: List[RunStepInclude] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> RunStep:
+        """
+        Retrieves a run step.
+
+        Args:
+          include: A list of additional fields to include in the response. Currently the only
+              supported value is `step_details.tool_calls[*].file_search.results[*].content`
+              to fetch the file search result content.
+
+              See the
+              [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search#customizing-file-search-settings)
+              for more information.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not thread_id:
+            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+        if not run_id:
+            raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}")
+        if not step_id:
+            raise ValueError(f"Expected a non-empty value for `step_id` but received {step_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return self._get(
+            f"/threads/{thread_id}/runs/{run_id}/steps/{step_id}",
+            options=make_request_options(
+                extra_headers=extra_headers,
+                extra_query=extra_query,
+                extra_body=extra_body,
+                timeout=timeout,
+                query=maybe_transform({"include": include}, step_retrieve_params.StepRetrieveParams),
+            ),
+            cast_to=RunStep,
+        )
+
+    def list(
+        self,
+        run_id: str,
+        *,
+        thread_id: str,
+        after: str | NotGiven = NOT_GIVEN,
+        before: str | NotGiven = NOT_GIVEN,
+        include: List[RunStepInclude] | NotGiven = NOT_GIVEN,
+        limit: int | NotGiven = NOT_GIVEN,
+        order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> SyncCursorPage[RunStep]:
+        """
+        Returns a list of run steps belonging to a run.
+
+        Args:
+          after: A cursor for use in pagination. `after` is an object ID that defines your place
+              in the list. For instance, if you make a list request and receive 100 objects,
+              ending with obj_foo, your subsequent call can include after=obj_foo in order to
+              fetch the next page of the list.
+
+          before: A cursor for use in pagination. `before` is an object ID that defines your place
+              in the list. For instance, if you make a list request and receive 100 objects,
+              starting with obj_foo, your subsequent call can include before=obj_foo in order
+              to fetch the previous page of the list.
+
+          include: A list of additional fields to include in the response. Currently the only
+              supported value is `step_details.tool_calls[*].file_search.results[*].content`
+              to fetch the file search result content.
+
+              See the
+              [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search#customizing-file-search-settings)
+              for more information.
+
+          limit: A limit on the number of objects to be returned. Limit can range between 1 and
+              100, and the default is 20.
+
+          order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending
+              order and `desc` for descending order.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not thread_id:
+            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+        if not run_id:
+            raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return self._get_api_list(
+            f"/threads/{thread_id}/runs/{run_id}/steps",
+            page=SyncCursorPage[RunStep],
+            options=make_request_options(
+                extra_headers=extra_headers,
+                extra_query=extra_query,
+                extra_body=extra_body,
+                timeout=timeout,
+                query=maybe_transform(
+                    {
+                        "after": after,
+                        "before": before,
+                        "include": include,
+                        "limit": limit,
+                        "order": order,
+                    },
+                    step_list_params.StepListParams,
+                ),
+            ),
+            model=RunStep,
+        )
+
+
+class AsyncSteps(AsyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> AsyncStepsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return AsyncStepsWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> AsyncStepsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return AsyncStepsWithStreamingResponse(self)
+
+    async def retrieve(
+        self,
+        step_id: str,
+        *,
+        thread_id: str,
+        run_id: str,
+        include: List[RunStepInclude] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> RunStep:
+        """
+        Retrieves a run step.
+
+        Args:
+          include: A list of additional fields to include in the response. Currently the only
+              supported value is `step_details.tool_calls[*].file_search.results[*].content`
+              to fetch the file search result content.
+
+              See the
+              [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search#customizing-file-search-settings)
+              for more information.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not thread_id:
+            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+        if not run_id:
+            raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}")
+        if not step_id:
+            raise ValueError(f"Expected a non-empty value for `step_id` but received {step_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return await self._get(
+            f"/threads/{thread_id}/runs/{run_id}/steps/{step_id}",
+            options=make_request_options(
+                extra_headers=extra_headers,
+                extra_query=extra_query,
+                extra_body=extra_body,
+                timeout=timeout,
+                query=await async_maybe_transform({"include": include}, step_retrieve_params.StepRetrieveParams),
+            ),
+            cast_to=RunStep,
+        )
+
+    def list(
+        self,
+        run_id: str,
+        *,
+        thread_id: str,
+        after: str | NotGiven = NOT_GIVEN,
+        before: str | NotGiven = NOT_GIVEN,
+        include: List[RunStepInclude] | NotGiven = NOT_GIVEN,
+        limit: int | NotGiven = NOT_GIVEN,
+        order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AsyncPaginator[RunStep, AsyncCursorPage[RunStep]]:
+        """
+        Returns a list of run steps belonging to a run.
+
+        Args:
+          after: A cursor for use in pagination. `after` is an object ID that defines your place
+              in the list. For instance, if you make a list request and receive 100 objects,
+              ending with obj_foo, your subsequent call can include after=obj_foo in order to
+              fetch the next page of the list.
+
+          before: A cursor for use in pagination. `before` is an object ID that defines your place
+              in the list. For instance, if you make a list request and receive 100 objects,
+              starting with obj_foo, your subsequent call can include before=obj_foo in order
+              to fetch the previous page of the list.
+
+          include: A list of additional fields to include in the response. Currently the only
+              supported value is `step_details.tool_calls[*].file_search.results[*].content`
+              to fetch the file search result content.
+
+              See the
+              [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search#customizing-file-search-settings)
+              for more information.
+
+          limit: A limit on the number of objects to be returned. Limit can range between 1 and
+              100, and the default is 20.
+
+          order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending
+              order and `desc` for descending order.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not thread_id:
+            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+        if not run_id:
+            raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return self._get_api_list(
+            f"/threads/{thread_id}/runs/{run_id}/steps",
+            page=AsyncCursorPage[RunStep],
+            options=make_request_options(
+                extra_headers=extra_headers,
+                extra_query=extra_query,
+                extra_body=extra_body,
+                timeout=timeout,
+                query=maybe_transform(
+                    {
+                        "after": after,
+                        "before": before,
+                        "include": include,
+                        "limit": limit,
+                        "order": order,
+                    },
+                    step_list_params.StepListParams,
+                ),
+            ),
+            model=RunStep,
+        )
+
+
+class StepsWithRawResponse:
+    def __init__(self, steps: Steps) -> None:
+        self._steps = steps
+
+        self.retrieve = _legacy_response.to_raw_response_wrapper(
+            steps.retrieve,
+        )
+        self.list = _legacy_response.to_raw_response_wrapper(
+            steps.list,
+        )
+
+
+class AsyncStepsWithRawResponse:
+    def __init__(self, steps: AsyncSteps) -> None:
+        self._steps = steps
+
+        self.retrieve = _legacy_response.async_to_raw_response_wrapper(
+            steps.retrieve,
+        )
+        self.list = _legacy_response.async_to_raw_response_wrapper(
+            steps.list,
+        )
+
+
+class StepsWithStreamingResponse:
+    def __init__(self, steps: Steps) -> None:
+        self._steps = steps
+
+        self.retrieve = to_streamed_response_wrapper(
+            steps.retrieve,
+        )
+        self.list = to_streamed_response_wrapper(
+            steps.list,
+        )
+
+
+class AsyncStepsWithStreamingResponse:
+    def __init__(self, steps: AsyncSteps) -> None:
+        self._steps = steps
+
+        self.retrieve = async_to_streamed_response_wrapper(
+            steps.retrieve,
+        )
+        self.list = async_to_streamed_response_wrapper(
+            steps.list,
+        )
diff --git a/.venv/lib/python3.11/site-packages/openai/resources/beta/threads/threads.py b/.venv/lib/python3.11/site-packages/openai/resources/beta/threads/threads.py
new file mode 100644
index 0000000000000000000000000000000000000000..6ff8539501b4016e0b1662de45f4f8b71963ba7a
--- /dev/null
+++ b/.venv/lib/python3.11/site-packages/openai/resources/beta/threads/threads.py
@@ -0,0 +1,1875 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Union, Iterable, Optional
+from functools import partial
+from typing_extensions import Literal, overload
+
+import httpx
+
+from .... import _legacy_response
+from .messages import (
+    Messages,
+    AsyncMessages,
+    MessagesWithRawResponse,
+    AsyncMessagesWithRawResponse,
+    MessagesWithStreamingResponse,
+    AsyncMessagesWithStreamingResponse,
+)
+from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven
+from ...._utils import (
+    required_args,
+    maybe_transform,
+    async_maybe_transform,
+)
+from .runs.runs import (
+    Runs,
+    AsyncRuns,
+    RunsWithRawResponse,
+    AsyncRunsWithRawResponse,
+    RunsWithStreamingResponse,
+    AsyncRunsWithStreamingResponse,
+)
+from ...._compat import cached_property
+from ...._resource import SyncAPIResource, AsyncAPIResource
+from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
+from ...._streaming import Stream, AsyncStream
+from ....types.beta import (
+    thread_create_params,
+    thread_update_params,
+    thread_create_and_run_params,
+)
+from ...._base_client import make_request_options
+from ....lib.streaming import (
+    AssistantEventHandler,
+    AssistantEventHandlerT,
+    AssistantStreamManager,
+    AsyncAssistantEventHandler,
+    AsyncAssistantEventHandlerT,
+    AsyncAssistantStreamManager,
+)
+from ....types.chat_model import ChatModel
+from ....types.beta.thread import Thread
+from ....types.beta.threads.run import Run
+from ....types.beta.thread_deleted import ThreadDeleted
+from ....types.shared_params.metadata import Metadata
+from ....types.beta.assistant_stream_event import AssistantStreamEvent
+from ....types.beta.assistant_tool_choice_option_param import AssistantToolChoiceOptionParam
+from ....types.beta.assistant_response_format_option_param import AssistantResponseFormatOptionParam
+
+__all__ = ["Threads", "AsyncThreads"]
+
+
+class Threads(SyncAPIResource):
+    @cached_property
+    def runs(self) -> Runs:
+        return Runs(self._client)
+
+    @cached_property
+    def messages(self) -> Messages:
+        return Messages(self._client)
+
+    @cached_property
+    def with_raw_response(self) -> ThreadsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return ThreadsWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> ThreadsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return ThreadsWithStreamingResponse(self)
+
+    def create(
+        self,
+        *,
+        messages: Iterable[thread_create_params.Message] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        tool_resources: Optional[thread_create_params.ToolResources] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Thread:
+        """
+        Create a thread.
+
+        Args:
+          messages: A list of [messages](https://platform.openai.com/docs/api-reference/messages) to
+              start the thread with.
+
+          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+              for storing additional information about the object in a structured format, and
+              querying for objects via API or the dashboard.
+
+              Keys are strings with a maximum length of 64 characters. Values are strings with
+              a maximum length of 512 characters.
+
+          tool_resources: A set of resources that are made available to the assistant's tools in this
+              thread. The resources are specific to the type of tool. For example, the
+              `code_interpreter` tool requires a list of file IDs, while the `file_search`
+              tool requires a list of vector store IDs.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return self._post(
+            "/threads",
+            body=maybe_transform(
+                {
+                    "messages": messages,
+                    "metadata": metadata,
+                    "tool_resources": tool_resources,
+                },
+                thread_create_params.ThreadCreateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Thread,
+        )
+
+    def retrieve(
+        self,
+        thread_id: str,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Thread:
+        """
+        Retrieves a thread.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not thread_id:
+            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return self._get(
+            f"/threads/{thread_id}",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Thread,
+        )
+
+    def update(
+        self,
+        thread_id: str,
+        *,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        tool_resources: Optional[thread_update_params.ToolResources] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Thread:
+        """
+        Modifies a thread.
+
+        Args:
+          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+              for storing additional information about the object in a structured format, and
+              querying for objects via API or the dashboard.
+
+              Keys are strings with a maximum length of 64 characters. Values are strings with
+              a maximum length of 512 characters.
+
+          tool_resources: A set of resources that are made available to the assistant's tools in this
+              thread. The resources are specific to the type of tool. For example, the
+              `code_interpreter` tool requires a list of file IDs, while the `file_search`
+              tool requires a list of vector store IDs.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not thread_id:
+            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return self._post(
+            f"/threads/{thread_id}",
+            body=maybe_transform(
+                {
+                    "metadata": metadata,
+                    "tool_resources": tool_resources,
+                },
+                thread_update_params.ThreadUpdateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Thread,
+        )
+
+    def delete(
+        self,
+        thread_id: str,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> ThreadDeleted:
+        """
+        Delete a thread.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not thread_id:
+            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return self._delete(
+            f"/threads/{thread_id}",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=ThreadDeleted,
+        )
+
+    @overload
+    def create_and_run(
+        self,
+        *,
+        assistant_id: str,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+        response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+        stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+        tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+        tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Run:
+        """
+        Create a thread and run it in one request.
+
+        Args:
+          assistant_id: The ID of the
+              [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to
+              execute this run.
+
+          instructions: Override the default system message of the assistant. This is useful for
+              modifying the behavior on a per-run basis.
+
+          max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
+              run. The run will make a best effort to use only the number of completion tokens
+              specified, across multiple turns of the run. If the run exceeds the number of
+              completion tokens specified, the run will end with status `incomplete`. See
+              `incomplete_details` for more info.
+
+          max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
+              The run will make a best effort to use only the number of prompt tokens
+              specified, across multiple turns of the run. If the run exceeds the number of
+              prompt tokens specified, the run will end with status `incomplete`. See
+              `incomplete_details` for more info.
+
+          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+              for storing additional information about the object in a structured format, and
+              querying for objects via API or the dashboard.
+
+              Keys are strings with a maximum length of 64 characters. Values are strings with
+              a maximum length of 512 characters.
+
+          model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to
+              be used to execute this run. If a value is provided here, it will override the
+              model associated with the assistant. If not, the model associated with the
+              assistant will be used.
+
+          parallel_tool_calls: Whether to enable
+              [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling)
+              during tool use.
+
+          response_format: Specifies the format that the model must output. Compatible with
+              [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
+              [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4),
+              and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
+
+              Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
+              Outputs which ensures the model will match your supplied JSON schema. Learn more
+              in the
+              [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
+
+              Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
+              message the model generates is valid JSON.
+
+              **Important:** when using JSON mode, you **must** also instruct the model to
+              produce JSON yourself via a system or user message. Without this, the model may
+              generate an unending stream of whitespace until the generation reaches the token
+              limit, resulting in a long-running and seemingly "stuck" request. Also note that
+              the message content may be partially cut off if `finish_reason="length"`, which
+              indicates the generation exceeded `max_tokens` or the conversation exceeded the
+              max context length.
+
+          stream: If `true`, returns a stream of events that happen during the Run as server-sent
+              events, terminating when the Run enters a terminal state with a `data: [DONE]`
+              message.
+
+          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
+              make the output more random, while lower values like 0.2 will make it more
+              focused and deterministic.
+
+          thread: Options to create a new thread. If no thread is provided when running a request,
+              an empty thread will be created.
+
+          tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
+              not call any tools and instead generates a message. `auto` is the default value
+              and means the model can pick between generating a message or calling one or more
+              tools. `required` means the model must call one or more tools before responding
+              to the user. Specifying a particular tool like `{"type": "file_search"}` or
+              `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+              call that tool.
+
+          tool_resources: A set of resources that are used by the assistant's tools. The resources are
+              specific to the type of tool. For example, the `code_interpreter` tool requires
+              a list of file IDs, while the `file_search` tool requires a list of vector store
+              IDs.
+
+          tools: Override the tools the assistant can use for this run. This is useful for
+              modifying the behavior on a per-run basis.
+
+          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
+              model considers the results of the tokens with top_p probability mass. So 0.1
+              means only the tokens comprising the top 10% probability mass are considered.
+
+              We generally recommend altering this or temperature but not both.
+
+          truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to
+              control the intial context window of the run.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
+
+    @overload
+    def create_and_run(
+        self,
+        *,
+        assistant_id: str,
+        stream: Literal[True],
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+        response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+        tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+        tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Stream[AssistantStreamEvent]:
+        """
+        Create a thread and run it in one request.
+
+        Args:
+          assistant_id: The ID of the
+              [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to
+              execute this run.
+
+          stream: If `true`, returns a stream of events that happen during the Run as server-sent
+              events, terminating when the Run enters a terminal state with a `data: [DONE]`
+              message.
+
+          instructions: Override the default system message of the assistant. This is useful for
+              modifying the behavior on a per-run basis.
+
+          max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
+              run. The run will make a best effort to use only the number of completion tokens
+              specified, across multiple turns of the run. If the run exceeds the number of
+              completion tokens specified, the run will end with status `incomplete`. See
+              `incomplete_details` for more info.
+
+          max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
+              The run will make a best effort to use only the number of prompt tokens
+              specified, across multiple turns of the run. If the run exceeds the number of
+              prompt tokens specified, the run will end with status `incomplete`. See
+              `incomplete_details` for more info.
+
+          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+              for storing additional information about the object in a structured format, and
+              querying for objects via API or the dashboard.
+
+              Keys are strings with a maximum length of 64 characters. Values are strings with
+              a maximum length of 512 characters.
+
+          model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to
+              be used to execute this run. If a value is provided here, it will override the
+              model associated with the assistant. If not, the model associated with the
+              assistant will be used.
+
+          parallel_tool_calls: Whether to enable
+              [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling)
+              during tool use.
+
+          response_format: Specifies the format that the model must output. Compatible with
+              [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
+              [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4),
+              and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
+
+              Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
+              Outputs which ensures the model will match your supplied JSON schema. Learn more
+              in the
+              [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
+
+              Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
+              message the model generates is valid JSON.
+
+              **Important:** when using JSON mode, you **must** also instruct the model to
+              produce JSON yourself via a system or user message. Without this, the model may
+              generate an unending stream of whitespace until the generation reaches the token
+              limit, resulting in a long-running and seemingly "stuck" request. Also note that
+              the message content may be partially cut off if `finish_reason="length"`, which
+              indicates the generation exceeded `max_tokens` or the conversation exceeded the
+              max context length.
+
+          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
+              make the output more random, while lower values like 0.2 will make it more
+              focused and deterministic.
+
+          thread: Options to create a new thread. If no thread is provided when running a request,
+              an empty thread will be created.
+
+          tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
+              not call any tools and instead generates a message. `auto` is the default value
+              and means the model can pick between generating a message or calling one or more
+              tools. `required` means the model must call one or more tools before responding
+              to the user. Specifying a particular tool like `{"type": "file_search"}` or
+              `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+              call that tool.
+
+          tool_resources: A set of resources that are used by the assistant's tools. The resources are
+              specific to the type of tool. For example, the `code_interpreter` tool requires
+              a list of file IDs, while the `file_search` tool requires a list of vector store
+              IDs.
+
+          tools: Override the tools the assistant can use for this run. This is useful for
+              modifying the behavior on a per-run basis.
+
+          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
+              model considers the results of the tokens with top_p probability mass. So 0.1
+              means only the tokens comprising the top 10% probability mass are considered.
+
+              We generally recommend altering this or temperature but not both.
+
+          truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to
+              control the intial context window of the run.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
+
+    @overload
+    def create_and_run(
+        self,
+        *,
+        assistant_id: str,
+        stream: bool,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+        response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+        tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+        tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Run | Stream[AssistantStreamEvent]:
+        """
+        Create a thread and run it in one request.
+
+        Args:
+          assistant_id: The ID of the
+              [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to
+              execute this run.
+
+          stream: If `true`, returns a stream of events that happen during the Run as server-sent
+              events, terminating when the Run enters a terminal state with a `data: [DONE]`
+              message.
+
+          instructions: Override the default system message of the assistant. This is useful for
+              modifying the behavior on a per-run basis.
+
+          max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
+              run. The run will make a best effort to use only the number of completion tokens
+              specified, across multiple turns of the run. If the run exceeds the number of
+              completion tokens specified, the run will end with status `incomplete`. See
+              `incomplete_details` for more info.
+
+          max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
+              The run will make a best effort to use only the number of prompt tokens
+              specified, across multiple turns of the run. If the run exceeds the number of
+              prompt tokens specified, the run will end with status `incomplete`. See
+              `incomplete_details` for more info.
+
+          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+              for storing additional information about the object in a structured format, and
+              querying for objects via API or the dashboard.
+
+              Keys are strings with a maximum length of 64 characters. Values are strings with
+              a maximum length of 512 characters.
+
+          model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to
+              be used to execute this run. If a value is provided here, it will override the
+              model associated with the assistant. If not, the model associated with the
+              assistant will be used.
+
+          parallel_tool_calls: Whether to enable
+              [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling)
+              during tool use.
+
+          response_format: Specifies the format that the model must output. Compatible with
+              [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
+              [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4),
+              and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
+
+              Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
+              Outputs which ensures the model will match your supplied JSON schema. Learn more
+              in the
+              [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
+
+              Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
+              message the model generates is valid JSON.
+
+              **Important:** when using JSON mode, you **must** also instruct the model to
+              produce JSON yourself via a system or user message. Without this, the model may
+              generate an unending stream of whitespace until the generation reaches the token
+              limit, resulting in a long-running and seemingly "stuck" request. Also note that
+              the message content may be partially cut off if `finish_reason="length"`, which
+              indicates the generation exceeded `max_tokens` or the conversation exceeded the
+              max context length.
+
+          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
+              make the output more random, while lower values like 0.2 will make it more
+              focused and deterministic.
+
+          thread: Options to create a new thread. If no thread is provided when running a request,
+              an empty thread will be created.
+
+          tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
+              not call any tools and instead generates a message. `auto` is the default value
+              and means the model can pick between generating a message or calling one or more
+              tools. `required` means the model must call one or more tools before responding
+              to the user. Specifying a particular tool like `{"type": "file_search"}` or
+              `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+              call that tool.
+
+          tool_resources: A set of resources that are used by the assistant's tools. The resources are
+              specific to the type of tool. For example, the `code_interpreter` tool requires
+              a list of file IDs, while the `file_search` tool requires a list of vector store
+              IDs.
+
+          tools: Override the tools the assistant can use for this run. This is useful for
+              modifying the behavior on a per-run basis.
+
+          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
+              model considers the results of the tokens with top_p probability mass. So 0.1
+              means only the tokens comprising the top 10% probability mass are considered.
+
+              We generally recommend altering this or temperature but not both.
+
+          truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to
+              control the intial context window of the run.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
+
+    @required_args(["assistant_id"], ["assistant_id", "stream"])
+    def create_and_run(
+        self,
+        *,
+        assistant_id: str,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+        response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+        stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+        tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+        tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Run | Stream[AssistantStreamEvent]:
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return self._post(
+            "/threads/runs",
+            body=maybe_transform(
+                {
+                    "assistant_id": assistant_id,
+                    "instructions": instructions,
+                    "max_completion_tokens": max_completion_tokens,
+                    "max_prompt_tokens": max_prompt_tokens,
+                    "metadata": metadata,
+                    "model": model,
+                    "parallel_tool_calls": parallel_tool_calls,
+                    "response_format": response_format,
+                    "stream": stream,
+                    "temperature": temperature,
+                    "thread": thread,
+                    "tool_choice": tool_choice,
+                    "tool_resources": tool_resources,
+                    "tools": tools,
+                    "top_p": top_p,
+                    "truncation_strategy": truncation_strategy,
+                },
+                thread_create_and_run_params.ThreadCreateAndRunParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Run,
+            stream=stream or False,
+            stream_cls=Stream[AssistantStreamEvent],
+        )
+
+    def create_and_run_poll(
+        self,
+        *,
+        assistant_id: str,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+        response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+        tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+        tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+        poll_interval_ms: int | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Run:
+        """
+        A helper to create a thread, start a run and then poll for a terminal state.
+        More information on Run lifecycles can be found here:
+        https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps
+        """
+        run = self.create_and_run(
+            assistant_id=assistant_id,
+            instructions=instructions,
+            max_completion_tokens=max_completion_tokens,
+            max_prompt_tokens=max_prompt_tokens,
+            metadata=metadata,
+            model=model,
+            parallel_tool_calls=parallel_tool_calls,
+            response_format=response_format,
+            temperature=temperature,
+            stream=False,
+            thread=thread,
+            tool_resources=tool_resources,
+            tool_choice=tool_choice,
+            truncation_strategy=truncation_strategy,
+            top_p=top_p,
+            tools=tools,
+            extra_headers=extra_headers,
+            extra_query=extra_query,
+            extra_body=extra_body,
+            timeout=timeout,
+        )
+        return self.runs.poll(run.id, run.thread_id, extra_headers, extra_query, extra_body, timeout, poll_interval_ms)
+
+    @overload
+    def create_and_run_stream(
+        self,
+        *,
+        assistant_id: str,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+        response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+        tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+        tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AssistantStreamManager[AssistantEventHandler]:
+        """Create a thread and stream the run back"""
+        ...
+
+    @overload
+    def create_and_run_stream(
+        self,
+        *,
+        assistant_id: str,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+        response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+        tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+        tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+        event_handler: AssistantEventHandlerT,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AssistantStreamManager[AssistantEventHandlerT]:
+        """Create a thread and stream the run back"""
+        ...
+
+    def create_and_run_stream(
+        self,
+        *,
+        assistant_id: str,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+        response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+        tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+        tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+        event_handler: AssistantEventHandlerT | None = None,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AssistantStreamManager[AssistantEventHandler] | AssistantStreamManager[AssistantEventHandlerT]:
+        """Create a thread and stream the run back"""
+        extra_headers = {
+            "OpenAI-Beta": "assistants=v2",
+            "X-Stainless-Stream-Helper": "threads.create_and_run_stream",
+            "X-Stainless-Custom-Event-Handler": "true" if event_handler else "false",
+            **(extra_headers or {}),
+        }
+        make_request = partial(
+            self._post,
+            "/threads/runs",
+            body=maybe_transform(
+                {
+                    "assistant_id": assistant_id,
+                    "instructions": instructions,
+                    "max_completion_tokens": max_completion_tokens,
+                    "max_prompt_tokens": max_prompt_tokens,
+                    "metadata": metadata,
+                    "model": model,
+                    "parallel_tool_calls": parallel_tool_calls,
+                    "response_format": response_format,
+                    "temperature": temperature,
+                    "tool_choice": tool_choice,
+                    "stream": True,
+                    "thread": thread,
+                    "tools": tools,
+                    "tool_resources": tool_resources,
+                    "truncation_strategy": truncation_strategy,
+                    "top_p": top_p,
+                },
+                thread_create_and_run_params.ThreadCreateAndRunParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Run,
+            stream=True,
+            stream_cls=Stream[AssistantStreamEvent],
+        )
+        return AssistantStreamManager(make_request, event_handler=event_handler or AssistantEventHandler())
+
+
+class AsyncThreads(AsyncAPIResource):
+    @cached_property
+    def runs(self) -> AsyncRuns:
+        return AsyncRuns(self._client)
+
+    @cached_property
+    def messages(self) -> AsyncMessages:
+        return AsyncMessages(self._client)
+
+    @cached_property
+    def with_raw_response(self) -> AsyncThreadsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return AsyncThreadsWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> AsyncThreadsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return AsyncThreadsWithStreamingResponse(self)
+
+    async def create(
+        self,
+        *,
+        messages: Iterable[thread_create_params.Message] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        tool_resources: Optional[thread_create_params.ToolResources] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Thread:
+        """
+        Create a thread.
+
+        Args:
+          messages: A list of [messages](https://platform.openai.com/docs/api-reference/messages) to
+              start the thread with.
+
+          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+              for storing additional information about the object in a structured format, and
+              querying for objects via API or the dashboard.
+
+              Keys are strings with a maximum length of 64 characters. Values are strings with
+              a maximum length of 512 characters.
+
+          tool_resources: A set of resources that are made available to the assistant's tools in this
+              thread. The resources are specific to the type of tool. For example, the
+              `code_interpreter` tool requires a list of file IDs, while the `file_search`
+              tool requires a list of vector store IDs.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return await self._post(
+            "/threads",
+            body=await async_maybe_transform(
+                {
+                    "messages": messages,
+                    "metadata": metadata,
+                    "tool_resources": tool_resources,
+                },
+                thread_create_params.ThreadCreateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Thread,
+        )
+
+    async def retrieve(
+        self,
+        thread_id: str,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Thread:
+        """
+        Retrieves a thread.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not thread_id:
+            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return await self._get(
+            f"/threads/{thread_id}",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Thread,
+        )
+
+    async def update(
+        self,
+        thread_id: str,
+        *,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        tool_resources: Optional[thread_update_params.ToolResources] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Thread:
+        """
+        Modifies a thread.
+
+        Args:
+          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+              for storing additional information about the object in a structured format, and
+              querying for objects via API or the dashboard.
+
+              Keys are strings with a maximum length of 64 characters. Values are strings with
+              a maximum length of 512 characters.
+
+          tool_resources: A set of resources that are made available to the assistant's tools in this
+              thread. The resources are specific to the type of tool. For example, the
+              `code_interpreter` tool requires a list of file IDs, while the `file_search`
+              tool requires a list of vector store IDs.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not thread_id:
+            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return await self._post(
+            f"/threads/{thread_id}",
+            body=await async_maybe_transform(
+                {
+                    "metadata": metadata,
+                    "tool_resources": tool_resources,
+                },
+                thread_update_params.ThreadUpdateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Thread,
+        )
+
+    async def delete(
+        self,
+        thread_id: str,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> ThreadDeleted:
+        """
+        Delete a thread.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not thread_id:
+            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return await self._delete(
+            f"/threads/{thread_id}",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=ThreadDeleted,
+        )
+
+    @overload
+    async def create_and_run(
+        self,
+        *,
+        assistant_id: str,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+        response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+        stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+        tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+        tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Run:
+        """
+        Create a thread and run it in one request.
+
+        Args:
+          assistant_id: The ID of the
+              [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to
+              execute this run.
+
+          instructions: Override the default system message of the assistant. This is useful for
+              modifying the behavior on a per-run basis.
+
+          max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
+              run. The run will make a best effort to use only the number of completion tokens
+              specified, across multiple turns of the run. If the run exceeds the number of
+              completion tokens specified, the run will end with status `incomplete`. See
+              `incomplete_details` for more info.
+
+          max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
+              The run will make a best effort to use only the number of prompt tokens
+              specified, across multiple turns of the run. If the run exceeds the number of
+              prompt tokens specified, the run will end with status `incomplete`. See
+              `incomplete_details` for more info.
+
+          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+              for storing additional information about the object in a structured format, and
+              querying for objects via API or the dashboard.
+
+              Keys are strings with a maximum length of 64 characters. Values are strings with
+              a maximum length of 512 characters.
+
+          model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to
+              be used to execute this run. If a value is provided here, it will override the
+              model associated with the assistant. If not, the model associated with the
+              assistant will be used.
+
+          parallel_tool_calls: Whether to enable
+              [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling)
+              during tool use.
+
+          response_format: Specifies the format that the model must output. Compatible with
+              [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
+              [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4),
+              and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
+
+              Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
+              Outputs which ensures the model will match your supplied JSON schema. Learn more
+              in the
+              [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
+
+              Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
+              message the model generates is valid JSON.
+
+              **Important:** when using JSON mode, you **must** also instruct the model to
+              produce JSON yourself via a system or user message. Without this, the model may
+              generate an unending stream of whitespace until the generation reaches the token
+              limit, resulting in a long-running and seemingly "stuck" request. Also note that
+              the message content may be partially cut off if `finish_reason="length"`, which
+              indicates the generation exceeded `max_tokens` or the conversation exceeded the
+              max context length.
+
+          stream: If `true`, returns a stream of events that happen during the Run as server-sent
+              events, terminating when the Run enters a terminal state with a `data: [DONE]`
+              message.
+
+          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
+              make the output more random, while lower values like 0.2 will make it more
+              focused and deterministic.
+
+          thread: Options to create a new thread. If no thread is provided when running a request,
+              an empty thread will be created.
+
+          tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
+              not call any tools and instead generates a message. `auto` is the default value
+              and means the model can pick between generating a message or calling one or more
+              tools. `required` means the model must call one or more tools before responding
+              to the user. Specifying a particular tool like `{"type": "file_search"}` or
+              `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+              call that tool.
+
+          tool_resources: A set of resources that are used by the assistant's tools. The resources are
+              specific to the type of tool. For example, the `code_interpreter` tool requires
+              a list of file IDs, while the `file_search` tool requires a list of vector store
+              IDs.
+
+          tools: Override the tools the assistant can use for this run. This is useful for
+              modifying the behavior on a per-run basis.
+
+          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
+              model considers the results of the tokens with top_p probability mass. So 0.1
+              means only the tokens comprising the top 10% probability mass are considered.
+
+              We generally recommend altering this or temperature but not both.
+
+          truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to
+              control the intial context window of the run.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
+
+    @overload
+    async def create_and_run(
+        self,
+        *,
+        assistant_id: str,
+        stream: Literal[True],
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+        response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+        tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+        tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AsyncStream[AssistantStreamEvent]:
+        """
+        Create a thread and run it in one request.
+
+        Args:
+          assistant_id: The ID of the
+              [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to
+              execute this run.
+
+          stream: If `true`, returns a stream of events that happen during the Run as server-sent
+              events, terminating when the Run enters a terminal state with a `data: [DONE]`
+              message.
+
+          instructions: Override the default system message of the assistant. This is useful for
+              modifying the behavior on a per-run basis.
+
+          max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
+              run. The run will make a best effort to use only the number of completion tokens
+              specified, across multiple turns of the run. If the run exceeds the number of
+              completion tokens specified, the run will end with status `incomplete`. See
+              `incomplete_details` for more info.
+
+          max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
+              The run will make a best effort to use only the number of prompt tokens
+              specified, across multiple turns of the run. If the run exceeds the number of
+              prompt tokens specified, the run will end with status `incomplete`. See
+              `incomplete_details` for more info.
+
+          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+              for storing additional information about the object in a structured format, and
+              querying for objects via API or the dashboard.
+
+              Keys are strings with a maximum length of 64 characters. Values are strings with
+              a maximum length of 512 characters.
+
+          model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to
+              be used to execute this run. If a value is provided here, it will override the
+              model associated with the assistant. If not, the model associated with the
+              assistant will be used.
+
+          parallel_tool_calls: Whether to enable
+              [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling)
+              during tool use.
+
+          response_format: Specifies the format that the model must output. Compatible with
+              [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
+              [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4),
+              and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
+
+              Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
+              Outputs which ensures the model will match your supplied JSON schema. Learn more
+              in the
+              [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
+
+              Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
+              message the model generates is valid JSON.
+
+              **Important:** when using JSON mode, you **must** also instruct the model to
+              produce JSON yourself via a system or user message. Without this, the model may
+              generate an unending stream of whitespace until the generation reaches the token
+              limit, resulting in a long-running and seemingly "stuck" request. Also note that
+              the message content may be partially cut off if `finish_reason="length"`, which
+              indicates the generation exceeded `max_tokens` or the conversation exceeded the
+              max context length.
+
+          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
+              make the output more random, while lower values like 0.2 will make it more
+              focused and deterministic.
+
+          thread: Options to create a new thread. If no thread is provided when running a request,
+              an empty thread will be created.
+
+          tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
+              not call any tools and instead generates a message. `auto` is the default value
+              and means the model can pick between generating a message or calling one or more
+              tools. `required` means the model must call one or more tools before responding
+              to the user. Specifying a particular tool like `{"type": "file_search"}` or
+              `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+              call that tool.
+
+          tool_resources: A set of resources that are used by the assistant's tools. The resources are
+              specific to the type of tool. For example, the `code_interpreter` tool requires
+              a list of file IDs, while the `file_search` tool requires a list of vector store
+              IDs.
+
+          tools: Override the tools the assistant can use for this run. This is useful for
+              modifying the behavior on a per-run basis.
+
+          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
+              model considers the results of the tokens with top_p probability mass. So 0.1
+              means only the tokens comprising the top 10% probability mass are considered.
+
+              We generally recommend altering this or temperature but not both.
+
+          truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to
+              control the intial context window of the run.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
+
+    @overload
+    async def create_and_run(
+        self,
+        *,
+        assistant_id: str,
+        stream: bool,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+        response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+        tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+        tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Run | AsyncStream[AssistantStreamEvent]:
+        """
+        Create a thread and run it in one request.
+
+        Args:
+          assistant_id: The ID of the
+              [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to
+              execute this run.
+
+          stream: If `true`, returns a stream of events that happen during the Run as server-sent
+              events, terminating when the Run enters a terminal state with a `data: [DONE]`
+              message.
+
+          instructions: Override the default system message of the assistant. This is useful for
+              modifying the behavior on a per-run basis.
+
+          max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
+              run. The run will make a best effort to use only the number of completion tokens
+              specified, across multiple turns of the run. If the run exceeds the number of
+              completion tokens specified, the run will end with status `incomplete`. See
+              `incomplete_details` for more info.
+
+          max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
+              The run will make a best effort to use only the number of prompt tokens
+              specified, across multiple turns of the run. If the run exceeds the number of
+              prompt tokens specified, the run will end with status `incomplete`. See
+              `incomplete_details` for more info.
+
+          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+              for storing additional information about the object in a structured format, and
+              querying for objects via API or the dashboard.
+
+              Keys are strings with a maximum length of 64 characters. Values are strings with
+              a maximum length of 512 characters.
+
+          model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to
+              be used to execute this run. If a value is provided here, it will override the
+              model associated with the assistant. If not, the model associated with the
+              assistant will be used.
+
+          parallel_tool_calls: Whether to enable
+              [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling)
+              during tool use.
+
+          response_format: Specifies the format that the model must output. Compatible with
+              [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
+              [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4),
+              and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
+
+              Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
+              Outputs which ensures the model will match your supplied JSON schema. Learn more
+              in the
+              [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
+
+              Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
+              message the model generates is valid JSON.
+
+              **Important:** when using JSON mode, you **must** also instruct the model to
+              produce JSON yourself via a system or user message. Without this, the model may
+              generate an unending stream of whitespace until the generation reaches the token
+              limit, resulting in a long-running and seemingly "stuck" request. Also note that
+              the message content may be partially cut off if `finish_reason="length"`, which
+              indicates the generation exceeded `max_tokens` or the conversation exceeded the
+              max context length.
+
+          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
+              make the output more random, while lower values like 0.2 will make it more
+              focused and deterministic.
+
+          thread: Options to create a new thread. If no thread is provided when running a request,
+              an empty thread will be created.
+
+          tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
+              not call any tools and instead generates a message. `auto` is the default value
+              and means the model can pick between generating a message or calling one or more
+              tools. `required` means the model must call one or more tools before responding
+              to the user. Specifying a particular tool like `{"type": "file_search"}` or
+              `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+              call that tool.
+
+          tool_resources: A set of resources that are used by the assistant's tools. The resources are
+              specific to the type of tool. For example, the `code_interpreter` tool requires
+              a list of file IDs, while the `file_search` tool requires a list of vector store
+              IDs.
+
+          tools: Override the tools the assistant can use for this run. This is useful for
+              modifying the behavior on a per-run basis.
+
+          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
+              model considers the results of the tokens with top_p probability mass. So 0.1
+              means only the tokens comprising the top 10% probability mass are considered.
+
+              We generally recommend altering this or temperature but not both.
+
+          truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to
+              control the intial context window of the run.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
+
+    @required_args(["assistant_id"], ["assistant_id", "stream"])
+    async def create_and_run(
+        self,
+        *,
+        assistant_id: str,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+        response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+        stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+        tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+        tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Run | AsyncStream[AssistantStreamEvent]:
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return await self._post(
+            "/threads/runs",
+            body=await async_maybe_transform(
+                {
+                    "assistant_id": assistant_id,
+                    "instructions": instructions,
+                    "max_completion_tokens": max_completion_tokens,
+                    "max_prompt_tokens": max_prompt_tokens,
+                    "metadata": metadata,
+                    "model": model,
+                    "parallel_tool_calls": parallel_tool_calls,
+                    "response_format": response_format,
+                    "stream": stream,
+                    "temperature": temperature,
+                    "thread": thread,
+                    "tool_choice": tool_choice,
+                    "tool_resources": tool_resources,
+                    "tools": tools,
+                    "top_p": top_p,
+                    "truncation_strategy": truncation_strategy,
+                },
+                thread_create_and_run_params.ThreadCreateAndRunParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Run,
+            stream=stream or False,
+            stream_cls=AsyncStream[AssistantStreamEvent],
+        )
+
+    async def create_and_run_poll(
+        self,
+        *,
+        assistant_id: str,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+        response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+        tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+        tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+        poll_interval_ms: int | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Run:
+        """
+        A helper to create a thread, start a run and then poll for a terminal state.
+        More information on Run lifecycles can be found here:
+        https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps
+        """
+        run = await self.create_and_run(
+            assistant_id=assistant_id,
+            instructions=instructions,
+            max_completion_tokens=max_completion_tokens,
+            max_prompt_tokens=max_prompt_tokens,
+            metadata=metadata,
+            model=model,
+            parallel_tool_calls=parallel_tool_calls,
+            response_format=response_format,
+            temperature=temperature,
+            stream=False,
+            thread=thread,
+            tool_resources=tool_resources,
+            tool_choice=tool_choice,
+            truncation_strategy=truncation_strategy,
+            top_p=top_p,
+            tools=tools,
+            extra_headers=extra_headers,
+            extra_query=extra_query,
+            extra_body=extra_body,
+            timeout=timeout,
+        )
+        return await self.runs.poll(
+            run.id, run.thread_id, extra_headers, extra_query, extra_body, timeout, poll_interval_ms
+        )
+
+    @overload
+    def create_and_run_stream(
+        self,
+        *,
+        assistant_id: str,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+        response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+        tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+        tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AsyncAssistantStreamManager[AsyncAssistantEventHandler]:
+        """Create a thread and stream the run back"""
+        ...
+
+    @overload
+    def create_and_run_stream(
+        self,
+        *,
+        assistant_id: str,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+        response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+        tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+        tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+        event_handler: AsyncAssistantEventHandlerT,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AsyncAssistantStreamManager[AsyncAssistantEventHandlerT]:
+        """Create a thread and stream the run back"""
+        ...
+
+    def create_and_run_stream(
+        self,
+        *,
+        assistant_id: str,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+        response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+        tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+        tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+        event_handler: AsyncAssistantEventHandlerT | None = None,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> (
+        AsyncAssistantStreamManager[AsyncAssistantEventHandler]
+        | AsyncAssistantStreamManager[AsyncAssistantEventHandlerT]
+    ):
+        """Create a thread and stream the run back"""
+        extra_headers = {
+            "OpenAI-Beta": "assistants=v2",
+            "X-Stainless-Stream-Helper": "threads.create_and_run_stream",
+            "X-Stainless-Custom-Event-Handler": "true" if event_handler else "false",
+            **(extra_headers or {}),
+        }
+        request = self._post(
+            "/threads/runs",
+            body=maybe_transform(
+                {
+                    "assistant_id": assistant_id,
+                    "instructions": instructions,
+                    "max_completion_tokens": max_completion_tokens,
+                    "max_prompt_tokens": max_prompt_tokens,
+                    "metadata": metadata,
+                    "model": model,
+                    "parallel_tool_calls": parallel_tool_calls,
+                    "response_format": response_format,
+                    "temperature": temperature,
+                    "tool_choice": tool_choice,
+                    "stream": True,
+                    "thread": thread,
+                    "tools": tools,
+                    "tool_resources": tool_resources,
+                    "truncation_strategy": truncation_strategy,
+                    "top_p": top_p,
+                },
+                thread_create_and_run_params.ThreadCreateAndRunParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Run,
+            stream=True,
+            stream_cls=AsyncStream[AssistantStreamEvent],
+        )
+        return AsyncAssistantStreamManager(request, event_handler=event_handler or AsyncAssistantEventHandler())
+
+
+class ThreadsWithRawResponse:
+    def __init__(self, threads: Threads) -> None:
+        self._threads = threads
+
+        self.create = _legacy_response.to_raw_response_wrapper(
+            threads.create,
+        )
+        self.retrieve = _legacy_response.to_raw_response_wrapper(
+            threads.retrieve,
+        )
+        self.update = _legacy_response.to_raw_response_wrapper(
+            threads.update,
+        )
+        self.delete = _legacy_response.to_raw_response_wrapper(
+            threads.delete,
+        )
+        self.create_and_run = _legacy_response.to_raw_response_wrapper(
+            threads.create_and_run,
+        )
+
+    @cached_property
+    def runs(self) -> RunsWithRawResponse:
+        return RunsWithRawResponse(self._threads.runs)
+
+    @cached_property
+    def messages(self) -> MessagesWithRawResponse:
+        return MessagesWithRawResponse(self._threads.messages)
+
+
+class AsyncThreadsWithRawResponse:
+    def __init__(self, threads: AsyncThreads) -> None:
+        self._threads = threads
+
+        self.create = _legacy_response.async_to_raw_response_wrapper(
+            threads.create,
+        )
+        self.retrieve = _legacy_response.async_to_raw_response_wrapper(
+            threads.retrieve,
+        )
+        self.update = _legacy_response.async_to_raw_response_wrapper(
+            threads.update,
+        )
+        self.delete = _legacy_response.async_to_raw_response_wrapper(
+            threads.delete,
+        )
+        self.create_and_run = _legacy_response.async_to_raw_response_wrapper(
+            threads.create_and_run,
+        )
+
+    @cached_property
+    def runs(self) -> AsyncRunsWithRawResponse:
+        return AsyncRunsWithRawResponse(self._threads.runs)
+
+    @cached_property
+    def messages(self) -> AsyncMessagesWithRawResponse:
+        return AsyncMessagesWithRawResponse(self._threads.messages)
+
+
+class ThreadsWithStreamingResponse:
+    def __init__(self, threads: Threads) -> None:
+        self._threads = threads
+
+        self.create = to_streamed_response_wrapper(
+            threads.create,
+        )
+        self.retrieve = to_streamed_response_wrapper(
+            threads.retrieve,
+        )
+        self.update = to_streamed_response_wrapper(
+            threads.update,
+        )
+        self.delete = to_streamed_response_wrapper(
+            threads.delete,
+        )
+        self.create_and_run = to_streamed_response_wrapper(
+            threads.create_and_run,
+        )
+
+    @cached_property
+    def runs(self) -> RunsWithStreamingResponse:
+        return RunsWithStreamingResponse(self._threads.runs)
+
+    @cached_property
+    def messages(self) -> MessagesWithStreamingResponse:
+        return MessagesWithStreamingResponse(self._threads.messages)
+
+
+class AsyncThreadsWithStreamingResponse:
+    def __init__(self, threads: AsyncThreads) -> None:
+        self._threads = threads
+
+        self.create = async_to_streamed_response_wrapper(
+            threads.create,
+        )
+        self.retrieve = async_to_streamed_response_wrapper(
+            threads.retrieve,
+        )
+        self.update = async_to_streamed_response_wrapper(
+            threads.update,
+        )
+        self.delete = async_to_streamed_response_wrapper(
+            threads.delete,
+        )
+        self.create_and_run = async_to_streamed_response_wrapper(
+            threads.create_and_run,
+        )
+
+    @cached_property
+    def runs(self) -> AsyncRunsWithStreamingResponse:
+        return AsyncRunsWithStreamingResponse(self._threads.runs)
+
+    @cached_property
+    def messages(self) -> AsyncMessagesWithStreamingResponse:
+        return AsyncMessagesWithStreamingResponse(self._threads.messages)
diff --git a/.venv/lib/python3.11/site-packages/openai/resources/beta/vector_stores/file_batches.py b/.venv/lib/python3.11/site-packages/openai/resources/beta/vector_stores/file_batches.py
new file mode 100644
index 0000000000000000000000000000000000000000..6d61e92c7fdc4e07c698996dc6206ea0cf7b6697
--- /dev/null
+++ b/.venv/lib/python3.11/site-packages/openai/resources/beta/vector_stores/file_batches.py
@@ -0,0 +1,785 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import asyncio
+from typing import List, Iterable
+from typing_extensions import Literal
+from concurrent.futures import Future, ThreadPoolExecutor, as_completed
+
+import httpx
+import sniffio
+
+from .... import _legacy_response
+from ....types import FileObject
+from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven, FileTypes
+from ...._utils import (
+    is_given,
+    maybe_transform,
+    async_maybe_transform,
+)
+from ...._compat import cached_property
+from ...._resource import SyncAPIResource, AsyncAPIResource
+from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
+from ....pagination import SyncCursorPage, AsyncCursorPage
+from ....types.beta import FileChunkingStrategyParam
+from ...._base_client import AsyncPaginator, make_request_options
+from ....types.beta.vector_stores import file_batch_create_params, file_batch_list_files_params
+from ....types.beta.file_chunking_strategy_param import FileChunkingStrategyParam
+from ....types.beta.vector_stores.vector_store_file import VectorStoreFile
+from ....types.beta.vector_stores.vector_store_file_batch import VectorStoreFileBatch
+
+__all__ = ["FileBatches", "AsyncFileBatches"]
+
+
+class FileBatches(SyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> FileBatchesWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return FileBatchesWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> FileBatchesWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return FileBatchesWithStreamingResponse(self)
+
+    def create(
+        self,
+        vector_store_id: str,
+        *,
+        file_ids: List[str],
+        chunking_strategy: FileChunkingStrategyParam | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> VectorStoreFileBatch:
+        """
+        Create a vector store file batch.
+
+        Args:
+          file_ids: A list of [File](https://platform.openai.com/docs/api-reference/files) IDs that
+              the vector store should use. Useful for tools like `file_search` that can access
+              files.
+
+          chunking_strategy: The chunking strategy used to chunk the file(s). If not set, will use the `auto`
+              strategy. Only applicable if `file_ids` is non-empty.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not vector_store_id:
+            raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return self._post(
+            f"/vector_stores/{vector_store_id}/file_batches",
+            body=maybe_transform(
+                {
+                    "file_ids": file_ids,
+                    "chunking_strategy": chunking_strategy,
+                },
+                file_batch_create_params.FileBatchCreateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=VectorStoreFileBatch,
+        )
+
+    def retrieve(
+        self,
+        batch_id: str,
+        *,
+        vector_store_id: str,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> VectorStoreFileBatch:
+        """
+        Retrieves a vector store file batch.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not vector_store_id:
+            raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}")
+        if not batch_id:
+            raise ValueError(f"Expected a non-empty value for `batch_id` but received {batch_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return self._get(
+            f"/vector_stores/{vector_store_id}/file_batches/{batch_id}",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=VectorStoreFileBatch,
+        )
+
+    def cancel(
+        self,
+        batch_id: str,
+        *,
+        vector_store_id: str,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> VectorStoreFileBatch:
+        """Cancel a vector store file batch.
+
+        This attempts to cancel the processing of
+        files in this batch as soon as possible.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not vector_store_id:
+            raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}")
+        if not batch_id:
+            raise ValueError(f"Expected a non-empty value for `batch_id` but received {batch_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return self._post(
+            f"/vector_stores/{vector_store_id}/file_batches/{batch_id}/cancel",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=VectorStoreFileBatch,
+        )
+
+    def create_and_poll(
+        self,
+        vector_store_id: str,
+        *,
+        file_ids: List[str],
+        poll_interval_ms: int | NotGiven = NOT_GIVEN,
+        chunking_strategy: FileChunkingStrategyParam | NotGiven = NOT_GIVEN,
+    ) -> VectorStoreFileBatch:
+        """Create a vector store batch and poll until all files have been processed."""
+        batch = self.create(
+            vector_store_id=vector_store_id,
+            file_ids=file_ids,
+            chunking_strategy=chunking_strategy,
+        )
+        # TODO: don't poll unless necessary??
+        return self.poll(
+            batch.id,
+            vector_store_id=vector_store_id,
+            poll_interval_ms=poll_interval_ms,
+        )
+
+    def list_files(
+        self,
+        batch_id: str,
+        *,
+        vector_store_id: str,
+        after: str | NotGiven = NOT_GIVEN,
+        before: str | NotGiven = NOT_GIVEN,
+        filter: Literal["in_progress", "completed", "failed", "cancelled"] | NotGiven = NOT_GIVEN,
+        limit: int | NotGiven = NOT_GIVEN,
+        order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> SyncCursorPage[VectorStoreFile]:
+        """
+        Returns a list of vector store files in a batch.
+
+        Args:
+          after: A cursor for use in pagination. `after` is an object ID that defines your place
+              in the list. For instance, if you make a list request and receive 100 objects,
+              ending with obj_foo, your subsequent call can include after=obj_foo in order to
+              fetch the next page of the list.
+
+          before: A cursor for use in pagination. `before` is an object ID that defines your place
+              in the list. For instance, if you make a list request and receive 100 objects,
+              starting with obj_foo, your subsequent call can include before=obj_foo in order
+              to fetch the previous page of the list.
+
+          filter: Filter by file status. One of `in_progress`, `completed`, `failed`, `cancelled`.
+
+          limit: A limit on the number of objects to be returned. Limit can range between 1 and
+              100, and the default is 20.
+
+          order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending
+              order and `desc` for descending order.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not vector_store_id:
+            raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}")
+        if not batch_id:
+            raise ValueError(f"Expected a non-empty value for `batch_id` but received {batch_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return self._get_api_list(
+            f"/vector_stores/{vector_store_id}/file_batches/{batch_id}/files",
+            page=SyncCursorPage[VectorStoreFile],
+            options=make_request_options(
+                extra_headers=extra_headers,
+                extra_query=extra_query,
+                extra_body=extra_body,
+                timeout=timeout,
+                query=maybe_transform(
+                    {
+                        "after": after,
+                        "before": before,
+                        "filter": filter,
+                        "limit": limit,
+                        "order": order,
+                    },
+                    file_batch_list_files_params.FileBatchListFilesParams,
+                ),
+            ),
+            model=VectorStoreFile,
+        )
+
+    def poll(
+        self,
+        batch_id: str,
+        *,
+        vector_store_id: str,
+        poll_interval_ms: int | NotGiven = NOT_GIVEN,
+    ) -> VectorStoreFileBatch:
+        """Wait for the given file batch to be processed.
+
+        Note: this will return even if one of the files failed to process, you need to
+        check batch.file_counts.failed_count to handle this case.
+        """
+        headers: dict[str, str] = {"X-Stainless-Poll-Helper": "true"}
+        if is_given(poll_interval_ms):
+            headers["X-Stainless-Custom-Poll-Interval"] = str(poll_interval_ms)
+
+        while True:
+            response = self.with_raw_response.retrieve(
+                batch_id,
+                vector_store_id=vector_store_id,
+                extra_headers=headers,
+            )
+
+            batch = response.parse()
+            if batch.file_counts.in_progress > 0:
+                if not is_given(poll_interval_ms):
+                    from_header = response.headers.get("openai-poll-after-ms")
+                    if from_header is not None:
+                        poll_interval_ms = int(from_header)
+                    else:
+                        poll_interval_ms = 1000
+
+                self._sleep(poll_interval_ms / 1000)
+                continue
+
+            return batch
+
+    def upload_and_poll(
+        self,
+        vector_store_id: str,
+        *,
+        files: Iterable[FileTypes],
+        max_concurrency: int = 5,
+        file_ids: List[str] = [],
+        poll_interval_ms: int | NotGiven = NOT_GIVEN,
+        chunking_strategy: FileChunkingStrategyParam | NotGiven = NOT_GIVEN,
+    ) -> VectorStoreFileBatch:
+        """Uploads the given files concurrently and then creates a vector store file batch.
+
+        If you've already uploaded certain files that you want to include in this batch
+        then you can pass their IDs through the `file_ids` argument.
+
+        By default, if any file upload fails then an exception will be eagerly raised.
+
+        The number of concurrency uploads is configurable using the `max_concurrency`
+        parameter.
+
+        Note: this method only supports `asyncio` or `trio` as the backing async
+        runtime.
+        """
+        results: list[FileObject] = []
+
+        with ThreadPoolExecutor(max_workers=max_concurrency) as executor:
+            futures: list[Future[FileObject]] = [
+                executor.submit(
+                    self._client.files.create,
+                    file=file,
+                    purpose="assistants",
+                )
+                for file in files
+            ]
+
+        for future in as_completed(futures):
+            exc = future.exception()
+            if exc:
+                raise exc
+
+            results.append(future.result())
+
+        batch = self.create_and_poll(
+            vector_store_id=vector_store_id,
+            file_ids=[*file_ids, *(f.id for f in results)],
+            poll_interval_ms=poll_interval_ms,
+            chunking_strategy=chunking_strategy,
+        )
+        return batch
+
+
+class AsyncFileBatches(AsyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> AsyncFileBatchesWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return AsyncFileBatchesWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> AsyncFileBatchesWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return AsyncFileBatchesWithStreamingResponse(self)
+
+    async def create(
+        self,
+        vector_store_id: str,
+        *,
+        file_ids: List[str],
+        chunking_strategy: FileChunkingStrategyParam | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> VectorStoreFileBatch:
+        """
+        Create a vector store file batch.
+
+        Args:
+          file_ids: A list of [File](https://platform.openai.com/docs/api-reference/files) IDs that
+              the vector store should use. Useful for tools like `file_search` that can access
+              files.
+
+          chunking_strategy: The chunking strategy used to chunk the file(s). If not set, will use the `auto`
+              strategy. Only applicable if `file_ids` is non-empty.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not vector_store_id:
+            raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return await self._post(
+            f"/vector_stores/{vector_store_id}/file_batches",
+            body=await async_maybe_transform(
+                {
+                    "file_ids": file_ids,
+                    "chunking_strategy": chunking_strategy,
+                },
+                file_batch_create_params.FileBatchCreateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=VectorStoreFileBatch,
+        )
+
+    async def retrieve(
+        self,
+        batch_id: str,
+        *,
+        vector_store_id: str,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> VectorStoreFileBatch:
+        """
+        Retrieves a vector store file batch.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not vector_store_id:
+            raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}")
+        if not batch_id:
+            raise ValueError(f"Expected a non-empty value for `batch_id` but received {batch_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return await self._get(
+            f"/vector_stores/{vector_store_id}/file_batches/{batch_id}",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=VectorStoreFileBatch,
+        )
+
+    async def cancel(
+        self,
+        batch_id: str,
+        *,
+        vector_store_id: str,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> VectorStoreFileBatch:
+        """Cancel a vector store file batch.
+
+        This attempts to cancel the processing of
+        files in this batch as soon as possible.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not vector_store_id:
+            raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}")
+        if not batch_id:
+            raise ValueError(f"Expected a non-empty value for `batch_id` but received {batch_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return await self._post(
+            f"/vector_stores/{vector_store_id}/file_batches/{batch_id}/cancel",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=VectorStoreFileBatch,
+        )
+
+    async def create_and_poll(
+        self,
+        vector_store_id: str,
+        *,
+        file_ids: List[str],
+        poll_interval_ms: int | NotGiven = NOT_GIVEN,
+        chunking_strategy: FileChunkingStrategyParam | NotGiven = NOT_GIVEN,
+    ) -> VectorStoreFileBatch:
+        """Create a vector store batch and poll until all files have been processed."""
+        batch = await self.create(
+            vector_store_id=vector_store_id,
+            file_ids=file_ids,
+            chunking_strategy=chunking_strategy,
+        )
+        # TODO: don't poll unless necessary??
+        return await self.poll(
+            batch.id,
+            vector_store_id=vector_store_id,
+            poll_interval_ms=poll_interval_ms,
+        )
+
+    def list_files(
+        self,
+        batch_id: str,
+        *,
+        vector_store_id: str,
+        after: str | NotGiven = NOT_GIVEN,
+        before: str | NotGiven = NOT_GIVEN,
+        filter: Literal["in_progress", "completed", "failed", "cancelled"] | NotGiven = NOT_GIVEN,
+        limit: int | NotGiven = NOT_GIVEN,
+        order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AsyncPaginator[VectorStoreFile, AsyncCursorPage[VectorStoreFile]]:
+        """
+        Returns a list of vector store files in a batch.
+
+        Args:
+          after: A cursor for use in pagination. `after` is an object ID that defines your place
+              in the list. For instance, if you make a list request and receive 100 objects,
+              ending with obj_foo, your subsequent call can include after=obj_foo in order to
+              fetch the next page of the list.
+
+          before: A cursor for use in pagination. `before` is an object ID that defines your place
+              in the list. For instance, if you make a list request and receive 100 objects,
+              starting with obj_foo, your subsequent call can include before=obj_foo in order
+              to fetch the previous page of the list.
+
+          filter: Filter by file status. One of `in_progress`, `completed`, `failed`, `cancelled`.
+
+          limit: A limit on the number of objects to be returned. Limit can range between 1 and
+              100, and the default is 20.
+
+          order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending
+              order and `desc` for descending order.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not vector_store_id:
+            raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}")
+        if not batch_id:
+            raise ValueError(f"Expected a non-empty value for `batch_id` but received {batch_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return self._get_api_list(
+            f"/vector_stores/{vector_store_id}/file_batches/{batch_id}/files",
+            page=AsyncCursorPage[VectorStoreFile],
+            options=make_request_options(
+                extra_headers=extra_headers,
+                extra_query=extra_query,
+                extra_body=extra_body,
+                timeout=timeout,
+                query=maybe_transform(
+                    {
+                        "after": after,
+                        "before": before,
+                        "filter": filter,
+                        "limit": limit,
+                        "order": order,
+                    },
+                    file_batch_list_files_params.FileBatchListFilesParams,
+                ),
+            ),
+            model=VectorStoreFile,
+        )
+
+    async def poll(
+        self,
+        batch_id: str,
+        *,
+        vector_store_id: str,
+        poll_interval_ms: int | NotGiven = NOT_GIVEN,
+    ) -> VectorStoreFileBatch:
+        """Wait for the given file batch to be processed.
+
+        Note: this will return even if one of the files failed to process, you need to
+        check batch.file_counts.failed_count to handle this case.
+        """
+        headers: dict[str, str] = {"X-Stainless-Poll-Helper": "true"}
+        if is_given(poll_interval_ms):
+            headers["X-Stainless-Custom-Poll-Interval"] = str(poll_interval_ms)
+
+        while True:
+            response = await self.with_raw_response.retrieve(
+                batch_id,
+                vector_store_id=vector_store_id,
+                extra_headers=headers,
+            )
+
+            batch = response.parse()
+            if batch.file_counts.in_progress > 0:
+                if not is_given(poll_interval_ms):
+                    from_header = response.headers.get("openai-poll-after-ms")
+                    if from_header is not None:
+                        poll_interval_ms = int(from_header)
+                    else:
+                        poll_interval_ms = 1000
+
+                await self._sleep(poll_interval_ms / 1000)
+                continue
+
+            return batch
+
+    async def upload_and_poll(
+        self,
+        vector_store_id: str,
+        *,
+        files: Iterable[FileTypes],
+        max_concurrency: int = 5,
+        file_ids: List[str] = [],
+        poll_interval_ms: int | NotGiven = NOT_GIVEN,
+        chunking_strategy: FileChunkingStrategyParam | NotGiven = NOT_GIVEN,
+    ) -> VectorStoreFileBatch:
+        """Uploads the given files concurrently and then creates a vector store file batch.
+
+        If you've already uploaded certain files that you want to include in this batch
+        then you can pass their IDs through the `file_ids` argument.
+
+        By default, if any file upload fails then an exception will be eagerly raised.
+
+        The number of concurrency uploads is configurable using the `max_concurrency`
+        parameter.
+
+        Note: this method only supports `asyncio` or `trio` as the backing async
+        runtime.
+        """
+        uploaded_files: list[FileObject] = []
+
+        async_library = sniffio.current_async_library()
+
+        if async_library == "asyncio":
+
+            async def asyncio_upload_file(semaphore: asyncio.Semaphore, file: FileTypes) -> None:
+                async with semaphore:
+                    file_obj = await self._client.files.create(
+                        file=file,
+                        purpose="assistants",
+                    )
+                    uploaded_files.append(file_obj)
+
+            semaphore = asyncio.Semaphore(max_concurrency)
+
+            tasks = [asyncio_upload_file(semaphore, file) for file in files]
+
+            await asyncio.gather(*tasks)
+        elif async_library == "trio":
+            # We only import if the library is being used.
+            # We support Python 3.7 so are using an older version of trio that does not have type information
+            import trio  # type: ignore # pyright: ignore[reportMissingTypeStubs]
+
+            async def trio_upload_file(limiter: trio.CapacityLimiter, file: FileTypes) -> None:
+                async with limiter:
+                    file_obj = await self._client.files.create(
+                        file=file,
+                        purpose="assistants",
+                    )
+                    uploaded_files.append(file_obj)
+
+            limiter = trio.CapacityLimiter(max_concurrency)
+
+            async with trio.open_nursery() as nursery:
+                for file in files:
+                    nursery.start_soon(trio_upload_file, limiter, file)  # pyright: ignore [reportUnknownMemberType]
+        else:
+            raise RuntimeError(
+                f"Async runtime {async_library} is not supported yet. Only asyncio or trio is supported",
+            )
+
+        batch = await self.create_and_poll(
+            vector_store_id=vector_store_id,
+            file_ids=[*file_ids, *(f.id for f in uploaded_files)],
+            poll_interval_ms=poll_interval_ms,
+            chunking_strategy=chunking_strategy,
+        )
+        return batch
+
+
+class FileBatchesWithRawResponse:
+    def __init__(self, file_batches: FileBatches) -> None:
+        self._file_batches = file_batches
+
+        self.create = _legacy_response.to_raw_response_wrapper(
+            file_batches.create,
+        )
+        self.retrieve = _legacy_response.to_raw_response_wrapper(
+            file_batches.retrieve,
+        )
+        self.cancel = _legacy_response.to_raw_response_wrapper(
+            file_batches.cancel,
+        )
+        self.list_files = _legacy_response.to_raw_response_wrapper(
+            file_batches.list_files,
+        )
+
+
+class AsyncFileBatchesWithRawResponse:
+    def __init__(self, file_batches: AsyncFileBatches) -> None:
+        self._file_batches = file_batches
+
+        self.create = _legacy_response.async_to_raw_response_wrapper(
+            file_batches.create,
+        )
+        self.retrieve = _legacy_response.async_to_raw_response_wrapper(
+            file_batches.retrieve,
+        )
+        self.cancel = _legacy_response.async_to_raw_response_wrapper(
+            file_batches.cancel,
+        )
+        self.list_files = _legacy_response.async_to_raw_response_wrapper(
+            file_batches.list_files,
+        )
+
+
+class FileBatchesWithStreamingResponse:
+    def __init__(self, file_batches: FileBatches) -> None:
+        self._file_batches = file_batches
+
+        self.create = to_streamed_response_wrapper(
+            file_batches.create,
+        )
+        self.retrieve = to_streamed_response_wrapper(
+            file_batches.retrieve,
+        )
+        self.cancel = to_streamed_response_wrapper(
+            file_batches.cancel,
+        )
+        self.list_files = to_streamed_response_wrapper(
+            file_batches.list_files,
+        )
+
+
+class AsyncFileBatchesWithStreamingResponse:
+    def __init__(self, file_batches: AsyncFileBatches) -> None:
+        self._file_batches = file_batches
+
+        self.create = async_to_streamed_response_wrapper(
+            file_batches.create,
+        )
+        self.retrieve = async_to_streamed_response_wrapper(
+            file_batches.retrieve,
+        )
+        self.cancel = async_to_streamed_response_wrapper(
+            file_batches.cancel,
+        )
+        self.list_files = async_to_streamed_response_wrapper(
+            file_batches.list_files,
+        )
diff --git a/.venv/lib/python3.11/site-packages/openai/resources/beta/vector_stores/files.py b/.venv/lib/python3.11/site-packages/openai/resources/beta/vector_stores/files.py
new file mode 100644
index 0000000000000000000000000000000000000000..febf27a7537c12d0ad2a752fd866c60489f9edf2
--- /dev/null
+++ b/.venv/lib/python3.11/site-packages/openai/resources/beta/vector_stores/files.py
@@ -0,0 +1,726 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+from typing_extensions import Literal, assert_never
+
+import httpx
+
+from .... import _legacy_response
+from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven, FileTypes
+from ...._utils import (
+    is_given,
+    maybe_transform,
+    async_maybe_transform,
+)
+from ...._compat import cached_property
+from ...._resource import SyncAPIResource, AsyncAPIResource
+from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
+from ....pagination import SyncCursorPage, AsyncCursorPage
+from ....types.beta import FileChunkingStrategyParam
+from ...._base_client import AsyncPaginator, make_request_options
+from ....types.beta.vector_stores import file_list_params, file_create_params
+from ....types.beta.file_chunking_strategy_param import FileChunkingStrategyParam
+from ....types.beta.vector_stores.vector_store_file import VectorStoreFile
+from ....types.beta.vector_stores.vector_store_file_deleted import VectorStoreFileDeleted
+
+__all__ = ["Files", "AsyncFiles"]
+
+
+class Files(SyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> FilesWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return FilesWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> FilesWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return FilesWithStreamingResponse(self)
+
+    def create(
+        self,
+        vector_store_id: str,
+        *,
+        file_id: str,
+        chunking_strategy: FileChunkingStrategyParam | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> VectorStoreFile:
+        """
+        Create a vector store file by attaching a
+        [File](https://platform.openai.com/docs/api-reference/files) to a
+        [vector store](https://platform.openai.com/docs/api-reference/vector-stores/object).
+
+        Args:
+          file_id: A [File](https://platform.openai.com/docs/api-reference/files) ID that the
+              vector store should use. Useful for tools like `file_search` that can access
+              files.
+
+          chunking_strategy: The chunking strategy used to chunk the file(s). If not set, will use the `auto`
+              strategy. Only applicable if `file_ids` is non-empty.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not vector_store_id:
+            raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return self._post(
+            f"/vector_stores/{vector_store_id}/files",
+            body=maybe_transform(
+                {
+                    "file_id": file_id,
+                    "chunking_strategy": chunking_strategy,
+                },
+                file_create_params.FileCreateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=VectorStoreFile,
+        )
+
+    def retrieve(
+        self,
+        file_id: str,
+        *,
+        vector_store_id: str,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> VectorStoreFile:
+        """
+        Retrieves a vector store file.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not vector_store_id:
+            raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}")
+        if not file_id:
+            raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return self._get(
+            f"/vector_stores/{vector_store_id}/files/{file_id}",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=VectorStoreFile,
+        )
+
+    def list(
+        self,
+        vector_store_id: str,
+        *,
+        after: str | NotGiven = NOT_GIVEN,
+        before: str | NotGiven = NOT_GIVEN,
+        filter: Literal["in_progress", "completed", "failed", "cancelled"] | NotGiven = NOT_GIVEN,
+        limit: int | NotGiven = NOT_GIVEN,
+        order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> SyncCursorPage[VectorStoreFile]:
+        """
+        Returns a list of vector store files.
+
+        Args:
+          after: A cursor for use in pagination. `after` is an object ID that defines your place
+              in the list. For instance, if you make a list request and receive 100 objects,
+              ending with obj_foo, your subsequent call can include after=obj_foo in order to
+              fetch the next page of the list.
+
+          before: A cursor for use in pagination. `before` is an object ID that defines your place
+              in the list. For instance, if you make a list request and receive 100 objects,
+              starting with obj_foo, your subsequent call can include before=obj_foo in order
+              to fetch the previous page of the list.
+
+          filter: Filter by file status. One of `in_progress`, `completed`, `failed`, `cancelled`.
+
+          limit: A limit on the number of objects to be returned. Limit can range between 1 and
+              100, and the default is 20.
+
+          order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending
+              order and `desc` for descending order.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not vector_store_id:
+            raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return self._get_api_list(
+            f"/vector_stores/{vector_store_id}/files",
+            page=SyncCursorPage[VectorStoreFile],
+            options=make_request_options(
+                extra_headers=extra_headers,
+                extra_query=extra_query,
+                extra_body=extra_body,
+                timeout=timeout,
+                query=maybe_transform(
+                    {
+                        "after": after,
+                        "before": before,
+                        "filter": filter,
+                        "limit": limit,
+                        "order": order,
+                    },
+                    file_list_params.FileListParams,
+                ),
+            ),
+            model=VectorStoreFile,
+        )
+
+    def delete(
+        self,
+        file_id: str,
+        *,
+        vector_store_id: str,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> VectorStoreFileDeleted:
+        """Delete a vector store file.
+
+        This will remove the file from the vector store but
+        the file itself will not be deleted. To delete the file, use the
+        [delete file](https://platform.openai.com/docs/api-reference/files/delete)
+        endpoint.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not vector_store_id:
+            raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}")
+        if not file_id:
+            raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return self._delete(
+            f"/vector_stores/{vector_store_id}/files/{file_id}",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=VectorStoreFileDeleted,
+        )
+
+    def create_and_poll(
+        self,
+        file_id: str,
+        *,
+        vector_store_id: str,
+        poll_interval_ms: int | NotGiven = NOT_GIVEN,
+        chunking_strategy: FileChunkingStrategyParam | NotGiven = NOT_GIVEN,
+    ) -> VectorStoreFile:
+        """Attach a file to the given vector store and wait for it to be processed."""
+        self.create(vector_store_id=vector_store_id, file_id=file_id, chunking_strategy=chunking_strategy)
+
+        return self.poll(
+            file_id,
+            vector_store_id=vector_store_id,
+            poll_interval_ms=poll_interval_ms,
+        )
+
+    def poll(
+        self,
+        file_id: str,
+        *,
+        vector_store_id: str,
+        poll_interval_ms: int | NotGiven = NOT_GIVEN,
+    ) -> VectorStoreFile:
+        """Wait for the vector store file to finish processing.
+
+        Note: this will return even if the file failed to process, you need to check
+        file.last_error and file.status to handle these cases
+        """
+        headers: dict[str, str] = {"X-Stainless-Poll-Helper": "true"}
+        if is_given(poll_interval_ms):
+            headers["X-Stainless-Custom-Poll-Interval"] = str(poll_interval_ms)
+
+        while True:
+            response = self.with_raw_response.retrieve(
+                file_id,
+                vector_store_id=vector_store_id,
+                extra_headers=headers,
+            )
+
+            file = response.parse()
+            if file.status == "in_progress":
+                if not is_given(poll_interval_ms):
+                    from_header = response.headers.get("openai-poll-after-ms")
+                    if from_header is not None:
+                        poll_interval_ms = int(from_header)
+                    else:
+                        poll_interval_ms = 1000
+
+                self._sleep(poll_interval_ms / 1000)
+            elif file.status == "cancelled" or file.status == "completed" or file.status == "failed":
+                return file
+            else:
+                if TYPE_CHECKING:  # type: ignore[unreachable]
+                    assert_never(file.status)
+                else:
+                    return file
+
+    def upload(
+        self,
+        *,
+        vector_store_id: str,
+        file: FileTypes,
+        chunking_strategy: FileChunkingStrategyParam | NotGiven = NOT_GIVEN,
+    ) -> VectorStoreFile:
+        """Upload a file to the `files` API and then attach it to the given vector store.
+
+        Note the file will be asynchronously processed (you can use the alternative
+        polling helper method to wait for processing to complete).
+        """
+        file_obj = self._client.files.create(file=file, purpose="assistants")
+        return self.create(vector_store_id=vector_store_id, file_id=file_obj.id, chunking_strategy=chunking_strategy)
+
+    def upload_and_poll(
+        self,
+        *,
+        vector_store_id: str,
+        file: FileTypes,
+        poll_interval_ms: int | NotGiven = NOT_GIVEN,
+        chunking_strategy: FileChunkingStrategyParam | NotGiven = NOT_GIVEN,
+    ) -> VectorStoreFile:
+        """Add a file to a vector store and poll until processing is complete."""
+        file_obj = self._client.files.create(file=file, purpose="assistants")
+        return self.create_and_poll(
+            vector_store_id=vector_store_id,
+            file_id=file_obj.id,
+            chunking_strategy=chunking_strategy,
+            poll_interval_ms=poll_interval_ms,
+        )
+
+
+class AsyncFiles(AsyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> AsyncFilesWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return AsyncFilesWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> AsyncFilesWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return AsyncFilesWithStreamingResponse(self)
+
+    async def create(
+        self,
+        vector_store_id: str,
+        *,
+        file_id: str,
+        chunking_strategy: FileChunkingStrategyParam | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> VectorStoreFile:
+        """
+        Create a vector store file by attaching a
+        [File](https://platform.openai.com/docs/api-reference/files) to a
+        [vector store](https://platform.openai.com/docs/api-reference/vector-stores/object).
+
+        Args:
+          file_id: A [File](https://platform.openai.com/docs/api-reference/files) ID that the
+              vector store should use. Useful for tools like `file_search` that can access
+              files.
+
+          chunking_strategy: The chunking strategy used to chunk the file(s). If not set, will use the `auto`
+              strategy. Only applicable if `file_ids` is non-empty.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not vector_store_id:
+            raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return await self._post(
+            f"/vector_stores/{vector_store_id}/files",
+            body=await async_maybe_transform(
+                {
+                    "file_id": file_id,
+                    "chunking_strategy": chunking_strategy,
+                },
+                file_create_params.FileCreateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=VectorStoreFile,
+        )
+
+    async def retrieve(
+        self,
+        file_id: str,
+        *,
+        vector_store_id: str,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> VectorStoreFile:
+        """
+        Retrieves a vector store file.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not vector_store_id:
+            raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}")
+        if not file_id:
+            raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return await self._get(
+            f"/vector_stores/{vector_store_id}/files/{file_id}",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=VectorStoreFile,
+        )
+
+    def list(
+        self,
+        vector_store_id: str,
+        *,
+        after: str | NotGiven = NOT_GIVEN,
+        before: str | NotGiven = NOT_GIVEN,
+        filter: Literal["in_progress", "completed", "failed", "cancelled"] | NotGiven = NOT_GIVEN,
+        limit: int | NotGiven = NOT_GIVEN,
+        order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AsyncPaginator[VectorStoreFile, AsyncCursorPage[VectorStoreFile]]:
+        """
+        Returns a list of vector store files.
+
+        Args:
+          after: A cursor for use in pagination. `after` is an object ID that defines your place
+              in the list. For instance, if you make a list request and receive 100 objects,
+              ending with obj_foo, your subsequent call can include after=obj_foo in order to
+              fetch the next page of the list.
+
+          before: A cursor for use in pagination. `before` is an object ID that defines your place
+              in the list. For instance, if you make a list request and receive 100 objects,
+              starting with obj_foo, your subsequent call can include before=obj_foo in order
+              to fetch the previous page of the list.
+
+          filter: Filter by file status. One of `in_progress`, `completed`, `failed`, `cancelled`.
+
+          limit: A limit on the number of objects to be returned. Limit can range between 1 and
+              100, and the default is 20.
+
+          order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending
+              order and `desc` for descending order.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not vector_store_id:
+            raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return self._get_api_list(
+            f"/vector_stores/{vector_store_id}/files",
+            page=AsyncCursorPage[VectorStoreFile],
+            options=make_request_options(
+                extra_headers=extra_headers,
+                extra_query=extra_query,
+                extra_body=extra_body,
+                timeout=timeout,
+                query=maybe_transform(
+                    {
+                        "after": after,
+                        "before": before,
+                        "filter": filter,
+                        "limit": limit,
+                        "order": order,
+                    },
+                    file_list_params.FileListParams,
+                ),
+            ),
+            model=VectorStoreFile,
+        )
+
+    async def delete(
+        self,
+        file_id: str,
+        *,
+        vector_store_id: str,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> VectorStoreFileDeleted:
+        """Delete a vector store file.
+
+        This will remove the file from the vector store but
+        the file itself will not be deleted. To delete the file, use the
+        [delete file](https://platform.openai.com/docs/api-reference/files/delete)
+        endpoint.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not vector_store_id:
+            raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}")
+        if not file_id:
+            raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return await self._delete(
+            f"/vector_stores/{vector_store_id}/files/{file_id}",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=VectorStoreFileDeleted,
+        )
+
+    async def create_and_poll(
+        self,
+        file_id: str,
+        *,
+        vector_store_id: str,
+        poll_interval_ms: int | NotGiven = NOT_GIVEN,
+        chunking_strategy: FileChunkingStrategyParam | NotGiven = NOT_GIVEN,
+    ) -> VectorStoreFile:
+        """Attach a file to the given vector store and wait for it to be processed."""
+        await self.create(vector_store_id=vector_store_id, file_id=file_id, chunking_strategy=chunking_strategy)
+
+        return await self.poll(
+            file_id,
+            vector_store_id=vector_store_id,
+            poll_interval_ms=poll_interval_ms,
+        )
+
+    async def poll(
+        self,
+        file_id: str,
+        *,
+        vector_store_id: str,
+        poll_interval_ms: int | NotGiven = NOT_GIVEN,
+    ) -> VectorStoreFile:
+        """Wait for the vector store file to finish processing.
+
+        Note: this will return even if the file failed to process, you need to check
+        file.last_error and file.status to handle these cases
+        """
+        headers: dict[str, str] = {"X-Stainless-Poll-Helper": "true"}
+        if is_given(poll_interval_ms):
+            headers["X-Stainless-Custom-Poll-Interval"] = str(poll_interval_ms)
+
+        while True:
+            response = await self.with_raw_response.retrieve(
+                file_id,
+                vector_store_id=vector_store_id,
+                extra_headers=headers,
+            )
+
+            file = response.parse()
+            if file.status == "in_progress":
+                if not is_given(poll_interval_ms):
+                    from_header = response.headers.get("openai-poll-after-ms")
+                    if from_header is not None:
+                        poll_interval_ms = int(from_header)
+                    else:
+                        poll_interval_ms = 1000
+
+                await self._sleep(poll_interval_ms / 1000)
+            elif file.status == "cancelled" or file.status == "completed" or file.status == "failed":
+                return file
+            else:
+                if TYPE_CHECKING:  # type: ignore[unreachable]
+                    assert_never(file.status)
+                else:
+                    return file
+
+    async def upload(
+        self,
+        *,
+        vector_store_id: str,
+        file: FileTypes,
+        chunking_strategy: FileChunkingStrategyParam | NotGiven = NOT_GIVEN,
+    ) -> VectorStoreFile:
+        """Upload a file to the `files` API and then attach it to the given vector store.
+
+        Note the file will be asynchronously processed (you can use the alternative
+        polling helper method to wait for processing to complete).
+        """
+        file_obj = await self._client.files.create(file=file, purpose="assistants")
+        return await self.create(
+            vector_store_id=vector_store_id, file_id=file_obj.id, chunking_strategy=chunking_strategy
+        )
+
+    async def upload_and_poll(
+        self,
+        *,
+        vector_store_id: str,
+        file: FileTypes,
+        poll_interval_ms: int | NotGiven = NOT_GIVEN,
+        chunking_strategy: FileChunkingStrategyParam | NotGiven = NOT_GIVEN,
+    ) -> VectorStoreFile:
+        """Add a file to a vector store and poll until processing is complete."""
+        file_obj = await self._client.files.create(file=file, purpose="assistants")
+        return await self.create_and_poll(
+            vector_store_id=vector_store_id,
+            file_id=file_obj.id,
+            poll_interval_ms=poll_interval_ms,
+            chunking_strategy=chunking_strategy,
+        )
+
+
+class FilesWithRawResponse:
+    def __init__(self, files: Files) -> None:
+        self._files = files
+
+        self.create = _legacy_response.to_raw_response_wrapper(
+            files.create,
+        )
+        self.retrieve = _legacy_response.to_raw_response_wrapper(
+            files.retrieve,
+        )
+        self.list = _legacy_response.to_raw_response_wrapper(
+            files.list,
+        )
+        self.delete = _legacy_response.to_raw_response_wrapper(
+            files.delete,
+        )
+
+
+class AsyncFilesWithRawResponse:
+    def __init__(self, files: AsyncFiles) -> None:
+        self._files = files
+
+        self.create = _legacy_response.async_to_raw_response_wrapper(
+            files.create,
+        )
+        self.retrieve = _legacy_response.async_to_raw_response_wrapper(
+            files.retrieve,
+        )
+        self.list = _legacy_response.async_to_raw_response_wrapper(
+            files.list,
+        )
+        self.delete = _legacy_response.async_to_raw_response_wrapper(
+            files.delete,
+        )
+
+
+class FilesWithStreamingResponse:
+    def __init__(self, files: Files) -> None:
+        self._files = files
+
+        self.create = to_streamed_response_wrapper(
+            files.create,
+        )
+        self.retrieve = to_streamed_response_wrapper(
+            files.retrieve,
+        )
+        self.list = to_streamed_response_wrapper(
+            files.list,
+        )
+        self.delete = to_streamed_response_wrapper(
+            files.delete,
+        )
+
+
+class AsyncFilesWithStreamingResponse:
+    def __init__(self, files: AsyncFiles) -> None:
+        self._files = files
+
+        self.create = async_to_streamed_response_wrapper(
+            files.create,
+        )
+        self.retrieve = async_to_streamed_response_wrapper(
+            files.retrieve,
+        )
+        self.list = async_to_streamed_response_wrapper(
+            files.list,
+        )
+        self.delete = async_to_streamed_response_wrapper(
+            files.delete,
+        )
diff --git a/.venv/lib/python3.11/site-packages/openai/resources/beta/vector_stores/vector_stores.py b/.venv/lib/python3.11/site-packages/openai/resources/beta/vector_stores/vector_stores.py
new file mode 100644
index 0000000000000000000000000000000000000000..1da52fb3c7fdfe3bf4dbb9268dfd0cafc1898d67
--- /dev/null
+++ b/.venv/lib/python3.11/site-packages/openai/resources/beta/vector_stores/vector_stores.py
@@ -0,0 +1,728 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import List, Optional
+from typing_extensions import Literal
+
+import httpx
+
+from .... import _legacy_response
+from .files import (
+    Files,
+    AsyncFiles,
+    FilesWithRawResponse,
+    AsyncFilesWithRawResponse,
+    FilesWithStreamingResponse,
+    AsyncFilesWithStreamingResponse,
+)
+from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven
+from ...._utils import (
+    maybe_transform,
+    async_maybe_transform,
+)
+from ...._compat import cached_property
+from ...._resource import SyncAPIResource, AsyncAPIResource
+from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
+from .file_batches import (
+    FileBatches,
+    AsyncFileBatches,
+    FileBatchesWithRawResponse,
+    AsyncFileBatchesWithRawResponse,
+    FileBatchesWithStreamingResponse,
+    AsyncFileBatchesWithStreamingResponse,
+)
+from ....pagination import SyncCursorPage, AsyncCursorPage
+from ....types.beta import (
+    FileChunkingStrategyParam,
+    vector_store_list_params,
+    vector_store_create_params,
+    vector_store_update_params,
+)
+from ...._base_client import AsyncPaginator, make_request_options
+from ....types.beta.vector_store import VectorStore
+from ....types.shared_params.metadata import Metadata
+from ....types.beta.vector_store_deleted import VectorStoreDeleted
+from ....types.beta.file_chunking_strategy_param import FileChunkingStrategyParam
+
+__all__ = ["VectorStores", "AsyncVectorStores"]
+
+
+class VectorStores(SyncAPIResource):
+    @cached_property
+    def files(self) -> Files:
+        return Files(self._client)
+
+    @cached_property
+    def file_batches(self) -> FileBatches:
+        return FileBatches(self._client)
+
+    @cached_property
+    def with_raw_response(self) -> VectorStoresWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return VectorStoresWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> VectorStoresWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return VectorStoresWithStreamingResponse(self)
+
+    def create(
+        self,
+        *,
+        chunking_strategy: FileChunkingStrategyParam | NotGiven = NOT_GIVEN,
+        expires_after: vector_store_create_params.ExpiresAfter | NotGiven = NOT_GIVEN,
+        file_ids: List[str] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        name: str | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> VectorStore:
+        """
+        Create a vector store.
+
+        Args:
+          chunking_strategy: The chunking strategy used to chunk the file(s). If not set, will use the `auto`
+              strategy. Only applicable if `file_ids` is non-empty.
+
+          expires_after: The expiration policy for a vector store.
+
+          file_ids: A list of [File](https://platform.openai.com/docs/api-reference/files) IDs that
+              the vector store should use. Useful for tools like `file_search` that can access
+              files.
+
+          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+              for storing additional information about the object in a structured format, and
+              querying for objects via API or the dashboard.
+
+              Keys are strings with a maximum length of 64 characters. Values are strings with
+              a maximum length of 512 characters.
+
+          name: The name of the vector store.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return self._post(
+            "/vector_stores",
+            body=maybe_transform(
+                {
+                    "chunking_strategy": chunking_strategy,
+                    "expires_after": expires_after,
+                    "file_ids": file_ids,
+                    "metadata": metadata,
+                    "name": name,
+                },
+                vector_store_create_params.VectorStoreCreateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=VectorStore,
+        )
+
+    def retrieve(
+        self,
+        vector_store_id: str,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> VectorStore:
+        """
+        Retrieves a vector store.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not vector_store_id:
+            raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return self._get(
+            f"/vector_stores/{vector_store_id}",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=VectorStore,
+        )
+
+    def update(
+        self,
+        vector_store_id: str,
+        *,
+        expires_after: Optional[vector_store_update_params.ExpiresAfter] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        name: Optional[str] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> VectorStore:
+        """
+        Modifies a vector store.
+
+        Args:
+          expires_after: The expiration policy for a vector store.
+
+          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+              for storing additional information about the object in a structured format, and
+              querying for objects via API or the dashboard.
+
+              Keys are strings with a maximum length of 64 characters. Values are strings with
+              a maximum length of 512 characters.
+
+          name: The name of the vector store.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not vector_store_id:
+            raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return self._post(
+            f"/vector_stores/{vector_store_id}",
+            body=maybe_transform(
+                {
+                    "expires_after": expires_after,
+                    "metadata": metadata,
+                    "name": name,
+                },
+                vector_store_update_params.VectorStoreUpdateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=VectorStore,
+        )
+
+    def list(
+        self,
+        *,
+        after: str | NotGiven = NOT_GIVEN,
+        before: str | NotGiven = NOT_GIVEN,
+        limit: int | NotGiven = NOT_GIVEN,
+        order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> SyncCursorPage[VectorStore]:
+        """Returns a list of vector stores.
+
+        Args:
+          after: A cursor for use in pagination.
+
+        `after` is an object ID that defines your place
+              in the list. For instance, if you make a list request and receive 100 objects,
+              ending with obj_foo, your subsequent call can include after=obj_foo in order to
+              fetch the next page of the list.
+
+          before: A cursor for use in pagination. `before` is an object ID that defines your place
+              in the list. For instance, if you make a list request and receive 100 objects,
+              starting with obj_foo, your subsequent call can include before=obj_foo in order
+              to fetch the previous page of the list.
+
+          limit: A limit on the number of objects to be returned. Limit can range between 1 and
+              100, and the default is 20.
+
+          order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending
+              order and `desc` for descending order.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return self._get_api_list(
+            "/vector_stores",
+            page=SyncCursorPage[VectorStore],
+            options=make_request_options(
+                extra_headers=extra_headers,
+                extra_query=extra_query,
+                extra_body=extra_body,
+                timeout=timeout,
+                query=maybe_transform(
+                    {
+                        "after": after,
+                        "before": before,
+                        "limit": limit,
+                        "order": order,
+                    },
+                    vector_store_list_params.VectorStoreListParams,
+                ),
+            ),
+            model=VectorStore,
+        )
+
+    def delete(
+        self,
+        vector_store_id: str,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> VectorStoreDeleted:
+        """
+        Delete a vector store.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not vector_store_id:
+            raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return self._delete(
+            f"/vector_stores/{vector_store_id}",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=VectorStoreDeleted,
+        )
+
+
+class AsyncVectorStores(AsyncAPIResource):
+    @cached_property
+    def files(self) -> AsyncFiles:
+        return AsyncFiles(self._client)
+
+    @cached_property
+    def file_batches(self) -> AsyncFileBatches:
+        return AsyncFileBatches(self._client)
+
+    @cached_property
+    def with_raw_response(self) -> AsyncVectorStoresWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return AsyncVectorStoresWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> AsyncVectorStoresWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return AsyncVectorStoresWithStreamingResponse(self)
+
+    async def create(
+        self,
+        *,
+        chunking_strategy: FileChunkingStrategyParam | NotGiven = NOT_GIVEN,
+        expires_after: vector_store_create_params.ExpiresAfter | NotGiven = NOT_GIVEN,
+        file_ids: List[str] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        name: str | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> VectorStore:
+        """
+        Create a vector store.
+
+        Args:
+          chunking_strategy: The chunking strategy used to chunk the file(s). If not set, will use the `auto`
+              strategy. Only applicable if `file_ids` is non-empty.
+
+          expires_after: The expiration policy for a vector store.
+
+          file_ids: A list of [File](https://platform.openai.com/docs/api-reference/files) IDs that
+              the vector store should use. Useful for tools like `file_search` that can access
+              files.
+
+          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+              for storing additional information about the object in a structured format, and
+              querying for objects via API or the dashboard.
+
+              Keys are strings with a maximum length of 64 characters. Values are strings with
+              a maximum length of 512 characters.
+
+          name: The name of the vector store.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return await self._post(
+            "/vector_stores",
+            body=await async_maybe_transform(
+                {
+                    "chunking_strategy": chunking_strategy,
+                    "expires_after": expires_after,
+                    "file_ids": file_ids,
+                    "metadata": metadata,
+                    "name": name,
+                },
+                vector_store_create_params.VectorStoreCreateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=VectorStore,
+        )
+
+    async def retrieve(
+        self,
+        vector_store_id: str,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> VectorStore:
+        """
+        Retrieves a vector store.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not vector_store_id:
+            raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return await self._get(
+            f"/vector_stores/{vector_store_id}",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=VectorStore,
+        )
+
+    async def update(
+        self,
+        vector_store_id: str,
+        *,
+        expires_after: Optional[vector_store_update_params.ExpiresAfter] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        name: Optional[str] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> VectorStore:
+        """
+        Modifies a vector store.
+
+        Args:
+          expires_after: The expiration policy for a vector store.
+
+          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+              for storing additional information about the object in a structured format, and
+              querying for objects via API or the dashboard.
+
+              Keys are strings with a maximum length of 64 characters. Values are strings with
+              a maximum length of 512 characters.
+
+          name: The name of the vector store.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not vector_store_id:
+            raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return await self._post(
+            f"/vector_stores/{vector_store_id}",
+            body=await async_maybe_transform(
+                {
+                    "expires_after": expires_after,
+                    "metadata": metadata,
+                    "name": name,
+                },
+                vector_store_update_params.VectorStoreUpdateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=VectorStore,
+        )
+
+    def list(
+        self,
+        *,
+        after: str | NotGiven = NOT_GIVEN,
+        before: str | NotGiven = NOT_GIVEN,
+        limit: int | NotGiven = NOT_GIVEN,
+        order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AsyncPaginator[VectorStore, AsyncCursorPage[VectorStore]]:
+        """Returns a list of vector stores.
+
+        Args:
+          after: A cursor for use in pagination.
+
+        `after` is an object ID that defines your place
+              in the list. For instance, if you make a list request and receive 100 objects,
+              ending with obj_foo, your subsequent call can include after=obj_foo in order to
+              fetch the next page of the list.
+
+          before: A cursor for use in pagination. `before` is an object ID that defines your place
+              in the list. For instance, if you make a list request and receive 100 objects,
+              starting with obj_foo, your subsequent call can include before=obj_foo in order
+              to fetch the previous page of the list.
+
+          limit: A limit on the number of objects to be returned. Limit can range between 1 and
+              100, and the default is 20.
+
+          order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending
+              order and `desc` for descending order.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return self._get_api_list(
+            "/vector_stores",
+            page=AsyncCursorPage[VectorStore],
+            options=make_request_options(
+                extra_headers=extra_headers,
+                extra_query=extra_query,
+                extra_body=extra_body,
+                timeout=timeout,
+                query=maybe_transform(
+                    {
+                        "after": after,
+                        "before": before,
+                        "limit": limit,
+                        "order": order,
+                    },
+                    vector_store_list_params.VectorStoreListParams,
+                ),
+            ),
+            model=VectorStore,
+        )
+
+    async def delete(
+        self,
+        vector_store_id: str,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> VectorStoreDeleted:
+        """
+        Delete a vector store.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not vector_store_id:
+            raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+        return await self._delete(
+            f"/vector_stores/{vector_store_id}",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=VectorStoreDeleted,
+        )
+
+
+class VectorStoresWithRawResponse:
+    def __init__(self, vector_stores: VectorStores) -> None:
+        self._vector_stores = vector_stores
+
+        self.create = _legacy_response.to_raw_response_wrapper(
+            vector_stores.create,
+        )
+        self.retrieve = _legacy_response.to_raw_response_wrapper(
+            vector_stores.retrieve,
+        )
+        self.update = _legacy_response.to_raw_response_wrapper(
+            vector_stores.update,
+        )
+        self.list = _legacy_response.to_raw_response_wrapper(
+            vector_stores.list,
+        )
+        self.delete = _legacy_response.to_raw_response_wrapper(
+            vector_stores.delete,
+        )
+
+    @cached_property
+    def files(self) -> FilesWithRawResponse:
+        return FilesWithRawResponse(self._vector_stores.files)
+
+    @cached_property
+    def file_batches(self) -> FileBatchesWithRawResponse:
+        return FileBatchesWithRawResponse(self._vector_stores.file_batches)
+
+
+class AsyncVectorStoresWithRawResponse:
+    def __init__(self, vector_stores: AsyncVectorStores) -> None:
+        self._vector_stores = vector_stores
+
+        self.create = _legacy_response.async_to_raw_response_wrapper(
+            vector_stores.create,
+        )
+        self.retrieve = _legacy_response.async_to_raw_response_wrapper(
+            vector_stores.retrieve,
+        )
+        self.update = _legacy_response.async_to_raw_response_wrapper(
+            vector_stores.update,
+        )
+        self.list = _legacy_response.async_to_raw_response_wrapper(
+            vector_stores.list,
+        )
+        self.delete = _legacy_response.async_to_raw_response_wrapper(
+            vector_stores.delete,
+        )
+
+    @cached_property
+    def files(self) -> AsyncFilesWithRawResponse:
+        return AsyncFilesWithRawResponse(self._vector_stores.files)
+
+    @cached_property
+    def file_batches(self) -> AsyncFileBatchesWithRawResponse:
+        return AsyncFileBatchesWithRawResponse(self._vector_stores.file_batches)
+
+
+class VectorStoresWithStreamingResponse:
+    def __init__(self, vector_stores: VectorStores) -> None:
+        self._vector_stores = vector_stores
+
+        self.create = to_streamed_response_wrapper(
+            vector_stores.create,
+        )
+        self.retrieve = to_streamed_response_wrapper(
+            vector_stores.retrieve,
+        )
+        self.update = to_streamed_response_wrapper(
+            vector_stores.update,
+        )
+        self.list = to_streamed_response_wrapper(
+            vector_stores.list,
+        )
+        self.delete = to_streamed_response_wrapper(
+            vector_stores.delete,
+        )
+
+    @cached_property
+    def files(self) -> FilesWithStreamingResponse:
+        return FilesWithStreamingResponse(self._vector_stores.files)
+
+    @cached_property
+    def file_batches(self) -> FileBatchesWithStreamingResponse:
+        return FileBatchesWithStreamingResponse(self._vector_stores.file_batches)
+
+
+class AsyncVectorStoresWithStreamingResponse:
+    def __init__(self, vector_stores: AsyncVectorStores) -> None:
+        self._vector_stores = vector_stores
+
+        self.create = async_to_streamed_response_wrapper(
+            vector_stores.create,
+        )
+        self.retrieve = async_to_streamed_response_wrapper(
+            vector_stores.retrieve,
+        )
+        self.update = async_to_streamed_response_wrapper(
+            vector_stores.update,
+        )
+        self.list = async_to_streamed_response_wrapper(
+            vector_stores.list,
+        )
+        self.delete = async_to_streamed_response_wrapper(
+            vector_stores.delete,
+        )
+
+    @cached_property
+    def files(self) -> AsyncFilesWithStreamingResponse:
+        return AsyncFilesWithStreamingResponse(self._vector_stores.files)
+
+    @cached_property
+    def file_batches(self) -> AsyncFileBatchesWithStreamingResponse:
+        return AsyncFileBatchesWithStreamingResponse(self._vector_stores.file_batches)
diff --git a/.venv/lib/python3.11/site-packages/openai/resources/completions.py b/.venv/lib/python3.11/site-packages/openai/resources/completions.py
new file mode 100644
index 0000000000000000000000000000000000000000..171f50935295c1c7ee5f7a0b7ca110c49eba1e42
--- /dev/null
+++ b/.venv/lib/python3.11/site-packages/openai/resources/completions.py
@@ -0,0 +1,1148 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Dict, List, Union, Iterable, Optional
+from typing_extensions import Literal, overload
+
+import httpx
+
+from .. import _legacy_response
+from ..types import completion_create_params
+from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven
+from .._utils import (
+    required_args,
+    maybe_transform,
+    async_maybe_transform,
+)
+from .._compat import cached_property
+from .._resource import SyncAPIResource, AsyncAPIResource
+from .._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
+from .._streaming import Stream, AsyncStream
+from .._base_client import (
+    make_request_options,
+)
+from ..types.completion import Completion
+from ..types.chat.chat_completion_stream_options_param import ChatCompletionStreamOptionsParam
+
+__all__ = ["Completions", "AsyncCompletions"]
+
+
+class Completions(SyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> CompletionsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return CompletionsWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> CompletionsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return CompletionsWithStreamingResponse(self)
+
+    @overload
+    def create(
+        self,
+        *,
+        model: Union[str, Literal["gpt-3.5-turbo-instruct", "davinci-002", "babbage-002"]],
+        prompt: Union[str, List[str], Iterable[int], Iterable[Iterable[int]], None],
+        best_of: Optional[int] | NotGiven = NOT_GIVEN,
+        echo: Optional[bool] | NotGiven = NOT_GIVEN,
+        frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+        logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
+        logprobs: Optional[int] | NotGiven = NOT_GIVEN,
+        max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        n: Optional[int] | NotGiven = NOT_GIVEN,
+        presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+        seed: Optional[int] | NotGiven = NOT_GIVEN,
+        stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
+        stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
+        stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
+        suffix: Optional[str] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        user: str | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Completion:
+        """
+        Creates a completion for the provided prompt and parameters.
+
+        Args:
+          model: ID of the model to use. You can use the
+              [List models](https://platform.openai.com/docs/api-reference/models/list) API to
+              see all of your available models, or see our
+              [Model overview](https://platform.openai.com/docs/models) for descriptions of
+              them.
+
+          prompt: The prompt(s) to generate completions for, encoded as a string, array of
+              strings, array of tokens, or array of token arrays.
+
+              Note that <|endoftext|> is the document separator that the model sees during
+              training, so if a prompt is not specified the model will generate as if from the
+              beginning of a new document.
+
+          best_of: Generates `best_of` completions server-side and returns the "best" (the one with
+              the highest log probability per token). Results cannot be streamed.
+
+              When used with `n`, `best_of` controls the number of candidate completions and
+              `n` specifies how many to return – `best_of` must be greater than `n`.
+
+              **Note:** Because this parameter generates many completions, it can quickly
+              consume your token quota. Use carefully and ensure that you have reasonable
+              settings for `max_tokens` and `stop`.
+
+          echo: Echo back the prompt in addition to the completion
+
+          frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their
+              existing frequency in the text so far, decreasing the model's likelihood to
+              repeat the same line verbatim.
+
+              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation)
+
+          logit_bias: Modify the likelihood of specified tokens appearing in the completion.
+
+              Accepts a JSON object that maps tokens (specified by their token ID in the GPT
+              tokenizer) to an associated bias value from -100 to 100. You can use this
+              [tokenizer tool](/tokenizer?view=bpe) to convert text to token IDs.
+              Mathematically, the bias is added to the logits generated by the model prior to
+              sampling. The exact effect will vary per model, but values between -1 and 1
+              should decrease or increase likelihood of selection; values like -100 or 100
+              should result in a ban or exclusive selection of the relevant token.
+
+              As an example, you can pass `{"50256": -100}` to prevent the <|endoftext|> token
+              from being generated.
+
+          logprobs: Include the log probabilities on the `logprobs` most likely output tokens, as
+              well the chosen tokens. For example, if `logprobs` is 5, the API will return a
+              list of the 5 most likely tokens. The API will always return the `logprob` of
+              the sampled token, so there may be up to `logprobs+1` elements in the response.
+
+              The maximum value for `logprobs` is 5.
+
+          max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the
+              completion.
+
+              The token count of your prompt plus `max_tokens` cannot exceed the model's
+              context length.
+              [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken)
+              for counting tokens.
+
+          n: How many completions to generate for each prompt.
+
+              **Note:** Because this parameter generates many completions, it can quickly
+              consume your token quota. Use carefully and ensure that you have reasonable
+              settings for `max_tokens` and `stop`.
+
+          presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on
+              whether they appear in the text so far, increasing the model's likelihood to
+              talk about new topics.
+
+              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation)
+
+          seed: If specified, our system will make a best effort to sample deterministically,
+              such that repeated requests with the same `seed` and parameters should return
+              the same result.
+
+              Determinism is not guaranteed, and you should refer to the `system_fingerprint`
+              response parameter to monitor changes in the backend.
+
+          stop: Up to 4 sequences where the API will stop generating further tokens. The
+              returned text will not contain the stop sequence.
+
+          stream: Whether to stream back partial progress. If set, tokens will be sent as
+              data-only
+              [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format)
+              as they become available, with the stream terminated by a `data: [DONE]`
+              message.
+              [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions).
+
+          stream_options: Options for streaming response. Only set this when you set `stream: true`.
+
+          suffix: The suffix that comes after a completion of inserted text.
+
+              This parameter is only supported for `gpt-3.5-turbo-instruct`.
+
+          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
+              make the output more random, while lower values like 0.2 will make it more
+              focused and deterministic.
+
+              We generally recommend altering this or `top_p` but not both.
+
+          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
+              model considers the results of the tokens with top_p probability mass. So 0.1
+              means only the tokens comprising the top 10% probability mass are considered.
+
+              We generally recommend altering this or `temperature` but not both.
+
+          user: A unique identifier representing your end-user, which can help OpenAI to monitor
+              and detect abuse.
+              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids).
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
+
+    @overload
+    def create(
+        self,
+        *,
+        model: Union[str, Literal["gpt-3.5-turbo-instruct", "davinci-002", "babbage-002"]],
+        prompt: Union[str, List[str], Iterable[int], Iterable[Iterable[int]], None],
+        stream: Literal[True],
+        best_of: Optional[int] | NotGiven = NOT_GIVEN,
+        echo: Optional[bool] | NotGiven = NOT_GIVEN,
+        frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+        logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
+        logprobs: Optional[int] | NotGiven = NOT_GIVEN,
+        max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        n: Optional[int] | NotGiven = NOT_GIVEN,
+        presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+        seed: Optional[int] | NotGiven = NOT_GIVEN,
+        stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
+        stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
+        suffix: Optional[str] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        user: str | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Stream[Completion]:
+        """
+        Creates a completion for the provided prompt and parameters.
+
+        Args:
+          model: ID of the model to use. You can use the
+              [List models](https://platform.openai.com/docs/api-reference/models/list) API to
+              see all of your available models, or see our
+              [Model overview](https://platform.openai.com/docs/models) for descriptions of
+              them.
+
+          prompt: The prompt(s) to generate completions for, encoded as a string, array of
+              strings, array of tokens, or array of token arrays.
+
+              Note that <|endoftext|> is the document separator that the model sees during
+              training, so if a prompt is not specified the model will generate as if from the
+              beginning of a new document.
+
+          stream: Whether to stream back partial progress. If set, tokens will be sent as
+              data-only
+              [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format)
+              as they become available, with the stream terminated by a `data: [DONE]`
+              message.
+              [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions).
+
+          best_of: Generates `best_of` completions server-side and returns the "best" (the one with
+              the highest log probability per token). Results cannot be streamed.
+
+              When used with `n`, `best_of` controls the number of candidate completions and
+              `n` specifies how many to return – `best_of` must be greater than `n`.
+
+              **Note:** Because this parameter generates many completions, it can quickly
+              consume your token quota. Use carefully and ensure that you have reasonable
+              settings for `max_tokens` and `stop`.
+
+          echo: Echo back the prompt in addition to the completion
+
+          frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their
+              existing frequency in the text so far, decreasing the model's likelihood to
+              repeat the same line verbatim.
+
+              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation)
+
+          logit_bias: Modify the likelihood of specified tokens appearing in the completion.
+
+              Accepts a JSON object that maps tokens (specified by their token ID in the GPT
+              tokenizer) to an associated bias value from -100 to 100. You can use this
+              [tokenizer tool](/tokenizer?view=bpe) to convert text to token IDs.
+              Mathematically, the bias is added to the logits generated by the model prior to
+              sampling. The exact effect will vary per model, but values between -1 and 1
+              should decrease or increase likelihood of selection; values like -100 or 100
+              should result in a ban or exclusive selection of the relevant token.
+
+              As an example, you can pass `{"50256": -100}` to prevent the <|endoftext|> token
+              from being generated.
+
+          logprobs: Include the log probabilities on the `logprobs` most likely output tokens, as
+              well the chosen tokens. For example, if `logprobs` is 5, the API will return a
+              list of the 5 most likely tokens. The API will always return the `logprob` of
+              the sampled token, so there may be up to `logprobs+1` elements in the response.
+
+              The maximum value for `logprobs` is 5.
+
+          max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the
+              completion.
+
+              The token count of your prompt plus `max_tokens` cannot exceed the model's
+              context length.
+              [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken)
+              for counting tokens.
+
+          n: How many completions to generate for each prompt.
+
+              **Note:** Because this parameter generates many completions, it can quickly
+              consume your token quota. Use carefully and ensure that you have reasonable
+              settings for `max_tokens` and `stop`.
+
+          presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on
+              whether they appear in the text so far, increasing the model's likelihood to
+              talk about new topics.
+
+              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation)
+
+          seed: If specified, our system will make a best effort to sample deterministically,
+              such that repeated requests with the same `seed` and parameters should return
+              the same result.
+
+              Determinism is not guaranteed, and you should refer to the `system_fingerprint`
+              response parameter to monitor changes in the backend.
+
+          stop: Up to 4 sequences where the API will stop generating further tokens. The
+              returned text will not contain the stop sequence.
+
+          stream_options: Options for streaming response. Only set this when you set `stream: true`.
+
+          suffix: The suffix that comes after a completion of inserted text.
+
+              This parameter is only supported for `gpt-3.5-turbo-instruct`.
+
+          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
+              make the output more random, while lower values like 0.2 will make it more
+              focused and deterministic.
+
+              We generally recommend altering this or `top_p` but not both.
+
+          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
+              model considers the results of the tokens with top_p probability mass. So 0.1
+              means only the tokens comprising the top 10% probability mass are considered.
+
+              We generally recommend altering this or `temperature` but not both.
+
+          user: A unique identifier representing your end-user, which can help OpenAI to monitor
+              and detect abuse.
+              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids).
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
+
+    @overload
+    def create(
+        self,
+        *,
+        model: Union[str, Literal["gpt-3.5-turbo-instruct", "davinci-002", "babbage-002"]],
+        prompt: Union[str, List[str], Iterable[int], Iterable[Iterable[int]], None],
+        stream: bool,
+        best_of: Optional[int] | NotGiven = NOT_GIVEN,
+        echo: Optional[bool] | NotGiven = NOT_GIVEN,
+        frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+        logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
+        logprobs: Optional[int] | NotGiven = NOT_GIVEN,
+        max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        n: Optional[int] | NotGiven = NOT_GIVEN,
+        presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+        seed: Optional[int] | NotGiven = NOT_GIVEN,
+        stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
+        stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
+        suffix: Optional[str] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        user: str | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Completion | Stream[Completion]:
+        """
+        Creates a completion for the provided prompt and parameters.
+
+        Args:
+          model: ID of the model to use. You can use the
+              [List models](https://platform.openai.com/docs/api-reference/models/list) API to
+              see all of your available models, or see our
+              [Model overview](https://platform.openai.com/docs/models) for descriptions of
+              them.
+
+          prompt: The prompt(s) to generate completions for, encoded as a string, array of
+              strings, array of tokens, or array of token arrays.
+
+              Note that <|endoftext|> is the document separator that the model sees during
+              training, so if a prompt is not specified the model will generate as if from the
+              beginning of a new document.
+
+          stream: Whether to stream back partial progress. If set, tokens will be sent as
+              data-only
+              [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format)
+              as they become available, with the stream terminated by a `data: [DONE]`
+              message.
+              [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions).
+
+          best_of: Generates `best_of` completions server-side and returns the "best" (the one with
+              the highest log probability per token). Results cannot be streamed.
+
+              When used with `n`, `best_of` controls the number of candidate completions and
+              `n` specifies how many to return – `best_of` must be greater than `n`.
+
+              **Note:** Because this parameter generates many completions, it can quickly
+              consume your token quota. Use carefully and ensure that you have reasonable
+              settings for `max_tokens` and `stop`.
+
+          echo: Echo back the prompt in addition to the completion
+
+          frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their
+              existing frequency in the text so far, decreasing the model's likelihood to
+              repeat the same line verbatim.
+
+              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation)
+
+          logit_bias: Modify the likelihood of specified tokens appearing in the completion.
+
+              Accepts a JSON object that maps tokens (specified by their token ID in the GPT
+              tokenizer) to an associated bias value from -100 to 100. You can use this
+              [tokenizer tool](/tokenizer?view=bpe) to convert text to token IDs.
+              Mathematically, the bias is added to the logits generated by the model prior to
+              sampling. The exact effect will vary per model, but values between -1 and 1
+              should decrease or increase likelihood of selection; values like -100 or 100
+              should result in a ban or exclusive selection of the relevant token.
+
+              As an example, you can pass `{"50256": -100}` to prevent the <|endoftext|> token
+              from being generated.
+
+          logprobs: Include the log probabilities on the `logprobs` most likely output tokens, as
+              well the chosen tokens. For example, if `logprobs` is 5, the API will return a
+              list of the 5 most likely tokens. The API will always return the `logprob` of
+              the sampled token, so there may be up to `logprobs+1` elements in the response.
+
+              The maximum value for `logprobs` is 5.
+
+          max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the
+              completion.
+
+              The token count of your prompt plus `max_tokens` cannot exceed the model's
+              context length.
+              [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken)
+              for counting tokens.
+
+          n: How many completions to generate for each prompt.
+
+              **Note:** Because this parameter generates many completions, it can quickly
+              consume your token quota. Use carefully and ensure that you have reasonable
+              settings for `max_tokens` and `stop`.
+
+          presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on
+              whether they appear in the text so far, increasing the model's likelihood to
+              talk about new topics.
+
+              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation)
+
+          seed: If specified, our system will make a best effort to sample deterministically,
+              such that repeated requests with the same `seed` and parameters should return
+              the same result.
+
+              Determinism is not guaranteed, and you should refer to the `system_fingerprint`
+              response parameter to monitor changes in the backend.
+
+          stop: Up to 4 sequences where the API will stop generating further tokens. The
+              returned text will not contain the stop sequence.
+
+          stream_options: Options for streaming response. Only set this when you set `stream: true`.
+
+          suffix: The suffix that comes after a completion of inserted text.
+
+              This parameter is only supported for `gpt-3.5-turbo-instruct`.
+
+          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
+              make the output more random, while lower values like 0.2 will make it more
+              focused and deterministic.
+
+              We generally recommend altering this or `top_p` but not both.
+
+          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
+              model considers the results of the tokens with top_p probability mass. So 0.1
+              means only the tokens comprising the top 10% probability mass are considered.
+
+              We generally recommend altering this or `temperature` but not both.
+
+          user: A unique identifier representing your end-user, which can help OpenAI to monitor
+              and detect abuse.
+              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids).
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
+
+    @required_args(["model", "prompt"], ["model", "prompt", "stream"])
+    def create(
+        self,
+        *,
+        model: Union[str, Literal["gpt-3.5-turbo-instruct", "davinci-002", "babbage-002"]],
+        prompt: Union[str, List[str], Iterable[int], Iterable[Iterable[int]], None],
+        best_of: Optional[int] | NotGiven = NOT_GIVEN,
+        echo: Optional[bool] | NotGiven = NOT_GIVEN,
+        frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+        logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
+        logprobs: Optional[int] | NotGiven = NOT_GIVEN,
+        max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        n: Optional[int] | NotGiven = NOT_GIVEN,
+        presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+        seed: Optional[int] | NotGiven = NOT_GIVEN,
+        stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
+        stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
+        stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
+        suffix: Optional[str] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        user: str | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Completion | Stream[Completion]:
+        return self._post(
+            "/completions",
+            body=maybe_transform(
+                {
+                    "model": model,
+                    "prompt": prompt,
+                    "best_of": best_of,
+                    "echo": echo,
+                    "frequency_penalty": frequency_penalty,
+                    "logit_bias": logit_bias,
+                    "logprobs": logprobs,
+                    "max_tokens": max_tokens,
+                    "n": n,
+                    "presence_penalty": presence_penalty,
+                    "seed": seed,
+                    "stop": stop,
+                    "stream": stream,
+                    "stream_options": stream_options,
+                    "suffix": suffix,
+                    "temperature": temperature,
+                    "top_p": top_p,
+                    "user": user,
+                },
+                completion_create_params.CompletionCreateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Completion,
+            stream=stream or False,
+            stream_cls=Stream[Completion],
+        )
+
+
+class AsyncCompletions(AsyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> AsyncCompletionsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return AsyncCompletionsWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> AsyncCompletionsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return AsyncCompletionsWithStreamingResponse(self)
+
+    @overload
+    async def create(
+        self,
+        *,
+        model: Union[str, Literal["gpt-3.5-turbo-instruct", "davinci-002", "babbage-002"]],
+        prompt: Union[str, List[str], Iterable[int], Iterable[Iterable[int]], None],
+        best_of: Optional[int] | NotGiven = NOT_GIVEN,
+        echo: Optional[bool] | NotGiven = NOT_GIVEN,
+        frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+        logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
+        logprobs: Optional[int] | NotGiven = NOT_GIVEN,
+        max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        n: Optional[int] | NotGiven = NOT_GIVEN,
+        presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+        seed: Optional[int] | NotGiven = NOT_GIVEN,
+        stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
+        stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
+        stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
+        suffix: Optional[str] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        user: str | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Completion:
+        """
+        Creates a completion for the provided prompt and parameters.
+
+        Args:
+          model: ID of the model to use. You can use the
+              [List models](https://platform.openai.com/docs/api-reference/models/list) API to
+              see all of your available models, or see our
+              [Model overview](https://platform.openai.com/docs/models) for descriptions of
+              them.
+
+          prompt: The prompt(s) to generate completions for, encoded as a string, array of
+              strings, array of tokens, or array of token arrays.
+
+              Note that <|endoftext|> is the document separator that the model sees during
+              training, so if a prompt is not specified the model will generate as if from the
+              beginning of a new document.
+
+          best_of: Generates `best_of` completions server-side and returns the "best" (the one with
+              the highest log probability per token). Results cannot be streamed.
+
+              When used with `n`, `best_of` controls the number of candidate completions and
+              `n` specifies how many to return – `best_of` must be greater than `n`.
+
+              **Note:** Because this parameter generates many completions, it can quickly
+              consume your token quota. Use carefully and ensure that you have reasonable
+              settings for `max_tokens` and `stop`.
+
+          echo: Echo back the prompt in addition to the completion
+
+          frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their
+              existing frequency in the text so far, decreasing the model's likelihood to
+              repeat the same line verbatim.
+
+              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation)
+
+          logit_bias: Modify the likelihood of specified tokens appearing in the completion.
+
+              Accepts a JSON object that maps tokens (specified by their token ID in the GPT
+              tokenizer) to an associated bias value from -100 to 100. You can use this
+              [tokenizer tool](/tokenizer?view=bpe) to convert text to token IDs.
+              Mathematically, the bias is added to the logits generated by the model prior to
+              sampling. The exact effect will vary per model, but values between -1 and 1
+              should decrease or increase likelihood of selection; values like -100 or 100
+              should result in a ban or exclusive selection of the relevant token.
+
+              As an example, you can pass `{"50256": -100}` to prevent the <|endoftext|> token
+              from being generated.
+
+          logprobs: Include the log probabilities on the `logprobs` most likely output tokens, as
+              well the chosen tokens. For example, if `logprobs` is 5, the API will return a
+              list of the 5 most likely tokens. The API will always return the `logprob` of
+              the sampled token, so there may be up to `logprobs+1` elements in the response.
+
+              The maximum value for `logprobs` is 5.
+
+          max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the
+              completion.
+
+              The token count of your prompt plus `max_tokens` cannot exceed the model's
+              context length.
+              [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken)
+              for counting tokens.
+
+          n: How many completions to generate for each prompt.
+
+              **Note:** Because this parameter generates many completions, it can quickly
+              consume your token quota. Use carefully and ensure that you have reasonable
+              settings for `max_tokens` and `stop`.
+
+          presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on
+              whether they appear in the text so far, increasing the model's likelihood to
+              talk about new topics.
+
+              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation)
+
+          seed: If specified, our system will make a best effort to sample deterministically,
+              such that repeated requests with the same `seed` and parameters should return
+              the same result.
+
+              Determinism is not guaranteed, and you should refer to the `system_fingerprint`
+              response parameter to monitor changes in the backend.
+
+          stop: Up to 4 sequences where the API will stop generating further tokens. The
+              returned text will not contain the stop sequence.
+
+          stream: Whether to stream back partial progress. If set, tokens will be sent as
+              data-only
+              [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format)
+              as they become available, with the stream terminated by a `data: [DONE]`
+              message.
+              [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions).
+
+          stream_options: Options for streaming response. Only set this when you set `stream: true`.
+
+          suffix: The suffix that comes after a completion of inserted text.
+
+              This parameter is only supported for `gpt-3.5-turbo-instruct`.
+
+          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
+              make the output more random, while lower values like 0.2 will make it more
+              focused and deterministic.
+
+              We generally recommend altering this or `top_p` but not both.
+
+          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
+              model considers the results of the tokens with top_p probability mass. So 0.1
+              means only the tokens comprising the top 10% probability mass are considered.
+
+              We generally recommend altering this or `temperature` but not both.
+
+          user: A unique identifier representing your end-user, which can help OpenAI to monitor
+              and detect abuse.
+              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids).
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
+
+    @overload
+    async def create(
+        self,
+        *,
+        model: Union[str, Literal["gpt-3.5-turbo-instruct", "davinci-002", "babbage-002"]],
+        prompt: Union[str, List[str], Iterable[int], Iterable[Iterable[int]], None],
+        stream: Literal[True],
+        best_of: Optional[int] | NotGiven = NOT_GIVEN,
+        echo: Optional[bool] | NotGiven = NOT_GIVEN,
+        frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+        logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
+        logprobs: Optional[int] | NotGiven = NOT_GIVEN,
+        max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        n: Optional[int] | NotGiven = NOT_GIVEN,
+        presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+        seed: Optional[int] | NotGiven = NOT_GIVEN,
+        stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
+        stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
+        suffix: Optional[str] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        user: str | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AsyncStream[Completion]:
+        """
+        Creates a completion for the provided prompt and parameters.
+
+        Args:
+          model: ID of the model to use. You can use the
+              [List models](https://platform.openai.com/docs/api-reference/models/list) API to
+              see all of your available models, or see our
+              [Model overview](https://platform.openai.com/docs/models) for descriptions of
+              them.
+
+          prompt: The prompt(s) to generate completions for, encoded as a string, array of
+              strings, array of tokens, or array of token arrays.
+
+              Note that <|endoftext|> is the document separator that the model sees during
+              training, so if a prompt is not specified the model will generate as if from the
+              beginning of a new document.
+
+          stream: Whether to stream back partial progress. If set, tokens will be sent as
+              data-only
+              [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format)
+              as they become available, with the stream terminated by a `data: [DONE]`
+              message.
+              [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions).
+
+          best_of: Generates `best_of` completions server-side and returns the "best" (the one with
+              the highest log probability per token). Results cannot be streamed.
+
+              When used with `n`, `best_of` controls the number of candidate completions and
+              `n` specifies how many to return – `best_of` must be greater than `n`.
+
+              **Note:** Because this parameter generates many completions, it can quickly
+              consume your token quota. Use carefully and ensure that you have reasonable
+              settings for `max_tokens` and `stop`.
+
+          echo: Echo back the prompt in addition to the completion
+
+          frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their
+              existing frequency in the text so far, decreasing the model's likelihood to
+              repeat the same line verbatim.
+
+              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation)
+
+          logit_bias: Modify the likelihood of specified tokens appearing in the completion.
+
+              Accepts a JSON object that maps tokens (specified by their token ID in the GPT
+              tokenizer) to an associated bias value from -100 to 100. You can use this
+              [tokenizer tool](/tokenizer?view=bpe) to convert text to token IDs.
+              Mathematically, the bias is added to the logits generated by the model prior to
+              sampling. The exact effect will vary per model, but values between -1 and 1
+              should decrease or increase likelihood of selection; values like -100 or 100
+              should result in a ban or exclusive selection of the relevant token.
+
+              As an example, you can pass `{"50256": -100}` to prevent the <|endoftext|> token
+              from being generated.
+
+          logprobs: Include the log probabilities on the `logprobs` most likely output tokens, as
+              well the chosen tokens. For example, if `logprobs` is 5, the API will return a
+              list of the 5 most likely tokens. The API will always return the `logprob` of
+              the sampled token, so there may be up to `logprobs+1` elements in the response.
+
+              The maximum value for `logprobs` is 5.
+
+          max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the
+              completion.
+
+              The token count of your prompt plus `max_tokens` cannot exceed the model's
+              context length.
+              [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken)
+              for counting tokens.
+
+          n: How many completions to generate for each prompt.
+
+              **Note:** Because this parameter generates many completions, it can quickly
+              consume your token quota. Use carefully and ensure that you have reasonable
+              settings for `max_tokens` and `stop`.
+
+          presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on
+              whether they appear in the text so far, increasing the model's likelihood to
+              talk about new topics.
+
+              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation)
+
+          seed: If specified, our system will make a best effort to sample deterministically,
+              such that repeated requests with the same `seed` and parameters should return
+              the same result.
+
+              Determinism is not guaranteed, and you should refer to the `system_fingerprint`
+              response parameter to monitor changes in the backend.
+
+          stop: Up to 4 sequences where the API will stop generating further tokens. The
+              returned text will not contain the stop sequence.
+
+          stream_options: Options for streaming response. Only set this when you set `stream: true`.
+
+          suffix: The suffix that comes after a completion of inserted text.
+
+              This parameter is only supported for `gpt-3.5-turbo-instruct`.
+
+          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
+              make the output more random, while lower values like 0.2 will make it more
+              focused and deterministic.
+
+              We generally recommend altering this or `top_p` but not both.
+
+          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
+              model considers the results of the tokens with top_p probability mass. So 0.1
+              means only the tokens comprising the top 10% probability mass are considered.
+
+              We generally recommend altering this or `temperature` but not both.
+
+          user: A unique identifier representing your end-user, which can help OpenAI to monitor
+              and detect abuse.
+              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids).
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
+
+    @overload
+    async def create(
+        self,
+        *,
+        model: Union[str, Literal["gpt-3.5-turbo-instruct", "davinci-002", "babbage-002"]],
+        prompt: Union[str, List[str], Iterable[int], Iterable[Iterable[int]], None],
+        stream: bool,
+        best_of: Optional[int] | NotGiven = NOT_GIVEN,
+        echo: Optional[bool] | NotGiven = NOT_GIVEN,
+        frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+        logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
+        logprobs: Optional[int] | NotGiven = NOT_GIVEN,
+        max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        n: Optional[int] | NotGiven = NOT_GIVEN,
+        presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+        seed: Optional[int] | NotGiven = NOT_GIVEN,
+        stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
+        stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
+        suffix: Optional[str] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        user: str | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Completion | AsyncStream[Completion]:
+        """
+        Creates a completion for the provided prompt and parameters.
+
+        Args:
+          model: ID of the model to use. You can use the
+              [List models](https://platform.openai.com/docs/api-reference/models/list) API to
+              see all of your available models, or see our
+              [Model overview](https://platform.openai.com/docs/models) for descriptions of
+              them.
+
+          prompt: The prompt(s) to generate completions for, encoded as a string, array of
+              strings, array of tokens, or array of token arrays.
+
+              Note that <|endoftext|> is the document separator that the model sees during
+              training, so if a prompt is not specified the model will generate as if from the
+              beginning of a new document.
+
+          stream: Whether to stream back partial progress. If set, tokens will be sent as
+              data-only
+              [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format)
+              as they become available, with the stream terminated by a `data: [DONE]`
+              message.
+              [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions).
+
+          best_of: Generates `best_of` completions server-side and returns the "best" (the one with
+              the highest log probability per token). Results cannot be streamed.
+
+              When used with `n`, `best_of` controls the number of candidate completions and
+              `n` specifies how many to return – `best_of` must be greater than `n`.
+
+              **Note:** Because this parameter generates many completions, it can quickly
+              consume your token quota. Use carefully and ensure that you have reasonable
+              settings for `max_tokens` and `stop`.
+
+          echo: Echo back the prompt in addition to the completion
+
+          frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their
+              existing frequency in the text so far, decreasing the model's likelihood to
+              repeat the same line verbatim.
+
+              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation)
+
+          logit_bias: Modify the likelihood of specified tokens appearing in the completion.
+
+              Accepts a JSON object that maps tokens (specified by their token ID in the GPT
+              tokenizer) to an associated bias value from -100 to 100. You can use this
+              [tokenizer tool](/tokenizer?view=bpe) to convert text to token IDs.
+              Mathematically, the bias is added to the logits generated by the model prior to
+              sampling. The exact effect will vary per model, but values between -1 and 1
+              should decrease or increase likelihood of selection; values like -100 or 100
+              should result in a ban or exclusive selection of the relevant token.
+
+              As an example, you can pass `{"50256": -100}` to prevent the <|endoftext|> token
+              from being generated.
+
+          logprobs: Include the log probabilities on the `logprobs` most likely output tokens, as
+              well the chosen tokens. For example, if `logprobs` is 5, the API will return a
+              list of the 5 most likely tokens. The API will always return the `logprob` of
+              the sampled token, so there may be up to `logprobs+1` elements in the response.
+
+              The maximum value for `logprobs` is 5.
+
+          max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the
+              completion.
+
+              The token count of your prompt plus `max_tokens` cannot exceed the model's
+              context length.
+              [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken)
+              for counting tokens.
+
+          n: How many completions to generate for each prompt.
+
+              **Note:** Because this parameter generates many completions, it can quickly
+              consume your token quota. Use carefully and ensure that you have reasonable
+              settings for `max_tokens` and `stop`.
+
+          presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on
+              whether they appear in the text so far, increasing the model's likelihood to
+              talk about new topics.
+
+              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation)
+
+          seed: If specified, our system will make a best effort to sample deterministically,
+              such that repeated requests with the same `seed` and parameters should return
+              the same result.
+
+              Determinism is not guaranteed, and you should refer to the `system_fingerprint`
+              response parameter to monitor changes in the backend.
+
+          stop: Up to 4 sequences where the API will stop generating further tokens. The
+              returned text will not contain the stop sequence.
+
+          stream_options: Options for streaming response. Only set this when you set `stream: true`.
+
+          suffix: The suffix that comes after a completion of inserted text.
+
+              This parameter is only supported for `gpt-3.5-turbo-instruct`.
+
+          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
+              make the output more random, while lower values like 0.2 will make it more
+              focused and deterministic.
+
+              We generally recommend altering this or `top_p` but not both.
+
+          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
+              model considers the results of the tokens with top_p probability mass. So 0.1
+              means only the tokens comprising the top 10% probability mass are considered.
+
+              We generally recommend altering this or `temperature` but not both.
+
+          user: A unique identifier representing your end-user, which can help OpenAI to monitor
+              and detect abuse.
+              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids).
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
+
+    @required_args(["model", "prompt"], ["model", "prompt", "stream"])
+    async def create(
+        self,
+        *,
+        model: Union[str, Literal["gpt-3.5-turbo-instruct", "davinci-002", "babbage-002"]],
+        prompt: Union[str, List[str], Iterable[int], Iterable[Iterable[int]], None],
+        best_of: Optional[int] | NotGiven = NOT_GIVEN,
+        echo: Optional[bool] | NotGiven = NOT_GIVEN,
+        frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+        logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
+        logprobs: Optional[int] | NotGiven = NOT_GIVEN,
+        max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        n: Optional[int] | NotGiven = NOT_GIVEN,
+        presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+        seed: Optional[int] | NotGiven = NOT_GIVEN,
+        stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
+        stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
+        stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
+        suffix: Optional[str] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        user: str | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Completion | AsyncStream[Completion]:
+        return await self._post(
+            "/completions",
+            body=await async_maybe_transform(
+                {
+                    "model": model,
+                    "prompt": prompt,
+                    "best_of": best_of,
+                    "echo": echo,
+                    "frequency_penalty": frequency_penalty,
+                    "logit_bias": logit_bias,
+                    "logprobs": logprobs,
+                    "max_tokens": max_tokens,
+                    "n": n,
+                    "presence_penalty": presence_penalty,
+                    "seed": seed,
+                    "stop": stop,
+                    "stream": stream,
+                    "stream_options": stream_options,
+                    "suffix": suffix,
+                    "temperature": temperature,
+                    "top_p": top_p,
+                    "user": user,
+                },
+                completion_create_params.CompletionCreateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Completion,
+            stream=stream or False,
+            stream_cls=AsyncStream[Completion],
+        )
+
+
+class CompletionsWithRawResponse:
+    def __init__(self, completions: Completions) -> None:
+        self._completions = completions
+
+        self.create = _legacy_response.to_raw_response_wrapper(
+            completions.create,
+        )
+
+
+class AsyncCompletionsWithRawResponse:
+    def __init__(self, completions: AsyncCompletions) -> None:
+        self._completions = completions
+
+        self.create = _legacy_response.async_to_raw_response_wrapper(
+            completions.create,
+        )
+
+
+class CompletionsWithStreamingResponse:
+    def __init__(self, completions: Completions) -> None:
+        self._completions = completions
+
+        self.create = to_streamed_response_wrapper(
+            completions.create,
+        )
+
+
+class AsyncCompletionsWithStreamingResponse:
+    def __init__(self, completions: AsyncCompletions) -> None:
+        self._completions = completions
+
+        self.create = async_to_streamed_response_wrapper(
+            completions.create,
+        )
diff --git a/.venv/lib/python3.11/site-packages/openai/resources/embeddings.py b/.venv/lib/python3.11/site-packages/openai/resources/embeddings.py
new file mode 100644
index 0000000000000000000000000000000000000000..382a42340eaadef96ab613ceda421435c86e770b
--- /dev/null
+++ b/.venv/lib/python3.11/site-packages/openai/resources/embeddings.py
@@ -0,0 +1,285 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import base64
+from typing import List, Union, Iterable, cast
+from typing_extensions import Literal
+
+import httpx
+
+from .. import _legacy_response
+from ..types import embedding_create_params
+from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven
+from .._utils import is_given, maybe_transform
+from .._compat import cached_property
+from .._extras import numpy as np, has_numpy
+from .._resource import SyncAPIResource, AsyncAPIResource
+from .._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
+from .._base_client import make_request_options
+from ..types.embedding_model import EmbeddingModel
+from ..types.create_embedding_response import CreateEmbeddingResponse
+
+__all__ = ["Embeddings", "AsyncEmbeddings"]
+
+
+class Embeddings(SyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> EmbeddingsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return EmbeddingsWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> EmbeddingsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return EmbeddingsWithStreamingResponse(self)
+
+    def create(
+        self,
+        *,
+        input: Union[str, List[str], Iterable[int], Iterable[Iterable[int]]],
+        model: Union[str, EmbeddingModel],
+        dimensions: int | NotGiven = NOT_GIVEN,
+        encoding_format: Literal["float", "base64"] | NotGiven = NOT_GIVEN,
+        user: str | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> CreateEmbeddingResponse:
+        """
+        Creates an embedding vector representing the input text.
+
+        Args:
+          input: Input text to embed, encoded as a string or array of tokens. To embed multiple
+              inputs in a single request, pass an array of strings or array of token arrays.
+              The input must not exceed the max input tokens for the model (8192 tokens for
+              `text-embedding-ada-002`), cannot be an empty string, and any array must be 2048
+              dimensions or less.
+              [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken)
+              for counting tokens. Some models may also impose a limit on total number of
+              tokens summed across inputs.
+
+          model: ID of the model to use. You can use the
+              [List models](https://platform.openai.com/docs/api-reference/models/list) API to
+              see all of your available models, or see our
+              [Model overview](https://platform.openai.com/docs/models) for descriptions of
+              them.
+
+          dimensions: The number of dimensions the resulting output embeddings should have. Only
+              supported in `text-embedding-3` and later models.
+
+          encoding_format: The format to return the embeddings in. Can be either `float` or
+              [`base64`](https://pypi.org/project/pybase64/).
+
+          user: A unique identifier representing your end-user, which can help OpenAI to monitor
+              and detect abuse.
+              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids).
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        params = {
+            "input": input,
+            "model": model,
+            "user": user,
+            "dimensions": dimensions,
+            "encoding_format": encoding_format,
+        }
+        if not is_given(encoding_format) and has_numpy():
+            params["encoding_format"] = "base64"
+
+        def parser(obj: CreateEmbeddingResponse) -> CreateEmbeddingResponse:
+            if is_given(encoding_format):
+                # don't modify the response object if a user explicitly asked for a format
+                return obj
+
+            for embedding in obj.data:
+                data = cast(object, embedding.embedding)
+                if not isinstance(data, str):
+                    # numpy is not installed / base64 optimisation isn't enabled for this model yet
+                    continue
+
+                embedding.embedding = np.frombuffer(  # type: ignore[no-untyped-call]
+                    base64.b64decode(data), dtype="float32"
+                ).tolist()
+
+            return obj
+
+        return self._post(
+            "/embeddings",
+            body=maybe_transform(params, embedding_create_params.EmbeddingCreateParams),
+            options=make_request_options(
+                extra_headers=extra_headers,
+                extra_query=extra_query,
+                extra_body=extra_body,
+                timeout=timeout,
+                post_parser=parser,
+            ),
+            cast_to=CreateEmbeddingResponse,
+        )
+
+
+class AsyncEmbeddings(AsyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> AsyncEmbeddingsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return AsyncEmbeddingsWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> AsyncEmbeddingsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return AsyncEmbeddingsWithStreamingResponse(self)
+
+    async def create(
+        self,
+        *,
+        input: Union[str, List[str], Iterable[int], Iterable[Iterable[int]]],
+        model: Union[str, EmbeddingModel],
+        dimensions: int | NotGiven = NOT_GIVEN,
+        encoding_format: Literal["float", "base64"] | NotGiven = NOT_GIVEN,
+        user: str | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> CreateEmbeddingResponse:
+        """
+        Creates an embedding vector representing the input text.
+
+        Args:
+          input: Input text to embed, encoded as a string or array of tokens. To embed multiple
+              inputs in a single request, pass an array of strings or array of token arrays.
+              The input must not exceed the max input tokens for the model (8192 tokens for
+              `text-embedding-ada-002`), cannot be an empty string, and any array must be 2048
+              dimensions or less.
+              [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken)
+              for counting tokens. Some models may also impose a limit on total number of
+              tokens summed across inputs.
+
+          model: ID of the model to use. You can use the
+              [List models](https://platform.openai.com/docs/api-reference/models/list) API to
+              see all of your available models, or see our
+              [Model overview](https://platform.openai.com/docs/models) for descriptions of
+              them.
+
+          dimensions: The number of dimensions the resulting output embeddings should have. Only
+              supported in `text-embedding-3` and later models.
+
+          encoding_format: The format to return the embeddings in. Can be either `float` or
+              [`base64`](https://pypi.org/project/pybase64/).
+
+          user: A unique identifier representing your end-user, which can help OpenAI to monitor
+              and detect abuse.
+              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids).
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        params = {
+            "input": input,
+            "model": model,
+            "user": user,
+            "dimensions": dimensions,
+            "encoding_format": encoding_format,
+        }
+        if not is_given(encoding_format) and has_numpy():
+            params["encoding_format"] = "base64"
+
+        def parser(obj: CreateEmbeddingResponse) -> CreateEmbeddingResponse:
+            if is_given(encoding_format):
+                # don't modify the response object if a user explicitly asked for a format
+                return obj
+
+            for embedding in obj.data:
+                data = cast(object, embedding.embedding)
+                if not isinstance(data, str):
+                    # numpy is not installed / base64 optimisation isn't enabled for this model yet
+                    continue
+
+                embedding.embedding = np.frombuffer(  # type: ignore[no-untyped-call]
+                    base64.b64decode(data), dtype="float32"
+                ).tolist()
+
+            return obj
+
+        return await self._post(
+            "/embeddings",
+            body=maybe_transform(params, embedding_create_params.EmbeddingCreateParams),
+            options=make_request_options(
+                extra_headers=extra_headers,
+                extra_query=extra_query,
+                extra_body=extra_body,
+                timeout=timeout,
+                post_parser=parser,
+            ),
+            cast_to=CreateEmbeddingResponse,
+        )
+
+
+class EmbeddingsWithRawResponse:
+    def __init__(self, embeddings: Embeddings) -> None:
+        self._embeddings = embeddings
+
+        self.create = _legacy_response.to_raw_response_wrapper(
+            embeddings.create,
+        )
+
+
+class AsyncEmbeddingsWithRawResponse:
+    def __init__(self, embeddings: AsyncEmbeddings) -> None:
+        self._embeddings = embeddings
+
+        self.create = _legacy_response.async_to_raw_response_wrapper(
+            embeddings.create,
+        )
+
+
+class EmbeddingsWithStreamingResponse:
+    def __init__(self, embeddings: Embeddings) -> None:
+        self._embeddings = embeddings
+
+        self.create = to_streamed_response_wrapper(
+            embeddings.create,
+        )
+
+
+class AsyncEmbeddingsWithStreamingResponse:
+    def __init__(self, embeddings: AsyncEmbeddings) -> None:
+        self._embeddings = embeddings
+
+        self.create = async_to_streamed_response_wrapper(
+            embeddings.create,
+        )
diff --git a/.venv/lib/python3.11/site-packages/openai/resources/files.py b/.venv/lib/python3.11/site-packages/openai/resources/files.py
new file mode 100644
index 0000000000000000000000000000000000000000..af453e1e21527f03655ef52fc7424677927a560e
--- /dev/null
+++ b/.venv/lib/python3.11/site-packages/openai/resources/files.py
@@ -0,0 +1,775 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import time
+import typing_extensions
+from typing import Mapping, cast
+from typing_extensions import Literal
+
+import httpx
+
+from .. import _legacy_response
+from ..types import FilePurpose, file_list_params, file_create_params
+from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven, FileTypes
+from .._utils import (
+    extract_files,
+    maybe_transform,
+    deepcopy_minimal,
+    async_maybe_transform,
+)
+from .._compat import cached_property
+from .._resource import SyncAPIResource, AsyncAPIResource
+from .._response import (
+    StreamedBinaryAPIResponse,
+    AsyncStreamedBinaryAPIResponse,
+    to_streamed_response_wrapper,
+    async_to_streamed_response_wrapper,
+    to_custom_streamed_response_wrapper,
+    async_to_custom_streamed_response_wrapper,
+)
+from ..pagination import SyncCursorPage, AsyncCursorPage
+from .._base_client import AsyncPaginator, make_request_options
+from ..types.file_object import FileObject
+from ..types.file_deleted import FileDeleted
+from ..types.file_purpose import FilePurpose
+
+__all__ = ["Files", "AsyncFiles"]
+
+
+class Files(SyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> FilesWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return FilesWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> FilesWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return FilesWithStreamingResponse(self)
+
+    def create(
+        self,
+        *,
+        file: FileTypes,
+        purpose: FilePurpose,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> FileObject:
+        """Upload a file that can be used across various endpoints.
+
+        Individual files can be
+        up to 512 MB, and the size of all files uploaded by one organization can be up
+        to 100 GB.
+
+        The Assistants API supports files up to 2 million tokens and of specific file
+        types. See the
+        [Assistants Tools guide](https://platform.openai.com/docs/assistants/tools) for
+        details.
+
+        The Fine-tuning API only supports `.jsonl` files. The input also has certain
+        required formats for fine-tuning
+        [chat](https://platform.openai.com/docs/api-reference/fine-tuning/chat-input) or
+        [completions](https://platform.openai.com/docs/api-reference/fine-tuning/completions-input)
+        models.
+
+        The Batch API only supports `.jsonl` files up to 200 MB in size. The input also
+        has a specific required
+        [format](https://platform.openai.com/docs/api-reference/batch/request-input).
+
+        Please [contact us](https://help.openai.com/) if you need to increase these
+        storage limits.
+
+        Args:
+          file: The File object (not file name) to be uploaded.
+
+          purpose: The intended purpose of the uploaded file.
+
+              Use "assistants" for
+              [Assistants](https://platform.openai.com/docs/api-reference/assistants) and
+              [Message](https://platform.openai.com/docs/api-reference/messages) files,
+              "vision" for Assistants image file inputs, "batch" for
+              [Batch API](https://platform.openai.com/docs/guides/batch), and "fine-tune" for
+              [Fine-tuning](https://platform.openai.com/docs/api-reference/fine-tuning).
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        body = deepcopy_minimal(
+            {
+                "file": file,
+                "purpose": purpose,
+            }
+        )
+        files = extract_files(cast(Mapping[str, object], body), paths=[["file"]])
+        # It should be noted that the actual Content-Type header that will be
+        # sent to the server will contain a `boundary` parameter, e.g.
+        # multipart/form-data; boundary=---abc--
+        extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})}
+        return self._post(
+            "/files",
+            body=maybe_transform(body, file_create_params.FileCreateParams),
+            files=files,
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=FileObject,
+        )
+
+    def retrieve(
+        self,
+        file_id: str,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> FileObject:
+        """
+        Returns information about a specific file.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not file_id:
+            raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}")
+        return self._get(
+            f"/files/{file_id}",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=FileObject,
+        )
+
+    def list(
+        self,
+        *,
+        after: str | NotGiven = NOT_GIVEN,
+        limit: int | NotGiven = NOT_GIVEN,
+        order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN,
+        purpose: str | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> SyncCursorPage[FileObject]:
+        """Returns a list of files.
+
+        Args:
+          after: A cursor for use in pagination.
+
+        `after` is an object ID that defines your place
+              in the list. For instance, if you make a list request and receive 100 objects,
+              ending with obj_foo, your subsequent call can include after=obj_foo in order to
+              fetch the next page of the list.
+
+          limit: A limit on the number of objects to be returned. Limit can range between 1 and
+              10,000, and the default is 10,000.
+
+          order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending
+              order and `desc` for descending order.
+
+          purpose: Only return files with the given purpose.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        return self._get_api_list(
+            "/files",
+            page=SyncCursorPage[FileObject],
+            options=make_request_options(
+                extra_headers=extra_headers,
+                extra_query=extra_query,
+                extra_body=extra_body,
+                timeout=timeout,
+                query=maybe_transform(
+                    {
+                        "after": after,
+                        "limit": limit,
+                        "order": order,
+                        "purpose": purpose,
+                    },
+                    file_list_params.FileListParams,
+                ),
+            ),
+            model=FileObject,
+        )
+
+    def delete(
+        self,
+        file_id: str,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> FileDeleted:
+        """
+        Delete a file.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not file_id:
+            raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}")
+        return self._delete(
+            f"/files/{file_id}",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=FileDeleted,
+        )
+
+    def content(
+        self,
+        file_id: str,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> _legacy_response.HttpxBinaryResponseContent:
+        """
+        Returns the contents of the specified file.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not file_id:
+            raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}")
+        extra_headers = {"Accept": "application/binary", **(extra_headers or {})}
+        return self._get(
+            f"/files/{file_id}/content",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=_legacy_response.HttpxBinaryResponseContent,
+        )
+
+    @typing_extensions.deprecated("The `.content()` method should be used instead")
+    def retrieve_content(
+        self,
+        file_id: str,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> str:
+        """
+        Returns the contents of the specified file.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not file_id:
+            raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}")
+        return self._get(
+            f"/files/{file_id}/content",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=str,
+        )
+
+    def wait_for_processing(
+        self,
+        id: str,
+        *,
+        poll_interval: float = 5.0,
+        max_wait_seconds: float = 30 * 60,
+    ) -> FileObject:
+        """Waits for the given file to be processed, default timeout is 30 mins."""
+        TERMINAL_STATES = {"processed", "error", "deleted"}
+
+        start = time.time()
+        file = self.retrieve(id)
+        while file.status not in TERMINAL_STATES:
+            self._sleep(poll_interval)
+
+            file = self.retrieve(id)
+            if time.time() - start > max_wait_seconds:
+                raise RuntimeError(
+                    f"Giving up on waiting for file {id} to finish processing after {max_wait_seconds} seconds."
+                )
+
+        return file
+
+
+class AsyncFiles(AsyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> AsyncFilesWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return AsyncFilesWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> AsyncFilesWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return AsyncFilesWithStreamingResponse(self)
+
+    async def create(
+        self,
+        *,
+        file: FileTypes,
+        purpose: FilePurpose,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> FileObject:
+        """Upload a file that can be used across various endpoints.
+
+        Individual files can be
+        up to 512 MB, and the size of all files uploaded by one organization can be up
+        to 100 GB.
+
+        The Assistants API supports files up to 2 million tokens and of specific file
+        types. See the
+        [Assistants Tools guide](https://platform.openai.com/docs/assistants/tools) for
+        details.
+
+        The Fine-tuning API only supports `.jsonl` files. The input also has certain
+        required formats for fine-tuning
+        [chat](https://platform.openai.com/docs/api-reference/fine-tuning/chat-input) or
+        [completions](https://platform.openai.com/docs/api-reference/fine-tuning/completions-input)
+        models.
+
+        The Batch API only supports `.jsonl` files up to 200 MB in size. The input also
+        has a specific required
+        [format](https://platform.openai.com/docs/api-reference/batch/request-input).
+
+        Please [contact us](https://help.openai.com/) if you need to increase these
+        storage limits.
+
+        Args:
+          file: The File object (not file name) to be uploaded.
+
+          purpose: The intended purpose of the uploaded file.
+
+              Use "assistants" for
+              [Assistants](https://platform.openai.com/docs/api-reference/assistants) and
+              [Message](https://platform.openai.com/docs/api-reference/messages) files,
+              "vision" for Assistants image file inputs, "batch" for
+              [Batch API](https://platform.openai.com/docs/guides/batch), and "fine-tune" for
+              [Fine-tuning](https://platform.openai.com/docs/api-reference/fine-tuning).
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        body = deepcopy_minimal(
+            {
+                "file": file,
+                "purpose": purpose,
+            }
+        )
+        files = extract_files(cast(Mapping[str, object], body), paths=[["file"]])
+        # It should be noted that the actual Content-Type header that will be
+        # sent to the server will contain a `boundary` parameter, e.g.
+        # multipart/form-data; boundary=---abc--
+        extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})}
+        return await self._post(
+            "/files",
+            body=await async_maybe_transform(body, file_create_params.FileCreateParams),
+            files=files,
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=FileObject,
+        )
+
+    async def retrieve(
+        self,
+        file_id: str,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> FileObject:
+        """
+        Returns information about a specific file.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not file_id:
+            raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}")
+        return await self._get(
+            f"/files/{file_id}",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=FileObject,
+        )
+
+    def list(
+        self,
+        *,
+        after: str | NotGiven = NOT_GIVEN,
+        limit: int | NotGiven = NOT_GIVEN,
+        order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN,
+        purpose: str | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AsyncPaginator[FileObject, AsyncCursorPage[FileObject]]:
+        """Returns a list of files.
+
+        Args:
+          after: A cursor for use in pagination.
+
+        `after` is an object ID that defines your place
+              in the list. For instance, if you make a list request and receive 100 objects,
+              ending with obj_foo, your subsequent call can include after=obj_foo in order to
+              fetch the next page of the list.
+
+          limit: A limit on the number of objects to be returned. Limit can range between 1 and
+              10,000, and the default is 10,000.
+
+          order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending
+              order and `desc` for descending order.
+
+          purpose: Only return files with the given purpose.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        return self._get_api_list(
+            "/files",
+            page=AsyncCursorPage[FileObject],
+            options=make_request_options(
+                extra_headers=extra_headers,
+                extra_query=extra_query,
+                extra_body=extra_body,
+                timeout=timeout,
+                query=maybe_transform(
+                    {
+                        "after": after,
+                        "limit": limit,
+                        "order": order,
+                        "purpose": purpose,
+                    },
+                    file_list_params.FileListParams,
+                ),
+            ),
+            model=FileObject,
+        )
+
+    async def delete(
+        self,
+        file_id: str,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> FileDeleted:
+        """
+        Delete a file.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not file_id:
+            raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}")
+        return await self._delete(
+            f"/files/{file_id}",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=FileDeleted,
+        )
+
+    async def content(
+        self,
+        file_id: str,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> _legacy_response.HttpxBinaryResponseContent:
+        """
+        Returns the contents of the specified file.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not file_id:
+            raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}")
+        extra_headers = {"Accept": "application/binary", **(extra_headers or {})}
+        return await self._get(
+            f"/files/{file_id}/content",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=_legacy_response.HttpxBinaryResponseContent,
+        )
+
+    @typing_extensions.deprecated("The `.content()` method should be used instead")
+    async def retrieve_content(
+        self,
+        file_id: str,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> str:
+        """
+        Returns the contents of the specified file.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not file_id:
+            raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}")
+        return await self._get(
+            f"/files/{file_id}/content",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=str,
+        )
+
+    async def wait_for_processing(
+        self,
+        id: str,
+        *,
+        poll_interval: float = 5.0,
+        max_wait_seconds: float = 30 * 60,
+    ) -> FileObject:
+        """Waits for the given file to be processed, default timeout is 30 mins."""
+        TERMINAL_STATES = {"processed", "error", "deleted"}
+
+        start = time.time()
+        file = await self.retrieve(id)
+        while file.status not in TERMINAL_STATES:
+            await self._sleep(poll_interval)
+
+            file = await self.retrieve(id)
+            if time.time() - start > max_wait_seconds:
+                raise RuntimeError(
+                    f"Giving up on waiting for file {id} to finish processing after {max_wait_seconds} seconds."
+                )
+
+        return file
+
+
+class FilesWithRawResponse:
+    def __init__(self, files: Files) -> None:
+        self._files = files
+
+        self.create = _legacy_response.to_raw_response_wrapper(
+            files.create,
+        )
+        self.retrieve = _legacy_response.to_raw_response_wrapper(
+            files.retrieve,
+        )
+        self.list = _legacy_response.to_raw_response_wrapper(
+            files.list,
+        )
+        self.delete = _legacy_response.to_raw_response_wrapper(
+            files.delete,
+        )
+        self.content = _legacy_response.to_raw_response_wrapper(
+            files.content,
+        )
+        self.retrieve_content = (  # pyright: ignore[reportDeprecated]
+            _legacy_response.to_raw_response_wrapper(
+                files.retrieve_content  # pyright: ignore[reportDeprecated],
+            )
+        )
+
+
+class AsyncFilesWithRawResponse:
+    def __init__(self, files: AsyncFiles) -> None:
+        self._files = files
+
+        self.create = _legacy_response.async_to_raw_response_wrapper(
+            files.create,
+        )
+        self.retrieve = _legacy_response.async_to_raw_response_wrapper(
+            files.retrieve,
+        )
+        self.list = _legacy_response.async_to_raw_response_wrapper(
+            files.list,
+        )
+        self.delete = _legacy_response.async_to_raw_response_wrapper(
+            files.delete,
+        )
+        self.content = _legacy_response.async_to_raw_response_wrapper(
+            files.content,
+        )
+        self.retrieve_content = (  # pyright: ignore[reportDeprecated]
+            _legacy_response.async_to_raw_response_wrapper(
+                files.retrieve_content  # pyright: ignore[reportDeprecated],
+            )
+        )
+
+
+class FilesWithStreamingResponse:
+    def __init__(self, files: Files) -> None:
+        self._files = files
+
+        self.create = to_streamed_response_wrapper(
+            files.create,
+        )
+        self.retrieve = to_streamed_response_wrapper(
+            files.retrieve,
+        )
+        self.list = to_streamed_response_wrapper(
+            files.list,
+        )
+        self.delete = to_streamed_response_wrapper(
+            files.delete,
+        )
+        self.content = to_custom_streamed_response_wrapper(
+            files.content,
+            StreamedBinaryAPIResponse,
+        )
+        self.retrieve_content = (  # pyright: ignore[reportDeprecated]
+            to_streamed_response_wrapper(
+                files.retrieve_content  # pyright: ignore[reportDeprecated],
+            )
+        )
+
+
+class AsyncFilesWithStreamingResponse:
+    def __init__(self, files: AsyncFiles) -> None:
+        self._files = files
+
+        self.create = async_to_streamed_response_wrapper(
+            files.create,
+        )
+        self.retrieve = async_to_streamed_response_wrapper(
+            files.retrieve,
+        )
+        self.list = async_to_streamed_response_wrapper(
+            files.list,
+        )
+        self.delete = async_to_streamed_response_wrapper(
+            files.delete,
+        )
+        self.content = async_to_custom_streamed_response_wrapper(
+            files.content,
+            AsyncStreamedBinaryAPIResponse,
+        )
+        self.retrieve_content = (  # pyright: ignore[reportDeprecated]
+            async_to_streamed_response_wrapper(
+                files.retrieve_content  # pyright: ignore[reportDeprecated],
+            )
+        )
diff --git a/.venv/lib/python3.11/site-packages/openai/resources/images.py b/.venv/lib/python3.11/site-packages/openai/resources/images.py
new file mode 100644
index 0000000000000000000000000000000000000000..30473c14f773de55e3e890ac33bfff73c7e7b6f5
--- /dev/null
+++ b/.venv/lib/python3.11/site-packages/openai/resources/images.py
@@ -0,0 +1,600 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Union, Mapping, Optional, cast
+from typing_extensions import Literal
+
+import httpx
+
+from .. import _legacy_response
+from ..types import image_edit_params, image_generate_params, image_create_variation_params
+from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven, FileTypes
+from .._utils import (
+    extract_files,
+    maybe_transform,
+    deepcopy_minimal,
+    async_maybe_transform,
+)
+from .._compat import cached_property
+from .._resource import SyncAPIResource, AsyncAPIResource
+from .._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
+from .._base_client import make_request_options
+from ..types.image_model import ImageModel
+from ..types.images_response import ImagesResponse
+
+__all__ = ["Images", "AsyncImages"]
+
+
+class Images(SyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> ImagesWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return ImagesWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> ImagesWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return ImagesWithStreamingResponse(self)
+
+    def create_variation(
+        self,
+        *,
+        image: FileTypes,
+        model: Union[str, ImageModel, None] | NotGiven = NOT_GIVEN,
+        n: Optional[int] | NotGiven = NOT_GIVEN,
+        response_format: Optional[Literal["url", "b64_json"]] | NotGiven = NOT_GIVEN,
+        size: Optional[Literal["256x256", "512x512", "1024x1024"]] | NotGiven = NOT_GIVEN,
+        user: str | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> ImagesResponse:
+        """
+        Creates a variation of a given image.
+
+        Args:
+          image: The image to use as the basis for the variation(s). Must be a valid PNG file,
+              less than 4MB, and square.
+
+          model: The model to use for image generation. Only `dall-e-2` is supported at this
+              time.
+
+          n: The number of images to generate. Must be between 1 and 10. For `dall-e-3`, only
+              `n=1` is supported.
+
+          response_format: The format in which the generated images are returned. Must be one of `url` or
+              `b64_json`. URLs are only valid for 60 minutes after the image has been
+              generated.
+
+          size: The size of the generated images. Must be one of `256x256`, `512x512`, or
+              `1024x1024`.
+
+          user: A unique identifier representing your end-user, which can help OpenAI to monitor
+              and detect abuse.
+              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids).
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        body = deepcopy_minimal(
+            {
+                "image": image,
+                "model": model,
+                "n": n,
+                "response_format": response_format,
+                "size": size,
+                "user": user,
+            }
+        )
+        files = extract_files(cast(Mapping[str, object], body), paths=[["image"]])
+        # It should be noted that the actual Content-Type header that will be
+        # sent to the server will contain a `boundary` parameter, e.g.
+        # multipart/form-data; boundary=---abc--
+        extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})}
+        return self._post(
+            "/images/variations",
+            body=maybe_transform(body, image_create_variation_params.ImageCreateVariationParams),
+            files=files,
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=ImagesResponse,
+        )
+
+    def edit(
+        self,
+        *,
+        image: FileTypes,
+        prompt: str,
+        mask: FileTypes | NotGiven = NOT_GIVEN,
+        model: Union[str, ImageModel, None] | NotGiven = NOT_GIVEN,
+        n: Optional[int] | NotGiven = NOT_GIVEN,
+        response_format: Optional[Literal["url", "b64_json"]] | NotGiven = NOT_GIVEN,
+        size: Optional[Literal["256x256", "512x512", "1024x1024"]] | NotGiven = NOT_GIVEN,
+        user: str | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> ImagesResponse:
+        """
+        Creates an edited or extended image given an original image and a prompt.
+
+        Args:
+          image: The image to edit. Must be a valid PNG file, less than 4MB, and square. If mask
+              is not provided, image must have transparency, which will be used as the mask.
+
+          prompt: A text description of the desired image(s). The maximum length is 1000
+              characters.
+
+          mask: An additional image whose fully transparent areas (e.g. where alpha is zero)
+              indicate where `image` should be edited. Must be a valid PNG file, less than
+              4MB, and have the same dimensions as `image`.
+
+          model: The model to use for image generation. Only `dall-e-2` is supported at this
+              time.
+
+          n: The number of images to generate. Must be between 1 and 10.
+
+          response_format: The format in which the generated images are returned. Must be one of `url` or
+              `b64_json`. URLs are only valid for 60 minutes after the image has been
+              generated.
+
+          size: The size of the generated images. Must be one of `256x256`, `512x512`, or
+              `1024x1024`.
+
+          user: A unique identifier representing your end-user, which can help OpenAI to monitor
+              and detect abuse.
+              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids).
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        body = deepcopy_minimal(
+            {
+                "image": image,
+                "prompt": prompt,
+                "mask": mask,
+                "model": model,
+                "n": n,
+                "response_format": response_format,
+                "size": size,
+                "user": user,
+            }
+        )
+        files = extract_files(cast(Mapping[str, object], body), paths=[["image"], ["mask"]])
+        # It should be noted that the actual Content-Type header that will be
+        # sent to the server will contain a `boundary` parameter, e.g.
+        # multipart/form-data; boundary=---abc--
+        extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})}
+        return self._post(
+            "/images/edits",
+            body=maybe_transform(body, image_edit_params.ImageEditParams),
+            files=files,
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=ImagesResponse,
+        )
+
+    def generate(
+        self,
+        *,
+        prompt: str,
+        model: Union[str, ImageModel, None] | NotGiven = NOT_GIVEN,
+        n: Optional[int] | NotGiven = NOT_GIVEN,
+        quality: Literal["standard", "hd"] | NotGiven = NOT_GIVEN,
+        response_format: Optional[Literal["url", "b64_json"]] | NotGiven = NOT_GIVEN,
+        size: Optional[Literal["256x256", "512x512", "1024x1024", "1792x1024", "1024x1792"]] | NotGiven = NOT_GIVEN,
+        style: Optional[Literal["vivid", "natural"]] | NotGiven = NOT_GIVEN,
+        user: str | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> ImagesResponse:
+        """
+        Creates an image given a prompt.
+
+        Args:
+          prompt: A text description of the desired image(s). The maximum length is 1000
+              characters for `dall-e-2` and 4000 characters for `dall-e-3`.
+
+          model: The model to use for image generation.
+
+          n: The number of images to generate. Must be between 1 and 10. For `dall-e-3`, only
+              `n=1` is supported.
+
+          quality: The quality of the image that will be generated. `hd` creates images with finer
+              details and greater consistency across the image. This param is only supported
+              for `dall-e-3`.
+
+          response_format: The format in which the generated images are returned. Must be one of `url` or
+              `b64_json`. URLs are only valid for 60 minutes after the image has been
+              generated.
+
+          size: The size of the generated images. Must be one of `256x256`, `512x512`, or
+              `1024x1024` for `dall-e-2`. Must be one of `1024x1024`, `1792x1024`, or
+              `1024x1792` for `dall-e-3` models.
+
+          style: The style of the generated images. Must be one of `vivid` or `natural`. Vivid
+              causes the model to lean towards generating hyper-real and dramatic images.
+              Natural causes the model to produce more natural, less hyper-real looking
+              images. This param is only supported for `dall-e-3`.
+
+          user: A unique identifier representing your end-user, which can help OpenAI to monitor
+              and detect abuse.
+              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids).
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        return self._post(
+            "/images/generations",
+            body=maybe_transform(
+                {
+                    "prompt": prompt,
+                    "model": model,
+                    "n": n,
+                    "quality": quality,
+                    "response_format": response_format,
+                    "size": size,
+                    "style": style,
+                    "user": user,
+                },
+                image_generate_params.ImageGenerateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=ImagesResponse,
+        )
+
+
+class AsyncImages(AsyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> AsyncImagesWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return AsyncImagesWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> AsyncImagesWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return AsyncImagesWithStreamingResponse(self)
+
+    async def create_variation(
+        self,
+        *,
+        image: FileTypes,
+        model: Union[str, ImageModel, None] | NotGiven = NOT_GIVEN,
+        n: Optional[int] | NotGiven = NOT_GIVEN,
+        response_format: Optional[Literal["url", "b64_json"]] | NotGiven = NOT_GIVEN,
+        size: Optional[Literal["256x256", "512x512", "1024x1024"]] | NotGiven = NOT_GIVEN,
+        user: str | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> ImagesResponse:
+        """
+        Creates a variation of a given image.
+
+        Args:
+          image: The image to use as the basis for the variation(s). Must be a valid PNG file,
+              less than 4MB, and square.
+
+          model: The model to use for image generation. Only `dall-e-2` is supported at this
+              time.
+
+          n: The number of images to generate. Must be between 1 and 10. For `dall-e-3`, only
+              `n=1` is supported.
+
+          response_format: The format in which the generated images are returned. Must be one of `url` or
+              `b64_json`. URLs are only valid for 60 minutes after the image has been
+              generated.
+
+          size: The size of the generated images. Must be one of `256x256`, `512x512`, or
+              `1024x1024`.
+
+          user: A unique identifier representing your end-user, which can help OpenAI to monitor
+              and detect abuse.
+              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids).
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        body = deepcopy_minimal(
+            {
+                "image": image,
+                "model": model,
+                "n": n,
+                "response_format": response_format,
+                "size": size,
+                "user": user,
+            }
+        )
+        files = extract_files(cast(Mapping[str, object], body), paths=[["image"]])
+        # It should be noted that the actual Content-Type header that will be
+        # sent to the server will contain a `boundary` parameter, e.g.
+        # multipart/form-data; boundary=---abc--
+        extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})}
+        return await self._post(
+            "/images/variations",
+            body=await async_maybe_transform(body, image_create_variation_params.ImageCreateVariationParams),
+            files=files,
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=ImagesResponse,
+        )
+
+    async def edit(
+        self,
+        *,
+        image: FileTypes,
+        prompt: str,
+        mask: FileTypes | NotGiven = NOT_GIVEN,
+        model: Union[str, ImageModel, None] | NotGiven = NOT_GIVEN,
+        n: Optional[int] | NotGiven = NOT_GIVEN,
+        response_format: Optional[Literal["url", "b64_json"]] | NotGiven = NOT_GIVEN,
+        size: Optional[Literal["256x256", "512x512", "1024x1024"]] | NotGiven = NOT_GIVEN,
+        user: str | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> ImagesResponse:
+        """
+        Creates an edited or extended image given an original image and a prompt.
+
+        Args:
+          image: The image to edit. Must be a valid PNG file, less than 4MB, and square. If mask
+              is not provided, image must have transparency, which will be used as the mask.
+
+          prompt: A text description of the desired image(s). The maximum length is 1000
+              characters.
+
+          mask: An additional image whose fully transparent areas (e.g. where alpha is zero)
+              indicate where `image` should be edited. Must be a valid PNG file, less than
+              4MB, and have the same dimensions as `image`.
+
+          model: The model to use for image generation. Only `dall-e-2` is supported at this
+              time.
+
+          n: The number of images to generate. Must be between 1 and 10.
+
+          response_format: The format in which the generated images are returned. Must be one of `url` or
+              `b64_json`. URLs are only valid for 60 minutes after the image has been
+              generated.
+
+          size: The size of the generated images. Must be one of `256x256`, `512x512`, or
+              `1024x1024`.
+
+          user: A unique identifier representing your end-user, which can help OpenAI to monitor
+              and detect abuse.
+              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids).
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        body = deepcopy_minimal(
+            {
+                "image": image,
+                "prompt": prompt,
+                "mask": mask,
+                "model": model,
+                "n": n,
+                "response_format": response_format,
+                "size": size,
+                "user": user,
+            }
+        )
+        files = extract_files(cast(Mapping[str, object], body), paths=[["image"], ["mask"]])
+        # It should be noted that the actual Content-Type header that will be
+        # sent to the server will contain a `boundary` parameter, e.g.
+        # multipart/form-data; boundary=---abc--
+        extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})}
+        return await self._post(
+            "/images/edits",
+            body=await async_maybe_transform(body, image_edit_params.ImageEditParams),
+            files=files,
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=ImagesResponse,
+        )
+
+    async def generate(
+        self,
+        *,
+        prompt: str,
+        model: Union[str, ImageModel, None] | NotGiven = NOT_GIVEN,
+        n: Optional[int] | NotGiven = NOT_GIVEN,
+        quality: Literal["standard", "hd"] | NotGiven = NOT_GIVEN,
+        response_format: Optional[Literal["url", "b64_json"]] | NotGiven = NOT_GIVEN,
+        size: Optional[Literal["256x256", "512x512", "1024x1024", "1792x1024", "1024x1792"]] | NotGiven = NOT_GIVEN,
+        style: Optional[Literal["vivid", "natural"]] | NotGiven = NOT_GIVEN,
+        user: str | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> ImagesResponse:
+        """
+        Creates an image given a prompt.
+
+        Args:
+          prompt: A text description of the desired image(s). The maximum length is 1000
+              characters for `dall-e-2` and 4000 characters for `dall-e-3`.
+
+          model: The model to use for image generation.
+
+          n: The number of images to generate. Must be between 1 and 10. For `dall-e-3`, only
+              `n=1` is supported.
+
+          quality: The quality of the image that will be generated. `hd` creates images with finer
+              details and greater consistency across the image. This param is only supported
+              for `dall-e-3`.
+
+          response_format: The format in which the generated images are returned. Must be one of `url` or
+              `b64_json`. URLs are only valid for 60 minutes after the image has been
+              generated.
+
+          size: The size of the generated images. Must be one of `256x256`, `512x512`, or
+              `1024x1024` for `dall-e-2`. Must be one of `1024x1024`, `1792x1024`, or
+              `1024x1792` for `dall-e-3` models.
+
+          style: The style of the generated images. Must be one of `vivid` or `natural`. Vivid
+              causes the model to lean towards generating hyper-real and dramatic images.
+              Natural causes the model to produce more natural, less hyper-real looking
+              images. This param is only supported for `dall-e-3`.
+
+          user: A unique identifier representing your end-user, which can help OpenAI to monitor
+              and detect abuse.
+              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids).
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        return await self._post(
+            "/images/generations",
+            body=await async_maybe_transform(
+                {
+                    "prompt": prompt,
+                    "model": model,
+                    "n": n,
+                    "quality": quality,
+                    "response_format": response_format,
+                    "size": size,
+                    "style": style,
+                    "user": user,
+                },
+                image_generate_params.ImageGenerateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=ImagesResponse,
+        )
+
+
+class ImagesWithRawResponse:
+    def __init__(self, images: Images) -> None:
+        self._images = images
+
+        self.create_variation = _legacy_response.to_raw_response_wrapper(
+            images.create_variation,
+        )
+        self.edit = _legacy_response.to_raw_response_wrapper(
+            images.edit,
+        )
+        self.generate = _legacy_response.to_raw_response_wrapper(
+            images.generate,
+        )
+
+
+class AsyncImagesWithRawResponse:
+    def __init__(self, images: AsyncImages) -> None:
+        self._images = images
+
+        self.create_variation = _legacy_response.async_to_raw_response_wrapper(
+            images.create_variation,
+        )
+        self.edit = _legacy_response.async_to_raw_response_wrapper(
+            images.edit,
+        )
+        self.generate = _legacy_response.async_to_raw_response_wrapper(
+            images.generate,
+        )
+
+
+class ImagesWithStreamingResponse:
+    def __init__(self, images: Images) -> None:
+        self._images = images
+
+        self.create_variation = to_streamed_response_wrapper(
+            images.create_variation,
+        )
+        self.edit = to_streamed_response_wrapper(
+            images.edit,
+        )
+        self.generate = to_streamed_response_wrapper(
+            images.generate,
+        )
+
+
+class AsyncImagesWithStreamingResponse:
+    def __init__(self, images: AsyncImages) -> None:
+        self._images = images
+
+        self.create_variation = async_to_streamed_response_wrapper(
+            images.create_variation,
+        )
+        self.edit = async_to_streamed_response_wrapper(
+            images.edit,
+        )
+        self.generate = async_to_streamed_response_wrapper(
+            images.generate,
+        )
diff --git a/.venv/lib/python3.11/site-packages/openai/resources/models.py b/.venv/lib/python3.11/site-packages/openai/resources/models.py
new file mode 100644
index 0000000000000000000000000000000000000000..a9693a6b0a6d1666ceef6a73833d0210c85b6f1c
--- /dev/null
+++ b/.venv/lib/python3.11/site-packages/openai/resources/models.py
@@ -0,0 +1,306 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import httpx
+
+from .. import _legacy_response
+from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven
+from .._compat import cached_property
+from .._resource import SyncAPIResource, AsyncAPIResource
+from .._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
+from ..pagination import SyncPage, AsyncPage
+from ..types.model import Model
+from .._base_client import (
+    AsyncPaginator,
+    make_request_options,
+)
+from ..types.model_deleted import ModelDeleted
+
+__all__ = ["Models", "AsyncModels"]
+
+
+class Models(SyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> ModelsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return ModelsWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> ModelsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return ModelsWithStreamingResponse(self)
+
+    def retrieve(
+        self,
+        model: str,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Model:
+        """
+        Retrieves a model instance, providing basic information about the model such as
+        the owner and permissioning.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not model:
+            raise ValueError(f"Expected a non-empty value for `model` but received {model!r}")
+        return self._get(
+            f"/models/{model}",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Model,
+        )
+
+    def list(
+        self,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> SyncPage[Model]:
+        """
+        Lists the currently available models, and provides basic information about each
+        one such as the owner and availability.
+        """
+        return self._get_api_list(
+            "/models",
+            page=SyncPage[Model],
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            model=Model,
+        )
+
+    def delete(
+        self,
+        model: str,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> ModelDeleted:
+        """Delete a fine-tuned model.
+
+        You must have the Owner role in your organization to
+        delete a model.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not model:
+            raise ValueError(f"Expected a non-empty value for `model` but received {model!r}")
+        return self._delete(
+            f"/models/{model}",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=ModelDeleted,
+        )
+
+
+class AsyncModels(AsyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> AsyncModelsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return AsyncModelsWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> AsyncModelsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return AsyncModelsWithStreamingResponse(self)
+
+    async def retrieve(
+        self,
+        model: str,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Model:
+        """
+        Retrieves a model instance, providing basic information about the model such as
+        the owner and permissioning.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not model:
+            raise ValueError(f"Expected a non-empty value for `model` but received {model!r}")
+        return await self._get(
+            f"/models/{model}",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Model,
+        )
+
+    def list(
+        self,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AsyncPaginator[Model, AsyncPage[Model]]:
+        """
+        Lists the currently available models, and provides basic information about each
+        one such as the owner and availability.
+        """
+        return self._get_api_list(
+            "/models",
+            page=AsyncPage[Model],
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            model=Model,
+        )
+
+    async def delete(
+        self,
+        model: str,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> ModelDeleted:
+        """Delete a fine-tuned model.
+
+        You must have the Owner role in your organization to
+        delete a model.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not model:
+            raise ValueError(f"Expected a non-empty value for `model` but received {model!r}")
+        return await self._delete(
+            f"/models/{model}",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=ModelDeleted,
+        )
+
+
+class ModelsWithRawResponse:
+    def __init__(self, models: Models) -> None:
+        self._models = models
+
+        self.retrieve = _legacy_response.to_raw_response_wrapper(
+            models.retrieve,
+        )
+        self.list = _legacy_response.to_raw_response_wrapper(
+            models.list,
+        )
+        self.delete = _legacy_response.to_raw_response_wrapper(
+            models.delete,
+        )
+
+
+class AsyncModelsWithRawResponse:
+    def __init__(self, models: AsyncModels) -> None:
+        self._models = models
+
+        self.retrieve = _legacy_response.async_to_raw_response_wrapper(
+            models.retrieve,
+        )
+        self.list = _legacy_response.async_to_raw_response_wrapper(
+            models.list,
+        )
+        self.delete = _legacy_response.async_to_raw_response_wrapper(
+            models.delete,
+        )
+
+
+class ModelsWithStreamingResponse:
+    def __init__(self, models: Models) -> None:
+        self._models = models
+
+        self.retrieve = to_streamed_response_wrapper(
+            models.retrieve,
+        )
+        self.list = to_streamed_response_wrapper(
+            models.list,
+        )
+        self.delete = to_streamed_response_wrapper(
+            models.delete,
+        )
+
+
+class AsyncModelsWithStreamingResponse:
+    def __init__(self, models: AsyncModels) -> None:
+        self._models = models
+
+        self.retrieve = async_to_streamed_response_wrapper(
+            models.retrieve,
+        )
+        self.list = async_to_streamed_response_wrapper(
+            models.list,
+        )
+        self.delete = async_to_streamed_response_wrapper(
+            models.delete,
+        )
diff --git a/.venv/lib/python3.11/site-packages/openai/resources/moderations.py b/.venv/lib/python3.11/site-packages/openai/resources/moderations.py
new file mode 100644
index 0000000000000000000000000000000000000000..a8f03142bc7b76d115edbe08068fcb0973d418ab
--- /dev/null
+++ b/.venv/lib/python3.11/site-packages/openai/resources/moderations.py
@@ -0,0 +1,200 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import List, Union, Iterable
+
+import httpx
+
+from .. import _legacy_response
+from ..types import moderation_create_params
+from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven
+from .._utils import (
+    maybe_transform,
+    async_maybe_transform,
+)
+from .._compat import cached_property
+from .._resource import SyncAPIResource, AsyncAPIResource
+from .._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
+from .._base_client import make_request_options
+from ..types.moderation_model import ModerationModel
+from ..types.moderation_create_response import ModerationCreateResponse
+from ..types.moderation_multi_modal_input_param import ModerationMultiModalInputParam
+
+__all__ = ["Moderations", "AsyncModerations"]
+
+
+class Moderations(SyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> ModerationsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return ModerationsWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> ModerationsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return ModerationsWithStreamingResponse(self)
+
+    def create(
+        self,
+        *,
+        input: Union[str, List[str], Iterable[ModerationMultiModalInputParam]],
+        model: Union[str, ModerationModel] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> ModerationCreateResponse:
+        """Classifies if text and/or image inputs are potentially harmful.
+
+        Learn more in
+        the [moderation guide](https://platform.openai.com/docs/guides/moderation).
+
+        Args:
+          input: Input (or inputs) to classify. Can be a single string, an array of strings, or
+              an array of multi-modal input objects similar to other models.
+
+          model: The content moderation model you would like to use. Learn more in
+              [the moderation guide](https://platform.openai.com/docs/guides/moderation), and
+              learn about available models
+              [here](https://platform.openai.com/docs/models#moderation).
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        return self._post(
+            "/moderations",
+            body=maybe_transform(
+                {
+                    "input": input,
+                    "model": model,
+                },
+                moderation_create_params.ModerationCreateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=ModerationCreateResponse,
+        )
+
+
+class AsyncModerations(AsyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> AsyncModerationsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return AsyncModerationsWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> AsyncModerationsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return AsyncModerationsWithStreamingResponse(self)
+
+    async def create(
+        self,
+        *,
+        input: Union[str, List[str], Iterable[ModerationMultiModalInputParam]],
+        model: Union[str, ModerationModel] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> ModerationCreateResponse:
+        """Classifies if text and/or image inputs are potentially harmful.
+
+        Learn more in
+        the [moderation guide](https://platform.openai.com/docs/guides/moderation).
+
+        Args:
+          input: Input (or inputs) to classify. Can be a single string, an array of strings, or
+              an array of multi-modal input objects similar to other models.
+
+          model: The content moderation model you would like to use. Learn more in
+              [the moderation guide](https://platform.openai.com/docs/guides/moderation), and
+              learn about available models
+              [here](https://platform.openai.com/docs/models#moderation).
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        return await self._post(
+            "/moderations",
+            body=await async_maybe_transform(
+                {
+                    "input": input,
+                    "model": model,
+                },
+                moderation_create_params.ModerationCreateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=ModerationCreateResponse,
+        )
+
+
+class ModerationsWithRawResponse:
+    def __init__(self, moderations: Moderations) -> None:
+        self._moderations = moderations
+
+        self.create = _legacy_response.to_raw_response_wrapper(
+            moderations.create,
+        )
+
+
+class AsyncModerationsWithRawResponse:
+    def __init__(self, moderations: AsyncModerations) -> None:
+        self._moderations = moderations
+
+        self.create = _legacy_response.async_to_raw_response_wrapper(
+            moderations.create,
+        )
+
+
+class ModerationsWithStreamingResponse:
+    def __init__(self, moderations: Moderations) -> None:
+        self._moderations = moderations
+
+        self.create = to_streamed_response_wrapper(
+            moderations.create,
+        )
+
+
+class AsyncModerationsWithStreamingResponse:
+    def __init__(self, moderations: AsyncModerations) -> None:
+        self._moderations = moderations
+
+        self.create = async_to_streamed_response_wrapper(
+            moderations.create,
+        )