| # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. | |
| from __future__ import annotations | |
| import asyncio | |
| from typing import Dict, Iterable, Optional | |
| from typing_extensions import Union, Literal | |
| from concurrent.futures import Future, ThreadPoolExecutor, as_completed | |
| import httpx | |
| import sniffio | |
| from ... import _legacy_response | |
| from ...types import FileChunkingStrategyParam | |
| from ..._types import Body, Omit, Query, Headers, NotGiven, FileTypes, SequenceNotStr, omit, not_given | |
| from ..._utils import is_given, maybe_transform, async_maybe_transform | |
| from ..._compat import cached_property | |
| from ..._resource import SyncAPIResource, AsyncAPIResource | |
| from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper | |
| from ...pagination import SyncCursorPage, AsyncCursorPage | |
| from ..._base_client import AsyncPaginator, make_request_options | |
| from ...types.file_object import FileObject | |
| from ...types.vector_stores import file_batch_create_params, file_batch_list_files_params | |
| from ...types.file_chunking_strategy_param import FileChunkingStrategyParam | |
| from ...types.vector_stores.vector_store_file import VectorStoreFile | |
| from ...types.vector_stores.vector_store_file_batch import VectorStoreFileBatch | |
| __all__ = ["FileBatches", "AsyncFileBatches"] | |
class FileBatches(SyncAPIResource):
    """Synchronous resource for creating, retrieving, cancelling and polling
    vector store file batches."""

    # NOTE: without @cached_property this would be a plain method, and
    # `self.with_raw_response.retrieve(...)` in `poll()` would fail at runtime
    # (a bound method has no `.retrieve` attribute). The decorator also caches
    # the wrapper so repeated access reuses one instance.
    @cached_property
    def with_raw_response(self) -> FileBatchesWithRawResponse:
        """
        This property can be used as a prefix for any HTTP method call to return
        the raw response object instead of the parsed content.

        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
        """
        return FileBatchesWithRawResponse(self)

    @cached_property
    def with_streaming_response(self) -> FileBatchesWithStreamingResponse:
        """
        An alternative to `.with_raw_response` that doesn't eagerly read the response body.

        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
        """
        return FileBatchesWithStreamingResponse(self)

    def create(
        self,
        vector_store_id: str,
        *,
        attributes: Optional[Dict[str, Union[str, float, bool]]] | Omit = omit,
        chunking_strategy: FileChunkingStrategyParam | Omit = omit,
        file_ids: SequenceNotStr[str] | Omit = omit,
        files: Iterable[file_batch_create_params.File] | Omit = omit,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> VectorStoreFileBatch:
        """
        Create a vector store file batch.

        Args:
          attributes: Set of 16 key-value pairs that can be attached to an object. This can be useful
              for storing additional information about the object in a structured format, and
              querying for objects via API or the dashboard. Keys are strings with a maximum
              length of 64 characters. Values are strings with a maximum length of 512
              characters, booleans, or numbers.

          chunking_strategy: The chunking strategy used to chunk the file(s). If not set, will use the `auto`
              strategy. Only applicable if `file_ids` is non-empty.

          file_ids: A list of [File](https://platform.openai.com/docs/api-reference/files) IDs that
              the vector store should use. Useful for tools like `file_search` that can access
              files. If `attributes` or `chunking_strategy` are provided, they will be applied
              to all files in the batch. The maximum batch size is 2000 files. Mutually
              exclusive with `files`.

          files: A list of objects that each include a `file_id` plus optional `attributes` or
              `chunking_strategy`. Use this when you need to override metadata for specific
              files. The global `attributes` or `chunking_strategy` will be ignored and must
              be specified for each file. The maximum batch size is 2000 files. Mutually
              exclusive with `file_ids`.

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
        """
        if not vector_store_id:
            raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}")
        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
        return self._post(
            f"/vector_stores/{vector_store_id}/file_batches",
            body=maybe_transform(
                {
                    "attributes": attributes,
                    "chunking_strategy": chunking_strategy,
                    "file_ids": file_ids,
                    "files": files,
                },
                file_batch_create_params.FileBatchCreateParams,
            ),
            options=make_request_options(
                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
            ),
            cast_to=VectorStoreFileBatch,
        )

    def retrieve(
        self,
        batch_id: str,
        *,
        vector_store_id: str,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> VectorStoreFileBatch:
        """
        Retrieves a vector store file batch.

        Args:
          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
        """
        if not vector_store_id:
            raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}")
        if not batch_id:
            raise ValueError(f"Expected a non-empty value for `batch_id` but received {batch_id!r}")
        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
        return self._get(
            f"/vector_stores/{vector_store_id}/file_batches/{batch_id}",
            options=make_request_options(
                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
            ),
            cast_to=VectorStoreFileBatch,
        )

    def cancel(
        self,
        batch_id: str,
        *,
        vector_store_id: str,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> VectorStoreFileBatch:
        """Cancel a vector store file batch.

        This attempts to cancel the processing of
        files in this batch as soon as possible.

        Args:
          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
        """
        if not vector_store_id:
            raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}")
        if not batch_id:
            raise ValueError(f"Expected a non-empty value for `batch_id` but received {batch_id!r}")
        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
        return self._post(
            f"/vector_stores/{vector_store_id}/file_batches/{batch_id}/cancel",
            options=make_request_options(
                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
            ),
            cast_to=VectorStoreFileBatch,
        )

    def create_and_poll(
        self,
        vector_store_id: str,
        *,
        attributes: Optional[Dict[str, Union[str, float, bool]]] | Omit = omit,
        chunking_strategy: FileChunkingStrategyParam | Omit = omit,
        file_ids: SequenceNotStr[str] | Omit = omit,
        files: Iterable[file_batch_create_params.File] | Omit = omit,
        poll_interval_ms: int | Omit = omit,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> VectorStoreFileBatch:
        """Create a vector store batch and poll until all files have been processed."""
        batch = self.create(
            vector_store_id=vector_store_id,
            attributes=attributes,
            chunking_strategy=chunking_strategy,
            file_ids=file_ids,
            files=files,
            extra_headers=extra_headers,
            extra_query=extra_query,
            extra_body=extra_body,
            timeout=timeout,
        )
        # TODO: don't poll unless necessary??
        return self.poll(
            batch.id,
            vector_store_id=vector_store_id,
            poll_interval_ms=poll_interval_ms,
        )

    def list_files(
        self,
        batch_id: str,
        *,
        vector_store_id: str,
        after: str | Omit = omit,
        before: str | Omit = omit,
        filter: Literal["in_progress", "completed", "failed", "cancelled"] | Omit = omit,
        limit: int | Omit = omit,
        order: Literal["asc", "desc"] | Omit = omit,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> SyncCursorPage[VectorStoreFile]:
        """
        Returns a list of vector store files in a batch.

        Args:
          after: A cursor for use in pagination. `after` is an object ID that defines your place
              in the list. For instance, if you make a list request and receive 100 objects,
              ending with obj_foo, your subsequent call can include after=obj_foo in order to
              fetch the next page of the list.

          before: A cursor for use in pagination. `before` is an object ID that defines your place
              in the list. For instance, if you make a list request and receive 100 objects,
              starting with obj_foo, your subsequent call can include before=obj_foo in order
              to fetch the previous page of the list.

          filter: Filter by file status. One of `in_progress`, `completed`, `failed`, `cancelled`.

          limit: A limit on the number of objects to be returned. Limit can range between 1 and
              100, and the default is 20.

          order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending
              order and `desc` for descending order.

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
        """
        if not vector_store_id:
            raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}")
        if not batch_id:
            raise ValueError(f"Expected a non-empty value for `batch_id` but received {batch_id!r}")
        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
        return self._get_api_list(
            f"/vector_stores/{vector_store_id}/file_batches/{batch_id}/files",
            page=SyncCursorPage[VectorStoreFile],
            options=make_request_options(
                extra_headers=extra_headers,
                extra_query=extra_query,
                extra_body=extra_body,
                timeout=timeout,
                query=maybe_transform(
                    {
                        "after": after,
                        "before": before,
                        "filter": filter,
                        "limit": limit,
                        "order": order,
                    },
                    file_batch_list_files_params.FileBatchListFilesParams,
                ),
            ),
            model=VectorStoreFile,
        )

    def poll(
        self,
        batch_id: str,
        *,
        vector_store_id: str,
        poll_interval_ms: int | Omit = omit,
    ) -> VectorStoreFileBatch:
        """Wait for the given file batch to be processed.

        Note: this will return even if one of the files failed to process, you need to
        check batch.file_counts.failed_count to handle this case.
        """
        headers: dict[str, str] = {"X-Stainless-Poll-Helper": "true"}
        if is_given(poll_interval_ms):
            headers["X-Stainless-Custom-Poll-Interval"] = str(poll_interval_ms)

        while True:
            response = self.with_raw_response.retrieve(
                batch_id,
                vector_store_id=vector_store_id,
                extra_headers=headers,
            )

            batch = response.parse()
            if batch.file_counts.in_progress > 0:
                if not is_given(poll_interval_ms):
                    # Prefer the server-suggested interval; fall back to 1s.
                    from_header = response.headers.get("openai-poll-after-ms")
                    if from_header is not None:
                        poll_interval_ms = int(from_header)
                    else:
                        poll_interval_ms = 1000

                self._sleep(poll_interval_ms / 1000)
                continue

            return batch

    def upload_and_poll(
        self,
        vector_store_id: str,
        *,
        files: Iterable[FileTypes],
        max_concurrency: int = 5,
        file_ids: SequenceNotStr[str] = (),
        poll_interval_ms: int | Omit = omit,
        chunking_strategy: FileChunkingStrategyParam | Omit = omit,
    ) -> VectorStoreFileBatch:
        """Uploads the given files concurrently and then creates a vector store file batch.

        If you've already uploaded certain files that you want to include in this batch
        then you can pass their IDs through the `file_ids` argument.

        By default, if any file upload fails then an exception will be eagerly raised.

        The number of concurrent uploads is configurable using the `max_concurrency`
        parameter; uploads run on a thread pool.
        """
        results: list[FileObject] = []

        with ThreadPoolExecutor(max_workers=max_concurrency) as executor:
            futures: list[Future[FileObject]] = [
                executor.submit(
                    self._client.files.create,
                    file=file,
                    purpose="assistants",
                )
                for file in files
            ]

            for future in as_completed(futures):
                # Raise the first upload failure eagerly rather than
                # continuing with a partial batch.
                exc = future.exception()
                if exc:
                    raise exc
                results.append(future.result())

        batch = self.create_and_poll(
            vector_store_id=vector_store_id,
            file_ids=[*file_ids, *(f.id for f in results)],
            poll_interval_ms=poll_interval_ms,
            chunking_strategy=chunking_strategy,
        )
        return batch
class AsyncFileBatches(AsyncAPIResource):
    """Asynchronous resource for creating, retrieving, cancelling and polling
    vector store file batches."""

    # NOTE: without @cached_property this would be a plain method, and
    # `self.with_raw_response.retrieve(...)` in `poll()` would fail at runtime
    # (a bound method has no `.retrieve` attribute). The decorator also caches
    # the wrapper so repeated access reuses one instance.
    @cached_property
    def with_raw_response(self) -> AsyncFileBatchesWithRawResponse:
        """
        This property can be used as a prefix for any HTTP method call to return
        the raw response object instead of the parsed content.

        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
        """
        return AsyncFileBatchesWithRawResponse(self)

    @cached_property
    def with_streaming_response(self) -> AsyncFileBatchesWithStreamingResponse:
        """
        An alternative to `.with_raw_response` that doesn't eagerly read the response body.

        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
        """
        return AsyncFileBatchesWithStreamingResponse(self)

    async def create(
        self,
        vector_store_id: str,
        *,
        attributes: Optional[Dict[str, Union[str, float, bool]]] | Omit = omit,
        chunking_strategy: FileChunkingStrategyParam | Omit = omit,
        file_ids: SequenceNotStr[str] | Omit = omit,
        files: Iterable[file_batch_create_params.File] | Omit = omit,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> VectorStoreFileBatch:
        """
        Create a vector store file batch.

        Args:
          attributes: Set of 16 key-value pairs that can be attached to an object. This can be useful
              for storing additional information about the object in a structured format, and
              querying for objects via API or the dashboard. Keys are strings with a maximum
              length of 64 characters. Values are strings with a maximum length of 512
              characters, booleans, or numbers.

          chunking_strategy: The chunking strategy used to chunk the file(s). If not set, will use the `auto`
              strategy. Only applicable if `file_ids` is non-empty.

          file_ids: A list of [File](https://platform.openai.com/docs/api-reference/files) IDs that
              the vector store should use. Useful for tools like `file_search` that can access
              files. If `attributes` or `chunking_strategy` are provided, they will be applied
              to all files in the batch. The maximum batch size is 2000 files. Mutually
              exclusive with `files`.

          files: A list of objects that each include a `file_id` plus optional `attributes` or
              `chunking_strategy`. Use this when you need to override metadata for specific
              files. The global `attributes` or `chunking_strategy` will be ignored and must
              be specified for each file. The maximum batch size is 2000 files. Mutually
              exclusive with `file_ids`.

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
        """
        if not vector_store_id:
            raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}")
        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
        return await self._post(
            f"/vector_stores/{vector_store_id}/file_batches",
            body=await async_maybe_transform(
                {
                    "attributes": attributes,
                    "chunking_strategy": chunking_strategy,
                    "file_ids": file_ids,
                    "files": files,
                },
                file_batch_create_params.FileBatchCreateParams,
            ),
            options=make_request_options(
                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
            ),
            cast_to=VectorStoreFileBatch,
        )

    async def retrieve(
        self,
        batch_id: str,
        *,
        vector_store_id: str,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> VectorStoreFileBatch:
        """
        Retrieves a vector store file batch.

        Args:
          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
        """
        if not vector_store_id:
            raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}")
        if not batch_id:
            raise ValueError(f"Expected a non-empty value for `batch_id` but received {batch_id!r}")
        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
        return await self._get(
            f"/vector_stores/{vector_store_id}/file_batches/{batch_id}",
            options=make_request_options(
                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
            ),
            cast_to=VectorStoreFileBatch,
        )

    async def cancel(
        self,
        batch_id: str,
        *,
        vector_store_id: str,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> VectorStoreFileBatch:
        """Cancel a vector store file batch.

        This attempts to cancel the processing of
        files in this batch as soon as possible.

        Args:
          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
        """
        if not vector_store_id:
            raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}")
        if not batch_id:
            raise ValueError(f"Expected a non-empty value for `batch_id` but received {batch_id!r}")
        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
        return await self._post(
            f"/vector_stores/{vector_store_id}/file_batches/{batch_id}/cancel",
            options=make_request_options(
                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
            ),
            cast_to=VectorStoreFileBatch,
        )

    async def create_and_poll(
        self,
        vector_store_id: str,
        *,
        attributes: Optional[Dict[str, Union[str, float, bool]]] | Omit = omit,
        chunking_strategy: FileChunkingStrategyParam | Omit = omit,
        file_ids: SequenceNotStr[str] | Omit = omit,
        files: Iterable[file_batch_create_params.File] | Omit = omit,
        poll_interval_ms: int | Omit = omit,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> VectorStoreFileBatch:
        """Create a vector store batch and poll until all files have been processed."""
        batch = await self.create(
            vector_store_id=vector_store_id,
            attributes=attributes,
            chunking_strategy=chunking_strategy,
            file_ids=file_ids,
            files=files,
            extra_headers=extra_headers,
            extra_query=extra_query,
            extra_body=extra_body,
            timeout=timeout,
        )
        # TODO: don't poll unless necessary??
        return await self.poll(
            batch.id,
            vector_store_id=vector_store_id,
            poll_interval_ms=poll_interval_ms,
        )

    def list_files(
        self,
        batch_id: str,
        *,
        vector_store_id: str,
        after: str | Omit = omit,
        before: str | Omit = omit,
        filter: Literal["in_progress", "completed", "failed", "cancelled"] | Omit = omit,
        limit: int | Omit = omit,
        order: Literal["asc", "desc"] | Omit = omit,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> AsyncPaginator[VectorStoreFile, AsyncCursorPage[VectorStoreFile]]:
        """
        Returns a list of vector store files in a batch.

        Args:
          after: A cursor for use in pagination. `after` is an object ID that defines your place
              in the list. For instance, if you make a list request and receive 100 objects,
              ending with obj_foo, your subsequent call can include after=obj_foo in order to
              fetch the next page of the list.

          before: A cursor for use in pagination. `before` is an object ID that defines your place
              in the list. For instance, if you make a list request and receive 100 objects,
              starting with obj_foo, your subsequent call can include before=obj_foo in order
              to fetch the previous page of the list.

          filter: Filter by file status. One of `in_progress`, `completed`, `failed`, `cancelled`.

          limit: A limit on the number of objects to be returned. Limit can range between 1 and
              100, and the default is 20.

          order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending
              order and `desc` for descending order.

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
        """
        if not vector_store_id:
            raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}")
        if not batch_id:
            raise ValueError(f"Expected a non-empty value for `batch_id` but received {batch_id!r}")
        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
        return self._get_api_list(
            f"/vector_stores/{vector_store_id}/file_batches/{batch_id}/files",
            page=AsyncCursorPage[VectorStoreFile],
            options=make_request_options(
                extra_headers=extra_headers,
                extra_query=extra_query,
                extra_body=extra_body,
                timeout=timeout,
                query=maybe_transform(
                    {
                        "after": after,
                        "before": before,
                        "filter": filter,
                        "limit": limit,
                        "order": order,
                    },
                    file_batch_list_files_params.FileBatchListFilesParams,
                ),
            ),
            model=VectorStoreFile,
        )

    async def poll(
        self,
        batch_id: str,
        *,
        vector_store_id: str,
        poll_interval_ms: int | Omit = omit,
    ) -> VectorStoreFileBatch:
        """Wait for the given file batch to be processed.

        Note: this will return even if one of the files failed to process, you need to
        check batch.file_counts.failed_count to handle this case.
        """
        headers: dict[str, str] = {"X-Stainless-Poll-Helper": "true"}
        if is_given(poll_interval_ms):
            headers["X-Stainless-Custom-Poll-Interval"] = str(poll_interval_ms)

        while True:
            response = await self.with_raw_response.retrieve(
                batch_id,
                vector_store_id=vector_store_id,
                extra_headers=headers,
            )

            batch = response.parse()
            if batch.file_counts.in_progress > 0:
                if not is_given(poll_interval_ms):
                    # Prefer the server-suggested interval; fall back to 1s.
                    from_header = response.headers.get("openai-poll-after-ms")
                    if from_header is not None:
                        poll_interval_ms = int(from_header)
                    else:
                        poll_interval_ms = 1000

                await self._sleep(poll_interval_ms / 1000)
                continue

            return batch

    async def upload_and_poll(
        self,
        vector_store_id: str,
        *,
        files: Iterable[FileTypes],
        max_concurrency: int = 5,
        file_ids: SequenceNotStr[str] = (),
        poll_interval_ms: int | Omit = omit,
        chunking_strategy: FileChunkingStrategyParam | Omit = omit,
    ) -> VectorStoreFileBatch:
        """Uploads the given files concurrently and then creates a vector store file batch.

        If you've already uploaded certain files that you want to include in this batch
        then you can pass their IDs through the `file_ids` argument.

        By default, if any file upload fails then an exception will be eagerly raised.

        The number of concurrency uploads is configurable using the `max_concurrency`
        parameter.

        Note: this method only supports `asyncio` or `trio` as the backing async
        runtime.
        """
        uploaded_files: list[FileObject] = []

        async_library = sniffio.current_async_library()

        if async_library == "asyncio":

            async def asyncio_upload_file(semaphore: asyncio.Semaphore, file: FileTypes) -> None:
                async with semaphore:
                    file_obj = await self._client.files.create(
                        file=file,
                        purpose="assistants",
                    )
                    uploaded_files.append(file_obj)

            semaphore = asyncio.Semaphore(max_concurrency)

            tasks = [asyncio_upload_file(semaphore, file) for file in files]

            await asyncio.gather(*tasks)
        elif async_library == "trio":
            # We only import if the library is being used.
            # We support Python 3.7 so are using an older version of trio that does not have type information
            import trio  # type: ignore # pyright: ignore[reportMissingTypeStubs]

            async def trio_upload_file(limiter: trio.CapacityLimiter, file: FileTypes) -> None:
                async with limiter:
                    file_obj = await self._client.files.create(
                        file=file,
                        purpose="assistants",
                    )
                    uploaded_files.append(file_obj)

            limiter = trio.CapacityLimiter(max_concurrency)

            async with trio.open_nursery() as nursery:
                for file in files:
                    nursery.start_soon(trio_upload_file, limiter, file)  # pyright: ignore [reportUnknownMemberType]
        else:
            raise RuntimeError(
                f"Async runtime {async_library} is not supported yet. Only asyncio or trio is supported",
            )

        batch = await self.create_and_poll(
            vector_store_id=vector_store_id,
            file_ids=[*file_ids, *(f.id for f in uploaded_files)],
            poll_interval_ms=poll_interval_ms,
            chunking_strategy=chunking_strategy,
        )
        return batch
class FileBatchesWithRawResponse:
    """Raw-response view over a `FileBatches` resource: each method returns the
    unparsed HTTP response instead of the parsed model."""

    def __init__(self, file_batches: FileBatches) -> None:
        self._file_batches = file_batches

        # Wrap every API method with the raw-response adapter.
        wrap = _legacy_response.to_raw_response_wrapper
        self.create = wrap(file_batches.create)
        self.retrieve = wrap(file_batches.retrieve)
        self.cancel = wrap(file_batches.cancel)
        self.list_files = wrap(file_batches.list_files)
class AsyncFileBatchesWithRawResponse:
    """Raw-response view over an `AsyncFileBatches` resource: each method
    returns the unparsed HTTP response instead of the parsed model."""

    def __init__(self, file_batches: AsyncFileBatches) -> None:
        self._file_batches = file_batches

        # Wrap every API method with the async raw-response adapter.
        wrap = _legacy_response.async_to_raw_response_wrapper
        self.create = wrap(file_batches.create)
        self.retrieve = wrap(file_batches.retrieve)
        self.cancel = wrap(file_batches.cancel)
        self.list_files = wrap(file_batches.list_files)
class FileBatchesWithStreamingResponse:
    """Streaming-response view over a `FileBatches` resource: each method
    defers reading the response body until the caller consumes it."""

    def __init__(self, file_batches: FileBatches) -> None:
        self._file_batches = file_batches

        # Wrap every API method with the streaming-response adapter.
        wrap = to_streamed_response_wrapper
        self.create = wrap(file_batches.create)
        self.retrieve = wrap(file_batches.retrieve)
        self.cancel = wrap(file_batches.cancel)
        self.list_files = wrap(file_batches.list_files)
class AsyncFileBatchesWithStreamingResponse:
    """Streaming-response view over an `AsyncFileBatches` resource: each method
    defers reading the response body until the caller consumes it."""

    def __init__(self, file_batches: AsyncFileBatches) -> None:
        self._file_batches = file_batches

        # Wrap every API method with the async streaming-response adapter.
        wrap = async_to_streamed_response_wrapper
        self.create = wrap(file_batches.create)
        self.retrieve = wrap(file_batches.retrieve)
        self.cancel = wrap(file_batches.cancel)
        self.list_files = wrap(file_batches.list_files)