Spaces:
Runtime error
Runtime error
| from __future__ import annotations | |
| from typing import Any, List, Literal, Optional | |
| from langchain_core.embeddings import Embeddings | |
| from langchain.vectorstores.docarray.base import ( | |
| DocArrayIndex, | |
| _check_docarray_import, | |
| ) | |
| class DocArrayHnswSearch(DocArrayIndex): | |
| """`HnswLib` storage using `DocArray` package. | |
| To use it, you should have the ``docarray`` package with version >=0.32.0 installed. | |
| You can install it with `pip install "langchain[docarray]"`. | |
| """ | |
| def from_params( | |
| cls, | |
| embedding: Embeddings, | |
| work_dir: str, | |
| n_dim: int, | |
| dist_metric: Literal["cosine", "ip", "l2"] = "cosine", | |
| max_elements: int = 1024, | |
| index: bool = True, | |
| ef_construction: int = 200, | |
| ef: int = 10, | |
| M: int = 16, | |
| allow_replace_deleted: bool = True, | |
| num_threads: int = 1, | |
| **kwargs: Any, | |
| ) -> DocArrayHnswSearch: | |
| """Initialize DocArrayHnswSearch store. | |
| Args: | |
| embedding (Embeddings): Embedding function. | |
| work_dir (str): path to the location where all the data will be stored. | |
| n_dim (int): dimension of an embedding. | |
| dist_metric (str): Distance metric for DocArrayHnswSearch can be one of: | |
| "cosine", "ip", and "l2". Defaults to "cosine". | |
| max_elements (int): Maximum number of vectors that can be stored. | |
| Defaults to 1024. | |
| index (bool): Whether an index should be built for this field. | |
| Defaults to True. | |
| ef_construction (int): defines a construction time/accuracy trade-off. | |
| Defaults to 200. | |
| ef (int): parameter controlling query time/accuracy trade-off. | |
| Defaults to 10. | |
| M (int): parameter that defines the maximum number of outgoing | |
| connections in the graph. Defaults to 16. | |
| allow_replace_deleted (bool): Enables replacing of deleted elements | |
| with new added ones. Defaults to True. | |
| num_threads (int): Sets the number of cpu threads to use. Defaults to 1. | |
| **kwargs: Other keyword arguments to be passed to the get_doc_cls method. | |
| """ | |
| _check_docarray_import() | |
| from docarray.index import HnswDocumentIndex | |
| doc_cls = cls._get_doc_cls( | |
| dim=n_dim, | |
| space=dist_metric, | |
| max_elements=max_elements, | |
| index=index, | |
| ef_construction=ef_construction, | |
| ef=ef, | |
| M=M, | |
| allow_replace_deleted=allow_replace_deleted, | |
| num_threads=num_threads, | |
| **kwargs, | |
| ) | |
| doc_index = HnswDocumentIndex[doc_cls](work_dir=work_dir) # type: ignore | |
| return cls(doc_index, embedding) | |
| def from_texts( | |
| cls, | |
| texts: List[str], | |
| embedding: Embeddings, | |
| metadatas: Optional[List[dict]] = None, | |
| work_dir: Optional[str] = None, | |
| n_dim: Optional[int] = None, | |
| **kwargs: Any, | |
| ) -> DocArrayHnswSearch: | |
| """Create an DocArrayHnswSearch store and insert data. | |
| Args: | |
| texts (List[str]): Text data. | |
| embedding (Embeddings): Embedding function. | |
| metadatas (Optional[List[dict]]): Metadata for each text if it exists. | |
| Defaults to None. | |
| work_dir (str): path to the location where all the data will be stored. | |
| n_dim (int): dimension of an embedding. | |
| **kwargs: Other keyword arguments to be passed to the __init__ method. | |
| Returns: | |
| DocArrayHnswSearch Vector Store | |
| """ | |
| if work_dir is None: | |
| raise ValueError("`work_dir` parameter has not been set.") | |
| if n_dim is None: | |
| raise ValueError("`n_dim` parameter has not been set.") | |
| store = cls.from_params(embedding, work_dir, n_dim, **kwargs) | |
| store.add_texts(texts=texts, metadatas=metadatas) | |
| return store | |