Spaces:
Build error
Build error
| import numpy as np | |
| from typing import List, Tuple, Dict | |
| from .openai_utils.embedding import EmbeddingModel | |
class VectorDatabase:
    """Store texts with their embedding vectors and rank them by cosine similarity.

    Vectors are kept in a dict keyed by the text they were inserted with, and
    every inserted text is also appended to ``texts`` in insertion order.
    """

    def __init__(self, embedding_model: "EmbeddingModel | None" = None):
        """Create an empty database.

        Args:
            embedding_model: Object used to embed texts and queries. When
                ``None``, a default ``EmbeddingModel()`` is constructed.
        """
        self.vectors: Dict[str, np.ndarray] = {}
        self.texts: List[str] = []
        # Explicit ``is None`` so a caller-supplied model is never discarded
        # just because it happens to evaluate as falsy.
        if embedding_model is None:
            embedding_model = EmbeddingModel()
        self.embedding_model = embedding_model

    async def abuild_from_list(self, list_of_text: List[str]) -> 'VectorDatabase':
        """Embed every text in ``list_of_text`` and insert it into the database.

        Embeddings are requested in a single
        ``embedding_model.async_get_embeddings`` call and paired with the
        texts positionally.

        Returns:
            This database, so the call can be chained.
        """
        embeddings = await self.embedding_model.async_get_embeddings(list_of_text)
        for text, embedding in zip(list_of_text, embeddings):
            self.insert(text, np.array(embedding))
        return self

    def insert(self, text: str, vector: np.ndarray):
        """Record ``text`` and store ``vector`` under it.

        Inserting the same text again overwrites its stored vector, while
        ``texts`` still receives another entry for it.
        """
        self.texts.append(text)
        self.vectors[text] = vector

    def search_by_text(self, query: str, k: int = 4) -> List[Tuple[str, float]]:
        """Return the ``k`` stored texts most similar to ``query``.

        The query is embedded with ``embedding_model.get_embedding`` and
        compared to every stored vector by cosine similarity.

        Args:
            query: Text to search for.
            k: Maximum number of results; values ``<= 0`` yield an empty list.

        Returns:
            ``(text, similarity)`` pairs sorted by descending similarity. A
            pair whose query or stored vector has zero norm scores ``0.0``
            rather than NaN.
        """
        if k <= 0:
            return []

        query_vec = np.asarray(self.embedding_model.get_embedding(query))
        # The query norm does not change per candidate; compute it once.
        query_norm = np.linalg.norm(query_vec)

        scored: List[Tuple[str, float]] = []
        for text, vector in self.vectors.items():
            denom = query_norm * np.linalg.norm(vector)
            if denom > 0:
                # Assumes 1-D embeddings, so the dot product is a scalar.
                score = float(np.dot(query_vec, vector) / denom)
            else:
                # Cosine similarity is undefined for a zero vector; use 0.0
                # so NaN cannot corrupt the ordering below.
                score = 0.0
            scored.append((text, score))

        scored.sort(key=lambda pair: pair[1], reverse=True)
        return scored[:k]