Spaces:
No application file
No application file
| from typing import Callable | |
| import numpy as np | |
| from utils.embedding import EmbeddingModel | |
| from utils.metric import CosineSimilarity, DistanceMetric | |
| from utils.vector import Vector | |
class VectorDatabase:
    """In-memory collection of ``Vector`` objects, indexed by ``vector.key``.

    Searching is a linear scan: every stored vector is scored against the
    query with a caller-supplied measure and the best ``k`` are returned.
    """

    def __init__(self, model: EmbeddingModel) -> None:
        """Create an empty database that uses *model* to embed text."""
        # Keyed by Vector.key, so inserting an existing key overwrites it.
        self.vectors: dict[str, Vector] = {}
        self.model = model

    def insert(self, vector: Vector) -> None:
        """Store *vector* under ``vector.key``, replacing any previous entry."""
        self.vectors[vector.key] = vector

    def search(
        self,
        query_vector: np.ndarray,
        k: int,
        min_quality: float,
        distance_measure: Callable[..., float],
    ) -> list[tuple[Vector, float]]:
        """Return the ``k`` best-scoring stored vectors for *query_vector*.

        Args:
            query_vector: Embedding to compare every stored vector against.
            k: Maximum number of results (applied as a ``[:k]`` slice).
            min_quality: Entries scoring strictly below this are discarded.
            distance_measure: Called as ``distance_measure(query_vector,
                vector.data)``. Despite the parameter name, the result is
                treated as a similarity: higher scores rank first.

        Returns:
            ``(vector, score)`` pairs ordered from highest to lowest score.
        """
        scores = []
        for vector in self.vectors.values():
            similarity = distance_measure(query_vector, vector.data)
            if similarity < min_quality:
                continue
            scores.append((vector, similarity))
        # list.sort is stable, so equal scores keep their insertion order.
        scores.sort(key=lambda pair: pair[1], reverse=True)
        return scores[:k]

    async def asearch_by_text(
        self,
        query_text: str,
        k: int = 5,
        min_quality: float = 0.5,
        metric_class: type[DistanceMetric] = CosineSimilarity,
        text_only: bool = False,
    ) -> "list[tuple[Vector, float]] | list[str]":
        """Embed *query_text* with the model and search the database.

        Args:
            query_text: Text to embed via ``self.model.aget_embedding``.
            k: Maximum number of results.
            min_quality: Minimum score a result must reach to be returned.
            metric_class: Object whose ``count`` attribute is used as the
                scoring callable (presumably a ``DistanceMetric`` subclass
                -- confirm against ``utils.metric``).
            text_only: When true, return only the matching vectors' keys.
                Entries created by ``abuild_from_list`` use the source text
                as their key.

        Returns:
            ``(vector, score)`` pairs, best first, or just the keys when
            ``text_only`` is set.
        """
        # The return annotation is quoted so the ``|`` union is never
        # evaluated at runtime on interpreters older than 3.10.
        query_vector = await self.model.aget_embedding(query_text)
        matches = self.search(query_vector, k, min_quality, metric_class.count)
        if text_only:
            return [vector.key for vector, _ in matches]
        return matches

    async def abuild_from_list(self, texts: list[str], metadata: dict) -> None:
        """Embed *texts* in one batch and insert one ``Vector`` per text.

        Each text becomes the key of its vector, so duplicate texts overwrite
        one another. The same *metadata* object (not a copy) is handed to
        every ``Vector``.
        """
        embeddings = await self.model.aget_embeddings(texts)
        # NOTE(review): zip stops at the shorter input, so a model returning
        # fewer embeddings than texts silently drops the trailing texts.
        for text, embedding in zip(texts, embeddings):
            self.insert(Vector(data=embedding, key=text, metadata=metadata))