from typing import Callable, Union

import numpy as np

from utils.embedding import EmbeddingModel
from utils.metric import CosineSimilarity, DistanceMetric
from utils.vector import Vector


class VectorDatabase:
    """In-memory collection of ``Vector`` objects, searchable by score.

    Vectors are kept in a dict keyed by ``Vector.key``. Text queries are
    turned into embeddings through the ``EmbeddingModel`` given at
    construction time.
    """

    def __init__(self, model: EmbeddingModel) -> None:
        """Create an empty database that embeds text with *model*."""
        self.vectors: dict[str, Vector] = {}
        self.model = model

    def insert(self, vector: Vector) -> None:
        """Store *vector* under its key, replacing any entry with that key."""
        self.vectors[vector.key] = vector

    def search(
        self,
        query_vector: np.ndarray,
        k: int,
        min_quality: float,
        distance_measure: Callable,
    ) -> list[tuple[Vector, float]]:
        """Return the *k* best-scoring stored vectors for *query_vector*.

        Despite the parameter name, the value returned by
        ``distance_measure(query_vector, vector.data)`` is treated as a
        similarity: higher is better, and entries scoring below
        *min_quality* are dropped.

        Args:
            query_vector: Embedding to compare against every stored vector.
            k: Maximum number of hits to return; values <= 0 yield ``[]``.
            min_quality: Lowest score a hit may have to be kept.
            distance_measure: Callable taking ``(query_vector, vector.data)``
                and returning a comparable score.

        Returns:
            ``(vector, score)`` pairs ordered by descending score.
        """
        # A negative k would make the final slice drop hits from the END of
        # the ranking instead of limiting the result, so reject it up front.
        if k <= 0:
            return []

        scores = []
        for vector in self.vectors.values():
            similarity = distance_measure(query_vector, vector.data)
            if similarity < min_quality:
                continue
            scores.append((vector, similarity))

        return sorted(scores, key=lambda pair: pair[1], reverse=True)[:k]

    async def asearch_by_text(
        self,
        query_text: str,
        k: int = 5,
        min_quality: float = 0.5,
        metric_class: type[DistanceMetric] = CosineSimilarity,
        text_only: bool = False,
    ) -> Union[list[tuple[Vector, float]], list[str]]:
        """Embed *query_text* and search the database with the embedding.

        Args:
            query_text: Text passed to ``self.model.aget_embedding``.
            k: Maximum number of hits to return.
            min_quality: Lowest score a hit may have to be kept.
            metric_class: Object whose ``count`` attribute is used as the
                scoring callable for :meth:`search`.
            text_only: When true, return only the keys of the hits.

        Returns:
            The ``(vector, score)`` pairs from :meth:`search`, or just the
            hit keys (in the same order) when *text_only* is true.
        """
        query_vector = await self.model.aget_embedding(query_text)
        response = self.search(query_vector, k, min_quality, metric_class.count)
        if text_only:
            return [vector.key for vector, _score in response]
        return response

    async def abuild_from_list(self, texts: list[str], metadata: dict) -> None:
        """Embed every text in *texts* and insert one vector per text.

        Each text is used as the key of its vector, so a repeated text
        overwrites the earlier entry. The same *metadata* object is attached
        to every inserted vector (it is not copied).
        """
        embeddings = await self.model.aget_embeddings(texts)
        for item, emb in zip(texts, embeddings):
            self.insert(Vector(data=emb, key=item, metadata=metadata))