AlexFoxalt's picture
Deploying Pythonic RAG
2be9eb9
from typing import Callable
import numpy as np
from utils.embedding import EmbeddingModel
from utils.metric import CosineSimilarity, DistanceMetric
from utils.vector import Vector
class VectorDatabase:
    """In-memory collection of ``Vector`` objects indexed by ``Vector.key``.

    The database holds an embedding model that is used to turn text into
    vectors, both when bulk-loading texts (``abuild_from_list``) and when
    querying by text (``asearch_by_text``).
    """

    def __init__(self, model: EmbeddingModel) -> None:
        """Create an empty database bound to *model*.

        Args:
            model: Object used to compute embeddings; this class awaits its
                ``aget_embedding`` and ``aget_embeddings`` methods.
        """
        # Maps Vector.key -> Vector; a later insert with the same key
        # replaces the earlier entry.
        self.vectors: dict[str, Vector] = {}
        self.model = model

    def insert(self, vector: Vector) -> None:
        """Store *vector* under ``vector.key``, overwriting any existing entry."""
        self.vectors[vector.key] = vector

    def search(
        self,
        query_vector: np.ndarray,
        k: int,
        min_quality: float,
        distance_measure: Callable[..., float],
    ) -> list[tuple[Vector, float]]:
        """Return the best-scoring stored vectors for *query_vector*.

        Despite the parameter name, the value returned by *distance_measure*
        is treated as a similarity score: higher is better.

        Args:
            query_vector: Embedding to compare against every stored vector.
            k: Maximum number of results. Zero or a negative value yields an
                empty list.
            min_quality: Results scoring strictly below this are discarded.
            distance_measure: Called as
                ``distance_measure(query_vector, vector.data)`` for each
                stored vector.

        Returns:
            Up to *k* ``(vector, score)`` pairs ordered by descending score.
            Ties keep insertion order because ``sorted`` is stable.
        """
        # A negative k would make the final ``[:k]`` slice drop results from
        # the tail instead of limiting the count, so reject it up front.
        if k <= 0:
            return []

        scored: list[tuple[Vector, float]] = []
        for vector in self.vectors.values():
            score = distance_measure(query_vector, vector.data)
            if score < min_quality:
                continue
            scored.append((vector, score))

        scored.sort(key=lambda pair: pair[1], reverse=True)
        return scored[:k]

    async def asearch_by_text(
        self,
        query_text: str,
        k: int = 5,
        min_quality: float = 0.5,
        metric_class: type[DistanceMetric] = CosineSimilarity,
        text_only: bool = False,
    ) -> "list[tuple[Vector, float]] | list[str]":
        """Embed *query_text* with the model and search the stored vectors.

        Args:
            query_text: Text to embed via ``self.model.aget_embedding``.
            k: Maximum number of results.
            min_quality: Minimum score a result must reach to be returned.
            metric_class: Object whose ``count`` attribute is used as the
                scoring callable. NOTE(review): presumably a
                ``DistanceMetric`` subclass exposing ``count`` as a
                static/class method; confirm against ``utils.metric``.
            text_only: When true, return only the keys of the matches.

        Returns:
            A list of ``Vector.key`` values when *text_only* is true,
            otherwise the ``(vector, score)`` pairs produced by ``search``.
        """
        query_vector = await self.model.aget_embedding(query_text)
        matches = self.search(query_vector, k, min_quality, metric_class.count)
        if text_only:
            return [vector.key for vector, _score in matches]
        return matches

    async def abuild_from_list(self, texts: list[str], metadata: dict) -> None:
        """Embed every text in *texts* and insert the resulting vectors.

        Each text becomes the key of its vector. Texts and embeddings are
        paired positionally with ``zip``, so pairing stops at the shorter of
        the two sequences.

        Args:
            texts: Texts to embed via ``self.model.aget_embeddings``.
            metadata: Attached to every inserted vector. The same dict object
                is passed to each ``Vector`` (no copy is made here).
        """
        embeddings = await self.model.aget_embeddings(texts)
        for text, embedding in zip(texts, embeddings):
            self.insert(Vector(data=embedding, key=text, metadata=metadata))