# rag-implementation/aimakerspace/vectordatabase.py
# Uploaded by Shipmaster1 ("Upload 11 files", commit 348f7b7, verified)
from typing import Dict, List, Optional, Tuple

import numpy as np

from .openai_utils.embedding import EmbeddingModel
class VectorDatabase:
    """In-memory store mapping texts to embedding vectors.

    Texts are the keys of ``vectors``; ``texts`` keeps the distinct keys in
    first-insertion order. Queries are embedded with ``embedding_model`` and
    ranked against every stored vector by cosine similarity.
    """

    def __init__(self, embedding_model: Optional["EmbeddingModel"] = None):
        """Create an empty database.

        Args:
            embedding_model: Object exposing ``get_embedding(text)`` and the
                coroutine ``async_get_embeddings(texts)``. When omitted, a
                default ``EmbeddingModel()`` is constructed.
        """
        self.vectors: Dict[str, np.ndarray] = {}
        self.texts: List[str] = []
        # Explicit None check: a caller-supplied model must never be replaced
        # just because it happens to evaluate as falsy.
        self.embedding_model = (
            EmbeddingModel() if embedding_model is None else embedding_model
        )

    async def abuild_from_list(self, list_of_text: List[str]) -> 'VectorDatabase':
        """Embed every text in ``list_of_text`` and insert the results.

        Args:
            list_of_text: Texts to embed and store.

        Returns:
            This database, so the call can be chained.
        """
        # Nothing to embed: skip the embedding request entirely.
        if not list_of_text:
            return self

        embeddings = await self.embedding_model.async_get_embeddings(list_of_text)
        for text, embedding in zip(list_of_text, embeddings):
            self.insert(text, np.array(embedding))
        return self

    def insert(self, text: str, vector: np.ndarray):
        """Store ``vector`` under ``text``, replacing any previous vector.

        Args:
            text: Key for the vector.
            vector: Embedding to associate with ``text``.
        """
        # Re-inserting a text overwrites its vector; record the text only
        # once so ``texts`` stays in step with the keys of ``vectors``.
        if text not in self.vectors:
            self.texts.append(text)
        self.vectors[text] = vector

    def search_by_text(self, query: str, k: int = 4) -> List[Tuple[str, float]]:
        """Return the ``k`` stored texts most similar to ``query``.

        Args:
            query: Text to embed and compare against the stored vectors.
            k: Maximum number of results; values ``<= 0`` yield no results.

        Returns:
            ``(text, cosine_similarity)`` pairs sorted by descending
            similarity. Ties keep insertion order. A pair whose query or
            stored vector has zero norm scores ``0.0``.
        """
        if k <= 0 or not self.vectors:
            return []

        query_vector = np.asarray(
            self.embedding_model.get_embedding(query), dtype=float
        )
        # Loop-invariant: compute the query norm once.
        query_norm = float(np.linalg.norm(query_vector))

        scored: List[Tuple[str, float]] = []
        for text, vector in self.vectors.items():
            denominator = query_norm * float(np.linalg.norm(vector))
            if denominator:
                similarity = float(np.dot(query_vector, vector)) / denominator
            else:
                # Cosine similarity is undefined for a zero vector; score it
                # 0.0 rather than producing NaN, which would corrupt sorting.
                similarity = 0.0
            scored.append((text, similarity))

        scored.sort(key=lambda pair: pair[1], reverse=True)
        return scored[:k]