vgecbot / app /services /vector_store.py
harsh-dev's picture
docker deployment
4225666
from langchain_core.documents import Document
from typing import List, Optional
import json
class VectorStore:
    """Thin convenience wrapper around a vector-store client.

    The wrapped ``db`` object must expose ``get``, ``similarity_search``,
    ``similarity_search_with_score``, ``add_documents`` and ``delete``.

    NOTE(review): the call shapes used here, and the ``ids`` / ``documents`` /
    ``metadatas`` keys read in :meth:`get_dict`, match LangChain's Chroma
    wrapper -- confirm against the code that constructs ``db``.
    """

    def __init__(self, db):
        """Keep a reference to the underlying vector-store client."""
        self.db = db

    @staticmethod
    def _search_kwargs(filter, k):
        """Build the keyword arguments shared by both search methods.

        ``filter`` is only forwarded when it is truthy, so an empty dict or
        ``None`` behaves like "no filter" exactly as the caller-facing
        methods did before.
        """
        kwargs = {"k": k}
        if filter:
            kwargs["filter"] = filter
        return kwargs

    def get(self):
        """Return the result of ``db.get()`` called with no arguments."""
        return self.db.get()

    def get_by_id(self, ids: list[str]):
        """Return the result of ``db.get(ids=ids)`` for the given ids."""
        return self.db.get(ids=ids)

    def get_dict(self):
        """Return every stored entry serialized as a JSON string.

        Despite the name, the return value is a ``str``: a JSON array of
        objects with the keys ``id``, ``document`` and ``metadata``, built by
        zipping the ``ids``, ``documents`` and ``metadatas`` lists returned by
        ``db.get()``.
        """
        data = self.db.get()
        rows = [
            {"id": id_, "document": doc, "metadata": meta}
            for id_, doc, meta in zip(
                data["ids"],
                data["documents"],
                data["metadatas"],
            )
        ]
        return json.dumps(rows)

    def similarity_search(
        self,
        query: str,
        filter: Optional[dict[str, str]] = None,
        k: Optional[int] = 5,
    ):
        """Return the ``k`` entries most similar to ``query``.

        Args:
            query: Text to search for.
            filter: Optional metadata filter; ignored when empty or ``None``.
            k: Number of results to request from the store.

        Returns:
            Whatever ``db.similarity_search`` returns.
        """
        # Pass k/filter by keyword: LangChain stores declare ``k`` before
        # ``filter``, so forwarding them positionally in this wrapper's
        # (filter, k) order would swap the two arguments.
        return self.db.similarity_search(query, **self._search_kwargs(filter, k))

    def similarity_search_with_score(
        self,
        query: str,
        filter: Optional[dict[str, str]] = None,
        k: Optional[int] = 5,
    ):
        """Return the ``k`` entries most similar to ``query`` with scores.

        Args:
            query: Text to search for.
            filter: Optional metadata filter; ignored when empty or ``None``.
            k: Number of results to request from the store.

        Returns:
            Whatever ``db.similarity_search_with_score`` returns.
        """
        # Keyword arguments for the same reason as in similarity_search.
        return self.db.similarity_search_with_score(
            query, **self._search_kwargs(filter, k)
        )

    def add_documents(self, docs: List["Document"], ids: Optional[List] = None):
        """Add documents to the store, skipping blank ones.

        Documents whose ``page_content`` is empty or whitespace-only are
        dropped.  When ``ids`` is given, the id belonging to each dropped
        document is dropped with it so documents and ids stay aligned.

        Args:
            docs: Documents to add.
            ids: Optional ids, one per entry of ``docs`` (same order).

        Returns:
            The result of ``db.add_documents``, or ``[]`` when no non-blank
            document remains (the store is not called in that case).

        Raises:
            ValueError: If ``ids`` is given and its length differs from
                ``docs``.
        """
        if ids is None:
            kept_docs = [doc for doc in docs if doc.page_content.strip()]
            if not kept_docs:
                return []
            return self.db.add_documents(kept_docs)

        if len(ids) != len(docs):
            raise ValueError(
                f"got {len(docs)} documents but {len(ids)} ids; "
                "they must have the same length"
            )

        # Filter documents and ids together; filtering only the documents
        # would pair the surviving documents with the wrong ids.
        pairs = [
            (doc, id_) for doc, id_ in zip(docs, ids) if doc.page_content.strip()
        ]
        if not pairs:
            return []
        kept_docs = [doc for doc, _ in pairs]
        kept_ids = [id_ for _, id_ in pairs]
        # ``ids`` must be a keyword argument: add_documents accepts only the
        # document list positionally.
        return self.db.add_documents(kept_docs, ids=kept_ids)

    def update_document(self, document_id: str, document: "Document"):
        """Replace the entry stored under ``document_id`` with ``document``.

        Implemented as delete-then-add so the new content is always
        re-embedded.  The two calls are separate operations on ``db``; if the
        add fails, the old entry has already been deleted.

        Returns:
            The result of ``db.add_documents`` for the new document.
        """
        # safest + guaranteed re-embedding
        self.db.delete(ids=[document_id])
        return self.db.add_documents([document], ids=[document_id])

    def delete(self, ids: List):
        """Delete the entries with the given ids and return ``True``.

        ``True`` is returned unconditionally once ``db.delete`` returns; any
        exception raised by the store propagates to the caller.
        """
        self.db.delete(ids=ids)
        return True