Spaces:
Running
Running
File size: 1,790 Bytes
from langchain_core.documents import Document
from typing import List, Optional
import json
class VectorStore:
    """Convenience wrapper around a vector-store backend.

    The wrapped ``db`` object must provide ``get``, ``delete``,
    ``add_documents``, ``similarity_search`` and
    ``similarity_search_with_score``; those are the only backend methods
    this class calls.

    NOTE(review): the call shapes used here follow the LangChain
    ``VectorStore`` interface (e.g. a Chroma store) -- confirm against the
    code that constructs this wrapper.
    """

    def __init__(self, db):
        """Store the backend that every other method delegates to."""
        self.db = db

    def get(self):
        """Return the result of the backend's ``get()`` unchanged."""
        return self.db.get()

    def get_by_id(self, ids: list[str]):
        """Return the backend's ``get`` result restricted to ``ids``."""
        return self.db.get(ids=ids)

    def get_dict(self) -> str:
        """Return all stored entries as a JSON string.

        Despite the name, the return value is a JSON-encoded *string*
        holding a list of ``{"id", "document", "metadata"}`` objects, built
        from the ``"ids"``, ``"documents"`` and ``"metadatas"`` entries of
        the backend's ``get()`` result.
        """
        data = self.db.get()
        rows = [
            {"id": entry_id, "document": text, "metadata": metadata}
            for entry_id, text, metadata in zip(
                data["ids"],
                data["documents"],
                data["metadatas"],
            )
        ]
        return json.dumps(rows)

    @staticmethod
    def _search_kwargs(metadata_filter, k) -> dict:
        """Build the keyword arguments shared by both search methods."""
        kwargs = {}
        if k is not None:
            kwargs["k"] = k
        if metadata_filter:
            kwargs["filter"] = metadata_filter
        return kwargs

    def similarity_search(
        self,
        query: str,
        filter: Optional[dict[str, str]] = None,
        k: Optional[int] = 5,
    ):
        """Return the backend's similarity-search results for ``query``.

        Args:
            query: Text to search for.
            filter: Optional metadata filter; ``None`` or an empty dict
                means no filtering.
            k: Number of results to request; ``None`` leaves the choice to
                the backend's own default.
        """
        # Keywords, not positions: this wrapper takes (query, filter, k)
        # while the backend takes k before filter, so a positional call
        # would swap the two values.
        return self.db.similarity_search(query, **self._search_kwargs(filter, k))

    def similarity_search_with_score(
        self,
        query: str,
        filter: Optional[dict[str, str]] = None,
        k: Optional[int] = 5,
    ):
        """Return the backend's scored similarity-search results.

        Takes the same arguments as ``similarity_search`` and delegates to
        the backend's ``similarity_search_with_score``.
        """
        return self.db.similarity_search_with_score(
            query, **self._search_kwargs(filter, k)
        )

    def add_documents(self, docs: List["Document"], ids: Optional[List] = None):
        """Add the non-blank documents in ``docs`` to the backend.

        Documents whose ``page_content`` is empty or whitespace-only are
        skipped. When ``ids`` is given, the id of each skipped document is
        skipped with it so the remaining ids still match their documents.

        Args:
            docs: Documents to store.
            ids: Optional ids, one per entry of ``docs``.

        Returns:
            The backend's ``add_documents`` result, or ``[]`` when no
            document survives the blank-content filter (the backend is not
            called in that case).

        Raises:
            ValueError: If ``ids`` is given with a different length than
                ``docs``.
        """
        if ids is None:
            kept_docs = [doc for doc in docs if doc.page_content.strip()]
            return self.db.add_documents(kept_docs) if kept_docs else []

        if len(ids) != len(docs):
            raise ValueError(
                f"ids and docs must have the same length "
                f"(got {len(ids)} ids for {len(docs)} docs)"
            )

        # Filter documents and ids as pairs so they stay aligned.
        kept = [
            (doc, doc_id)
            for doc, doc_id in zip(docs, ids)
            if doc.page_content.strip()
        ]
        if not kept:
            return []
        kept_docs = [doc for doc, _ in kept]
        kept_ids = [doc_id for _, doc_id in kept]
        # ids must go by keyword: add_documents(documents, **kwargs) takes
        # only the documents positionally.
        return self.db.add_documents(kept_docs, ids=kept_ids)

    def update_document(self, document_id: str, document: "Document"):
        """Replace the entry stored under ``document_id`` with ``document``.

        The old entry is deleted and the new document is added under the
        same id, which guarantees it is re-embedded. The two steps are not
        atomic: if the add fails, the old entry has already been removed.

        Returns:
            The backend's ``add_documents`` result.
        """
        self.db.delete(ids=[document_id])
        return self.db.add_documents([document], ids=[document_id])

    def delete(self, ids: List):
        """Delete the entries with the given ``ids``; always returns ``True``."""
        self.db.delete(ids=ids)
        return True
|