jaothan committed on
Commit
7aaa9ef
·
verified ·
1 Parent(s): 9827840

Upload manage_vectordb.py

Browse files
Files changed (1) hide show
  1. manage_vectordb.py +81 -0
manage_vectordb.py ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain_community.vectorstores import Chroma
2
+ from chromadb import HttpClient
3
+ from chromadb.config import Settings
4
+ import chromadb.utils.embedding_functions as embedding_functions
5
+ from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings
6
+ from langchain_community.vectorstores import Milvus
7
+ from pymilvus import MilvusClient
8
+ from pymilvus import connections, utility
9
+
10
class VectorDB:
    """Small facade over a ChromaDB or Milvus vector store.

    The backend is selected by ``vector_vendor`` (``"chromadb"`` or
    ``"milvus"``). The instance keeps the connection parameters, the target
    collection name and the name of the sentence-transformer embedding model
    used when populating or opening the collection.
    """

    # Vendors this wrapper has a code path for.
    SUPPORTED_VENDORS = ("chromadb", "milvus")

    def __init__(self, vector_vendor, host, port, collection_name, embedding_model):
        """Store the configuration; no connection is opened here.

        Args:
            vector_vendor: Backend selector, ``"chromadb"`` or ``"milvus"``.
            host: Host name of the vector database server.
            port: Port of the vector database server.
            collection_name: Collection to populate, open or clear.
            embedding_model: Model name handed to the sentence-transformer
                embedding wrappers.
        """
        self.vector_vendor = vector_vendor
        self.host = host
        self.port = port
        self.collection_name = collection_name
        self.embedding_model = embedding_model
        # Set by connect(); kept as None until then so the other methods can
        # detect "not connected yet" instead of hitting an AttributeError.
        self.client = None

    def _check_vendor(self):
        """Raise ``ValueError`` if ``vector_vendor`` is not supported."""
        if self.vector_vendor not in self.SUPPORTED_VENDORS:
            raise ValueError(
                f"Unsupported vector vendor {self.vector_vendor!r}; "
                f"expected one of {self.SUPPORTED_VENDORS}"
            )

    def _ensure_client(self):
        """Return the backend client, connecting first if necessary."""
        if self.client is None:
            self.connect()
        return self.client

    def connect(self):
        """Create the backend client, store it on ``self.client`` and return it.

        Raises:
            ValueError: If ``vector_vendor`` is not a supported backend.
        """
        # Fail fast with a clear message instead of an AttributeError later.
        self._check_vendor()
        print(f"Connecting to {self.host}:{self.port}...")
        if self.vector_vendor == "chromadb":
            self.client = HttpClient(
                host=self.host,
                port=self.port,
                settings=Settings(allow_reset=True),
            )
        else:  # "milvus" (guaranteed by _check_vendor)
            self.client = MilvusClient(uri=f"http://{self.host}:{self.port}")
        return self.client

    def populate_db(self, documents):
        """Populate the collection if needed and return a LangChain store.

        For ChromaDB the collection is fetched or created and filled from
        ``documents`` only when its count is below one. For Milvus the
        collection is filled only when ``utility.has_collection`` reports it
        as missing. In both cases an already populated collection is opened
        as-is and ``documents`` is not used.

        Args:
            documents: Documents passed to ``from_documents`` of the selected
                LangChain vector store.

        Returns:
            The LangChain ``Chroma`` or ``Milvus`` vector store object.

        Raises:
            ValueError: If ``vector_vendor`` is not a supported backend.
        """
        # Validate before constructing the embeddings object so a
        # misconfigured vendor is reported without loading a model.
        self._check_vendor()
        e = SentenceTransformerEmbeddings(model_name=self.embedding_model)
        print("Populating VectorDB with vectors...")

        if self.vector_vendor == "chromadb":
            # Connect lazily so callers are not required to call connect().
            client = self._ensure_client()
            embedding_func = embedding_functions.SentenceTransformerEmbeddingFunction(
                model_name=self.embedding_model
            )
            collection = client.get_or_create_collection(
                self.collection_name,
                embedding_function=embedding_func,
            )
            if collection.count() < 1:
                db = Chroma.from_documents(
                    documents=documents,
                    embedding=e,
                    collection_name=self.collection_name,
                    client=client,
                )
                print("DB populated")
            else:
                db = Chroma(
                    client=client,
                    collection_name=self.collection_name,
                    embedding_function=e,
                )
                print("DB already populated")
        else:  # "milvus" (guaranteed by _check_vendor)
            connection_args = {"host": self.host, "port": self.port}
            # utility.has_collection is called after a pymilvus connection
            # has been opened with connections.connect.
            connections.connect(host=self.host, port=self.port)
            if not utility.has_collection(self.collection_name):
                db = Milvus.from_documents(
                    documents,
                    e,
                    collection_name=self.collection_name,
                    connection_args=connection_args,
                )
                print("DB populated")
            else:
                print("DB already populated")
                db = Milvus(
                    e,
                    collection_name=self.collection_name,
                    connection_args=connection_args,
                )
        return db

    def clear_db(self):
        """Best-effort removal of the configured collection.

        Failures (for example a collection that does not exist) are reported
        on stdout and never raised. An unsupported vendor is reported and
        nothing is cleared.
        """
        print("Clearing VectorDB...")
        if self.vector_vendor not in self.SUPPORTED_VENDORS:
            # Previously this path printed "Cleared DB" without doing anything.
            print(f"Unsupported vector vendor {self.vector_vendor!r}; nothing cleared")
            return
        try:
            client = self._ensure_client()
            if self.vector_vendor == "chromadb":
                client.delete_collection(self.collection_name)
            else:  # "milvus"
                client.drop_collection(self.collection_name)
            print("Cleared DB")
        except Exception:
            # Deliberately best-effort, but no longer swallows
            # KeyboardInterrupt/SystemExit like a bare ``except`` would.
            print("Couldn't clear the collection possibly because it doesn't exist")