BinKhoaLe1812 committed on
Commit
0c0f651
·
verified ·
1 Parent(s): 88e7815

Update utils/migrate.py

Browse files
Files changed (1) hide show
  1. utils/migrate.py +49 -43
utils/migrate.py CHANGED
@@ -3,46 +3,52 @@ from pymongo import MongoClient
3
  from dotenv import load_dotenv
4
  import os
5
 
6
- # Load environment variables from .env
7
- load_dotenv()
8
- # Connection strings (update as needed)
9
- mongo_uri = os.getenv("MONGO_URI") # QA cluster connection string
10
- index_uri = os.getenv("INDEX_URI") # FAISS index cluster connection string
11
-
12
- if not mongo_uri:
13
- raise ValueError("MONGO_URI is missing!")
14
- if not index_uri:
15
- raise ValueError("INDEX_URI is missing!")
16
-
17
- # Connect to the QA cluster (where FAISS data was accidentally stored)
18
- qa_client = MongoClient(mongo_uri)
19
- qa_db = qa_client["MedicalChatbotDB"]
20
-
21
- # Connect to the FAISS index cluster
22
- faiss_client = MongoClient(index_uri)
23
- faiss_db = faiss_client["MedicalChatbotDB"] # Use the same database name if desired
24
-
25
- # Define the GridFS collections to move.
26
- # In GridFS, files are stored in two collections: "<bucket>.files" and "<bucket>.chunks".
27
- source_files = qa_db["faiss_index_files.files"]
28
- source_chunks = qa_db["faiss_index_files.chunks"]
29
-
30
- dest_files = faiss_db["faiss_index_files.files"]
31
- dest_chunks = faiss_db["faiss_index_files.chunks"]
32
-
33
- print("Moving FAISS index GridFS files...")
34
-
35
- # Copy documents from the source 'files' collection
36
- for doc in source_files.find():
37
- dest_files.insert_one(doc)
38
-
39
- # Copy documents from the source 'chunks' collection
40
- for doc in source_chunks.find():
41
- dest_chunks.insert_one(doc)
42
-
43
- print("✅ FAISS GridFS collections moved successfully.")
44
-
45
- # Optionally, drop the old collections from the QA cluster to free up space:
46
- qa_db.drop_collection("faiss_index_files.files")
47
- qa_db.drop_collection("faiss_index_files.chunks")
48
- print("Old FAISS GridFS collections dropped from the QA cluster.")
 
 
 
 
 
 
 
3
  from dotenv import load_dotenv
4
  import os
5
 
6
def migrate_faiss_index(*, drop_source: bool = True) -> None:
    """Move the FAISS index GridFS collections from the QA cluster to the index cluster.

    Reads ``MONGO_URI`` (QA cluster) and ``INDEX_URI`` (FAISS index cluster)
    from the environment after loading ``.env``, copies the
    ``faiss_index_files.files`` and ``faiss_index_files.chunks`` collections of
    the ``MedicalChatbotDB`` database from the former to the latter, and then
    drops them from the QA cluster.

    Args:
        drop_source: When true (the default, matching the previous behavior),
            drop the source collections once the copy has been verified.
            Pass ``False`` to copy only and keep the originals.

    Raises:
        ValueError: If ``MONGO_URI`` or ``INDEX_URI`` is unset or empty, or if
            both are the same string.
        RuntimeError: If the destination does not hold at least as many
            documents as the source after copying; nothing is dropped then.
    """
    # Load environment variables from .env
    load_dotenv()
    mongo_uri = os.getenv("MONGO_URI")  # QA cluster connection string
    index_uri = os.getenv("INDEX_URI")  # FAISS index cluster connection string

    if not mongo_uri:
        raise ValueError("MONGO_URI is missing!")
    if not index_uri:
        raise ValueError("INDEX_URI is missing!")
    # Copying a collection onto itself and then dropping it would destroy the
    # only copy, so refuse the obviously aliased configuration up front.
    if mongo_uri == index_uri:
        raise ValueError("MONGO_URI and INDEX_URI must point to different clusters!")

    # In GridFS, files are stored in two collections:
    # "<bucket>.files" and "<bucket>.chunks".
    bucket_collections = ("faiss_index_files.files", "faiss_index_files.chunks")

    # The context managers close both clients even when the copy fails midway.
    with MongoClient(mongo_uri) as qa_client, MongoClient(index_uri) as faiss_client:
        # QA cluster (where FAISS data was accidentally stored)
        qa_db = qa_client["MedicalChatbotDB"]
        # FAISS index cluster; the same database name is used on both sides
        faiss_db = faiss_client["MedicalChatbotDB"]

        print("Moving FAISS index GridFS files...")

        for name in bucket_collections:
            source = qa_db[name]
            dest = faiss_db[name]
            expected = source.count_documents({})

            copied = 0
            for doc in source.find():
                # Upsert by _id so that re-running after a partial failure
                # overwrites already-copied documents instead of raising a
                # duplicate-key error.
                dest.replace_one({"_id": doc["_id"]}, doc, upsert=True)
                copied += 1

            # Never reach the drop below unless the copy is complete.
            if copied != expected or dest.count_documents({}) < expected:
                raise RuntimeError(
                    f"Copy of {name!r} is incomplete: expected {expected} "
                    f"documents, copied {copied}; source collections were not dropped."
                )

        print("✅ FAISS GridFS collections moved successfully.")

        if drop_source:
            # Drop the old collections from the QA cluster to free up space.
            for name in bucket_collections:
                qa_db.drop_collection(name)
            print("Old FAISS GridFS collections dropped from the QA cluster.")


# Only run when called directly
if __name__ == "__main__":
    migrate_faiss_index()