Tahasaif3 committed on
Commit
c231ecc
·
verified ·
1 Parent(s): a6e0caa

Create fix_collection.py

Browse files
Files changed (1) hide show
  1. app/fix_collection.py +50 -0
app/fix_collection.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ from app.services.vector_store import vector_store
3
+ from app.services.document_ingestion import initialize_knowledge_base_async
4
+
5
async def fix_collection() -> bool:
    """Rebuild the vector collection and re-ingest the knowledge base.

    Runs three steps in order, printing progress to stdout (the messages
    describe the migration as 1536 -> 768 dimensions for Gemini embeddings):

    1. ``vector_store.delete_collection()``. A failure here is only
       reported; the run continues to step 2, which requests
       ``force_recreate=True``.
    2. ``vector_store.create_collection(force_recreate=True)``.
    3. ``await initialize_knowledge_base_async()``.

    Errors are printed rather than raised so the script always ends with a
    readable report.

    Returns:
        ``True`` when steps 2 and 3 both succeeded, ``False`` as soon as
        either of them fails.
    """
    banner = "=" * 60

    print(banner)
    print("🔧 FIXING QDRANT COLLECTION DIMENSIONS")
    print(banner)

    # Step 1: Delete the old collection (best effort).
    print("\n1️⃣ Deleting old collection (1536 dimensions)...")
    try:
        vector_store.delete_collection()
    except Exception as e:
        # Deliberately non-fatal: reported as information, then carry on.
        print(f" ℹ Collection delete: {e}")
    else:
        print(" ✓ Old collection deleted")

    # Step 2: Create new collection.
    print("\n2️⃣ Creating new collection (768 dimensions for Gemini)...")
    try:
        vector_store.create_collection(force_recreate=True)
    except Exception as e:
        print(f" ✗ Error creating collection: {e}")
        return False
    print(" ✓ New collection created")

    # Step 3: Re-ingest all documents.
    print("\n3️⃣ Re-ingesting documents with Gemini embeddings...")
    print(" (This may take a few minutes...)")
    try:
        doc_ids = await initialize_knowledge_base_async()
        # len() stays inside the try so an unexpected return value (e.g.
        # None) is reported as an ingestion error instead of crashing.
        ingested_count = len(doc_ids)
    except Exception as e:
        print(f" ✗ Error ingesting documents: {e}")
        # Local import: only needed on this failure path.
        import traceback
        traceback.print_exc()
        return False
    print(f" ✓ Successfully ingested {ingested_count} documents")

    print("\n" + banner)
    print("✅ MIGRATION COMPLETE!")
    print(banner)
    print("✓ Collection: book_documents (768 dimensions)")
    print(f"✓ Documents: {ingested_count} chunks")
    print("✓ Embedding model: Gemini text-embedding-004")
    print("✓ LLM model: Gemini 2.0 Flash")
    print("\nYou can now start your application! 🚀")
    print(banner)
    return True
48
+
49
if __name__ == "__main__":
    # Script entry point: build the migration coroutine and drive it to
    # completion on a new event loop.
    migration = fix_collection()
    asyncio.run(migration)