File size: 3,255 Bytes
8c9362b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
"""
Sync Qdrant with MongoDB - removes listings from Qdrant that no longer exist in MongoDB.

Usage: python scripts/sync_qdrant_with_mongo.py
"""

import asyncio
import os
from dotenv import load_dotenv
from motor.motor_asyncio import AsyncIOMotorClient
from qdrant_client import QdrantClient
from qdrant_client.models import PointIdsList

# Load variables from a .env file into the process environment before any of
# the settings below are read.
load_dotenv()

# MongoDB connection string; falls back to DATABASE_URL when MONGO_URI is
# unset or empty.
MONGO_URI = os.environ.get("MONGO_URI") or os.environ.get("DATABASE_URL")
# MongoDB database name, defaulting to "lojiz" when MONGODB_DATABASE is unset.
DB_NAME = os.environ.get("MONGODB_DATABASE", "lojiz")
# Qdrant endpoint and API key; each is None when its variable is unset.
QDRANT_URL = os.environ.get("QDRANT_URL")
QDRANT_API_KEY = os.environ.get("QDRANT_API_KEY")
# Qdrant collection that is compared against the MongoDB `listings` collection.
COLLECTION_NAME = "listings"


async def sync_qdrant_with_mongo():
    """Remove listings from Qdrant that don't exist in MongoDB.

    Collects every ``_id`` from the MongoDB ``listings`` collection, scrolls
    through every point of the Qdrant collection, and treats a point as stale
    when its ``mongo_id`` payload value (or, when that is missing, its own
    point ID) is not among the MongoDB IDs. Stale points are listed and are
    deleted only after the operator answers ``y`` at the prompt.

    The MongoDB client is closed on every exit path, including errors.

    Returns:
        None. Progress and results are printed to stdout.
    """
    # NOTE(review): several emoji prefixes in the messages below look
    # mis-encoded in this copy of the file; they are kept byte-for-byte here.
    # Confirm the file's encoding before changing them.
    print("πŸ”„ Connecting to MongoDB...")
    mongo_client = AsyncIOMotorClient(MONGO_URI)

    # try/finally guarantees the MongoDB client is released even when a
    # Qdrant call fails or the prompt is interrupted.
    try:
        db = mongo_client[DB_NAME]

        print("πŸ”„ Connecting to Qdrant...")
        qdrant = QdrantClient(url=QDRANT_URL, api_key=QDRANT_API_KEY)

        # Step 1: Get all listing IDs from MongoDB (projection keeps only _id).
        print("\nπŸ“Š Fetching MongoDB listings...")
        mongo_ids = set()
        async for doc in db.listings.find({}, {"_id": 1}):
            mongo_ids.add(str(doc["_id"]))
        print(f"   Found {len(mongo_ids)} listings in MongoDB")

        # Step 2: Get all points from Qdrant, one page of 100 at a time.
        # Keyed by point ID so each point appears once and the listing keeps
        # the order in which Qdrant returned the points.
        print("\nπŸ“Š Fetching Qdrant points...")
        qdrant_points = {}
        offset = None

        while True:
            points, next_offset = qdrant.scroll(
                collection_name=COLLECTION_NAME,
                limit=100,
                offset=offset,
                with_payload=["mongo_id", "title"],
                with_vectors=False,
            )
            if not points:
                break

            for point in points:
                # A point stored without a payload may report None here.
                payload = point.payload or {}
                # Prefer the payload's mongo_id, else fall back to the point
                # ID; stringified so it compares against the str() MongoDB IDs.
                mongo_id = str(payload.get("mongo_id") or point.id)
                title = payload.get("title") or "Unknown"
                qdrant_points[point.id] = (mongo_id, title)

            # A missing next offset means the last page has been read.
            if next_offset is None:
                break
            offset = next_offset

        print(f"   Found {len(qdrant_points)} points in Qdrant")

        # Step 3: Find IDs in Qdrant but not in MongoDB.
        # NOTE(review): if MongoDB returned no listings (e.g. wrong database
        # or URI), every Qdrant point is reported as stale; the confirmation
        # prompt below is the only safeguard.
        stale_points = [
            (point_id, title)
            for point_id, (mongo_id, title) in qdrant_points.items()
            if mongo_id not in mongo_ids
        ]

        print(f"\nπŸ—‘οΈ  Found {len(stale_points)} stale points to delete:")
        for point_id, title in stale_points:
            print(f"   - {title} (ID: {point_id})")

        if not stale_points:
            print("\nβœ… Qdrant is already in sync with MongoDB!")
            return

        # Step 4: Delete stale points from Qdrant, only on explicit consent.
        confirm = input(f"\n⚠️  Delete {len(stale_points)} stale points from Qdrant? (y/n): ")
        # Surrounding whitespace is ignored so "y " still counts as consent.
        if confirm.strip().lower() != 'y':
            print("❌ Cancelled")
            return

        qdrant.delete(
            collection_name=COLLECTION_NAME,
            points_selector=PointIdsList(
                points=[point_id for point_id, _ in stale_points]
            ),
        )

        print(f"\nπŸŽ‰ Deleted {len(stale_points)} stale points from Qdrant!")
        print("βœ… Qdrant is now synced with MongoDB")
    finally:
        mongo_client.close()


if __name__ == "__main__":
    # Script entry point: run the async sync routine to completion on a fresh
    # event loop. Nothing runs when this module is merely imported.
    asyncio.run(sync_qdrant_with_mongo())