"""
Sync Qdrant with MongoDB - removes listings from Qdrant that no longer exist in MongoDB.

Usage:
    python scripts/sync_qdrant_with_mongo.py
"""

import asyncio
import os

from dotenv import load_dotenv
from motor.motor_asyncio import AsyncIOMotorClient
from qdrant_client import QdrantClient
from qdrant_client.models import PointIdsList

load_dotenv()

MONGO_URI = os.getenv("MONGO_URI") or os.getenv("DATABASE_URL")
DB_NAME = os.getenv("MONGODB_DATABASE", "lojiz")
QDRANT_URL = os.getenv("QDRANT_URL")
QDRANT_API_KEY = os.getenv("QDRANT_API_KEY")
COLLECTION_NAME = "listings"

# Number of points requested per Qdrant scroll page.
SCROLL_PAGE_SIZE = 100
# Maximum number of point IDs sent in a single Qdrant delete request.
DELETE_BATCH_SIZE = 500


def _fetch_qdrant_points(qdrant):
    """Scroll through the whole collection and return ``{point_id: (mongo_id, title)}``.

    ``mongo_id`` is the payload's ``mongo_id`` field when present and truthy,
    otherwise the point's own ID; it is always converted to ``str`` so it can
    be compared with the stringified MongoDB ``_id`` values.
    """
    points_by_id = {}
    offset = None
    while True:
        points, next_offset = qdrant.scroll(
            collection_name=COLLECTION_NAME,
            limit=SCROLL_PAGE_SIZE,
            offset=offset,
            with_payload=["mongo_id", "title"],
            with_vectors=False,
        )
        for point in points:
            # A point stored without any payload may come back with
            # payload=None; treat that the same as an empty payload.
            payload = point.payload or {}
            mongo_id = str(payload.get("mongo_id") or point.id)
            points_by_id[point.id] = (mongo_id, payload.get("title", "Unknown"))

        # An empty page or a missing next offset both mean the scan is done.
        if not points or next_offset is None:
            break
        offset = next_offset
    return points_by_id


def _find_stale_points(qdrant_points, mongo_ids):
    """Return ``[(point_id, title), ...]`` for points whose mongo_id is not in *mongo_ids*."""
    return [
        (point_id, title)
        for point_id, (mongo_id, title) in qdrant_points.items()
        if mongo_id not in mongo_ids
    ]


def _confirm(prompt):
    """Ask a y/n question on stdin; return True only for an explicit 'y'.

    A closed or non-interactive stdin (EOF) counts as "no", so the script
    never deletes anything without an explicit confirmation.
    """
    try:
        answer = input(prompt)
    except EOFError:
        return False
    return answer.strip().lower() == "y"


def _delete_points(qdrant, point_ids):
    """Delete *point_ids* from the collection in batches of DELETE_BATCH_SIZE."""
    for start in range(0, len(point_ids), DELETE_BATCH_SIZE):
        batch = point_ids[start:start + DELETE_BATCH_SIZE]
        qdrant.delete(
            collection_name=COLLECTION_NAME,
            points_selector=PointIdsList(points=batch),
        )


async def sync_qdrant_with_mongo():
    """Remove listings from Qdrant that don't exist in MongoDB.

    Steps:
      1. Collect every ``_id`` of the MongoDB ``listings`` collection (as str).
      2. Scroll through every point of the Qdrant collection.
      3. Report the points whose mongo_id is unknown to MongoDB.
      4. After an interactive confirmation, delete those points from Qdrant.

    The MongoDB client is always closed, including when a step raises.
    Returns None; progress is reported on stdout.
    """
    print("🔄 Connecting to MongoDB...")
    mongo_client = AsyncIOMotorClient(MONGO_URI)
    try:
        db = mongo_client[DB_NAME]

        print("🔄 Connecting to Qdrant...")
        qdrant = QdrantClient(url=QDRANT_URL, api_key=QDRANT_API_KEY)

        # Step 1: Get all listing IDs from MongoDB
        print("\n📊 Fetching MongoDB listings...")
        mongo_ids = {
            str(doc["_id"]) async for doc in db.listings.find({}, {"_id": 1})
        }
        print(f" Found {len(mongo_ids)} listings in MongoDB")

        # Step 2: Get all point IDs from Qdrant
        print("\n📊 Fetching Qdrant points...")
        qdrant_points = _fetch_qdrant_points(qdrant)
        print(f" Found {len(qdrant_points)} points in Qdrant")

        # Step 3: Find IDs in Qdrant but not in MongoDB
        stale_points = _find_stale_points(qdrant_points, mongo_ids)
        print(f"\n🗑️ Found {len(stale_points)} stale points to delete:")
        for point_id, title in stale_points:
            print(f" - {title} (ID: {point_id})")

        if not stale_points:
            print("\n✅ Qdrant is already in sync with MongoDB!")
            return

        if not mongo_ids:
            # An empty MongoDB result flags every Qdrant point as stale; make
            # that explicit so a misconfigured URI/database is not confirmed
            # by accident.
            print(
                "\n⚠️ MongoDB returned 0 listings - every Qdrant point is "
                "considered stale. Check MONGO_URI / MONGODB_DATABASE before "
                "confirming."
            )

        # Step 4: Delete stale points from Qdrant
        if not _confirm(
            f"\n⚠️ Delete {len(stale_points)} stale points from Qdrant? (y/n): "
        ):
            print("❌ Cancelled")
            return

        _delete_points(qdrant, [point_id for point_id, _ in stale_points])

        print(f"\n🎉 Deleted {len(stale_points)} stale points from Qdrant!")
        print("✅ Qdrant is now synced with MongoDB")
    finally:
        mongo_client.close()


if __name__ == "__main__":
    asyncio.run(sync_qdrant_with_mongo())