# AIDA / scripts / sync_qdrant_with_mongo.py
# destinyebuka's picture
# fyp
# 8c9362b
"""
Sync Qdrant with MongoDB - removes listings from Qdrant that no longer exist in MongoDB.
Usage: python scripts/sync_qdrant_with_mongo.py
"""
import asyncio
import os
from dotenv import load_dotenv
from motor.motor_asyncio import AsyncIOMotorClient
from qdrant_client import QdrantClient
from qdrant_client.models import PointIdsList
# Load settings from a .env file (python-dotenv) before the lookups below run.
load_dotenv()

# MongoDB connection string; DATABASE_URL is consulted only when MONGO_URI is
# missing or empty.
MONGO_URI = os.environ.get("MONGO_URI") or os.environ.get("DATABASE_URL")
# Name of the MongoDB database to open; falls back to "lojiz" when unset.
DB_NAME = os.environ.get("MONGODB_DATABASE", "lojiz")
# Qdrant endpoint and API key; each is None when its variable is unset.
QDRANT_URL = os.environ.get("QDRANT_URL")
QDRANT_API_KEY = os.environ.get("QDRANT_API_KEY")
# Qdrant collection that this script scans and prunes.
COLLECTION_NAME = "listings"
async def _fetch_mongo_ids(db):
    """Return the set of stringified ``_id`` values found in ``db.listings``."""
    # Project only ``_id`` so no listing bodies are transferred.
    return {str(doc["_id"]) async for doc in db.listings.find({}, {"_id": 1})}


def _fetch_qdrant_points(qdrant, batch_size=100):
    """Scroll COLLECTION_NAME and return ``(point_id, mongo_id, title)`` tuples.

    Args:
        qdrant: Client exposing ``scroll``; called synchronously.
        batch_size: Number of points requested per ``scroll`` call.

    Returns:
        A list of tuples in the order Qdrant returned the points. ``mongo_id``
        is always a string: the ``mongo_id`` payload value when present and
        truthy, otherwise the point id. ``title`` defaults to ``"Unknown"``.
    """
    records = []
    offset = None
    while True:
        points, next_offset = qdrant.scroll(
            collection_name=COLLECTION_NAME,
            limit=batch_size,
            offset=offset,
            with_payload=["mongo_id", "title"],
            with_vectors=False,
        )
        for point in points:
            # The payload can be None for points stored without one.
            payload = point.payload or {}
            # Normalise to str so the value is comparable with the
            # stringified MongoDB ids even if it was stored as a number.
            mongo_id = str(payload.get("mongo_id") or point.id)
            records.append((point.id, mongo_id, payload.get("title", "Unknown")))
        # Stop on an empty page or when Qdrant reports no further page.
        if not points or next_offset is None:
            break
        offset = next_offset
    return records


async def sync_qdrant_with_mongo():
    """Remove listings from Qdrant that don't exist in MongoDB.

    Steps:
        1. Collect every ``_id`` from the MongoDB ``listings`` collection.
        2. Scroll through every point in the Qdrant collection.
        3. Report the points whose MongoDB id is not among those collected.
        4. After an interactive ``y`` confirmation, delete those points.

    Progress is printed to stdout and the confirmation is read from stdin.
    Both clients are closed on every exit path, including when a database
    call raises.

    Returns:
        None.
    """
    print("🔄 Connecting to MongoDB...")
    mongo_client = AsyncIOMotorClient(MONGO_URI)
    qdrant = None
    try:
        db = mongo_client[DB_NAME]

        print("🔄 Connecting to Qdrant...")
        qdrant = QdrantClient(url=QDRANT_URL, api_key=QDRANT_API_KEY)

        # Step 1: Get all listing IDs from MongoDB
        print("\n📊 Fetching MongoDB listings...")
        mongo_ids = await _fetch_mongo_ids(db)
        print(f" Found {len(mongo_ids)} listings in MongoDB")

        # Step 2: Get all point IDs from Qdrant
        print("\n📊 Fetching Qdrant points...")
        qdrant_points = _fetch_qdrant_points(qdrant)
        print(f" Found {len(qdrant_points)} points in Qdrant")

        # Step 3: Find IDs in Qdrant but not in MongoDB
        stale_points = [
            (point_id, title)
            for point_id, mongo_id, title in qdrant_points
            if mongo_id not in mongo_ids
        ]
        print(f"\n🗑️ Found {len(stale_points)} stale points to delete:")
        for point_id, title in stale_points:
            print(f" - {title} (ID: {point_id})")

        if not stale_points:
            print("\n✅ Qdrant is already in sync with MongoDB!")
            return

        if not mongo_ids:
            # An empty MongoDB result makes every point look stale, which is
            # far more likely a wrong URI/database than a real mass deletion.
            print(
                "\n⚠️ MongoDB returned no listings, so every Qdrant point is "
                "treated as stale. Check the MongoDB settings before confirming."
            )

        # Step 4: Delete stale points from Qdrant
        confirm = input(f"\n⚠️ Delete {len(stale_points)} stale points from Qdrant? (y/n): ")
        # Tolerate surrounding whitespace; anything other than "y" cancels.
        if confirm.strip().lower() != "y":
            print("❌ Cancelled")
            return

        qdrant.delete(
            collection_name=COLLECTION_NAME,
            points_selector=PointIdsList(
                points=[point_id for point_id, _ in stale_points]
            ),
        )
        print(f"\n🎉 Deleted {len(stale_points)} stale points from Qdrant!")
        print("✅ Qdrant is now synced with MongoDB")
    finally:
        # Release both connections regardless of how the run ended.
        mongo_client.close()
        if qdrant is not None:
            qdrant.close()
if __name__ == "__main__":
    # Script entry point: build the coroutine, then run it to completion on a
    # fresh event loop.
    sync_job = sync_qdrant_with_mongo()
    asyncio.run(sync_job)