# object-memory / qdrant_utils / qdrant_backup.py
# author: russ4stall — commit 24f3fb6 ("fresh history")
#!/usr/bin/env python3
"""
Simple Qdrant Collection Backup Script
Backs up collection metadata and all points to JSON files
"""
from qdrant_client import QdrantClient
from core.clients import get_qdrant
from core.config import QDRANT_COLLECTION
from qdrant_client.http.models import PointStruct
import json
import os
from datetime import datetime
from qdrant_client import QdrantClient
from qdrant_client.models import Filter
def _serialize_vectors_config(vectors_config):
    """Convert a Qdrant vectors config (named or single) to a JSON-safe dict.

    Returns a mapping of vector name -> {"size", "distance"}; a single
    (unnamed) vector config is stored under the key "default".
    """
    def _params_to_dict(vp):
        # Distance may be an enum (has .value) or already a plain string.
        distance = vp.distance.value if hasattr(vp.distance, "value") else str(vp.distance)
        return {"size": vp.size, "distance": distance}

    if isinstance(vectors_config, dict):
        # Named vectors (e.g. clip_text_embedding, clip_image_embedding)
        return {name: _params_to_dict(vp) for name, vp in vectors_config.items()}
    # Single unnamed vector config
    return {"default": _params_to_dict(vectors_config)}


def _serialize_point(point):
    """Convert one scrolled point into a JSON-serializable dict.

    Handles both named vectors (dict of name -> values) and a single
    unnamed vector; named vectors that are None are omitted.
    """
    rec = {"id": point.id, "payload": point.payload, "vector": {}}
    if isinstance(point.vector, dict):
        # Named vectors: keep only those actually present on this point.
        for name, values in point.vector.items():
            if values is not None:
                rec["vector"][name] = list(values) if hasattr(values, "__iter__") else values
    elif point.vector is not None:
        # Single vector
        rec["vector"] = list(point.vector) if hasattr(point.vector, "__iter__") else point.vector
    return rec


def backup_collection(client, collection_name, backup_dir="./backups"):
    """
    Backup a Qdrant collection to JSON files.

    Creates a timestamped subdirectory under *backup_dir* containing:
      - collection_info.json: vector and replication configuration
      - points.json: every point (id, payload, vectors)
      - backup_summary.json: point count and file manifest

    Args:
        client: QdrantClient instance (only ``get_collection`` and
            ``scroll`` are used).
        collection_name: Name of the collection to back up.
        backup_dir: Directory under which the backup folder is created.

    Returns:
        Path of the created backup directory, or None on failure.
    """
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    collection_backup_dir = os.path.join(backup_dir, f"{collection_name}_{timestamp}")
    os.makedirs(collection_backup_dir, exist_ok=True)
    print(f"Starting backup of collection '{collection_name}'...")
    try:
        # 1. Collection metadata — build the dict fully before opening the
        # file so a serialization error cannot leave a half-written file.
        collection_info = client.get_collection(collection_name)
        params = collection_info.config.params
        info_dict = {
            "collection_name": collection_name,
            "vectors_config": _serialize_vectors_config(params.vectors),
            "config": {
                "replication_factor": params.replication_factor,
                "write_consistency_factor": params.write_consistency_factor,
                "shard_number": params.shard_number,
            },
        }
        with open(os.path.join(collection_backup_dir, "collection_info.json"), "w") as f:
            json.dump(info_dict, f, indent=2)
        print("βœ“ Collection metadata backed up")

        # 2. All points, paged via scroll until the cursor is exhausted.
        batch_size = 100
        offset = None
        all_points = []
        while True:
            points, next_offset = client.scroll(
                collection_name=collection_name,
                limit=batch_size,
                offset=offset,
                with_payload=True,
                with_vectors=True,
            )
            if not points:
                break
            all_points.extend(_serialize_point(p) for p in points)
            print(f"Backed up {len(all_points)} points...")
            if next_offset is None:
                break
            offset = next_offset

        with open(os.path.join(collection_backup_dir, "points.json"), "w") as f:
            json.dump(all_points, f, indent=2)
        print(f"βœ“ {len(all_points)} points backed up")

        # 3. Summary manifest describing the backup contents.
        summary = {
            "collection_name": collection_name,
            "backup_timestamp": timestamp,
            "total_points": len(all_points),
            "backup_files": ["collection_info.json", "points.json"],
        }
        with open(os.path.join(collection_backup_dir, "backup_summary.json"), "w") as f:
            json.dump(summary, f, indent=2)

        print("βœ“ Backup completed successfully!")
        print(f"Backup location: {collection_backup_dir}")
        print("Files created:")
        print(" - collection_info.json (metadata)")
        print(f" - points.json ({len(all_points)} points)")
        print(" - backup_summary.json (summary)")
        return collection_backup_dir
    except Exception as e:
        # Top-level boundary: report the failure and signal it via None so
        # the caller (main) can decide what to do.
        print(f"❌ Backup failed: {str(e)}")
        return None
def main():
    """List the server's collections, then back up QDRANT_COLLECTION."""
    client = get_qdrant()

    # Show what exists on the server before starting the backup.
    print("Available collections:")
    for coll in client.get_collections().collections:
        print(f" - {coll.name}")

    # Back up the configured collection and report the outcome.
    result = backup_collection(client, QDRANT_COLLECTION)
    if result:
        print(f"\nπŸŽ‰ Backup successful! Files saved to: {result}")
    else:
        print("\n❌ Backup failed!")


if __name__ == "__main__":
    main()