#!/usr/bin/env python3
"""Simple Qdrant collection backup script.

Backs up collection metadata and all points to JSON files.
"""

import json
import os
import sys
from datetime import datetime

# NOTE(review): QdrantClient, PointStruct and Filter are not used in this
# file; they are kept in case other modules import them from here.
from qdrant_client import QdrantClient  # noqa: F401
from qdrant_client.http.models import PointStruct  # noqa: F401
from qdrant_client.models import Filter  # noqa: F401

from core.clients import get_qdrant
from core.config import QDRANT_COLLECTION


def _write_json(path, data):
    """Write *data* to *path* as indented JSON using UTF-8."""
    with open(path, "w", encoding="utf-8") as f:
        json.dump(data, f, indent=2)


def _distance_name(distance):
    """Return the distance metric as a plain string."""
    return distance.value if hasattr(distance, "value") else str(distance)


def _vector_params_to_dict(vector_params):
    """Reduce one vector configuration to its size and distance metric."""
    return {
        "size": vector_params.size,
        "distance": _distance_name(vector_params.distance),
    }


def _build_collection_info(collection_name, collection_info):
    """Build the JSON-serializable metadata dict for a collection."""
    params = collection_info.config.params
    vectors_config = params.vectors

    if vectors_config is None:
        # No dense vector configuration present; record nothing rather than
        # failing on a missing ``size`` attribute.
        vectors = {}
    elif isinstance(vectors_config, dict):
        # Named vectors: one entry per vector name.
        vectors = {
            name: _vector_params_to_dict(vector_params)
            for name, vector_params in vectors_config.items()
        }
    else:
        # Single unnamed vector configuration.
        vectors = {"default": _vector_params_to_dict(vectors_config)}

    return {
        "collection_name": collection_name,
        "vectors_config": vectors,
        "config": {
            "replication_factor": params.replication_factor,
            "write_consistency_factor": params.write_consistency_factor,
            "shard_number": params.shard_number,
        },
    }


def _vector_to_jsonable(vector):
    """Convert one vector value into a JSON-serializable structure."""
    # NOTE(review): presumably sparse vectors arrive as pydantic models;
    # verify against the qdrant-client version in use. Such objects are
    # iterable, but list() on them yields (field, value) pairs, so dump them
    # to a mapping instead.
    for method_name in ("model_dump", "dict"):
        dump = getattr(vector, method_name, None)
        if callable(dump):
            return dump()
    if hasattr(vector, "__iter__"):
        return list(vector)
    return vector


def _point_to_dict(point):
    """Convert a scrolled point into a dict with id, payload and vector."""
    vector = point.vector
    if isinstance(vector, dict):
        # Named vectors; entries whose value is None are skipped.
        serialized = {
            name: _vector_to_jsonable(data)
            for name, data in vector.items()
            if data is not None
        }
    elif vector is not None:
        # Single unnamed vector.
        serialized = _vector_to_jsonable(vector)
    else:
        serialized = {}
    return {"id": point.id, "payload": point.payload, "vector": serialized}


def _scroll_all_points(client, collection_name, batch_size):
    """Scroll through the collection and return all points as dicts."""
    all_points = []
    offset = None
    while True:
        points, next_offset = client.scroll(
            collection_name=collection_name,
            limit=batch_size,
            offset=offset,
            with_payload=True,
            with_vectors=True,
        )
        if not points:
            break

        all_points.extend(_point_to_dict(point) for point in points)
        print(f"Backed up {len(all_points)} points...")

        # A missing next offset marks the last page.
        if next_offset is None:
            break
        offset = next_offset
    return all_points


def backup_collection(client, collection_name, backup_dir="./backups", batch_size=100):
    """Backup a Qdrant collection to JSON files.

    Creates ``<backup_dir>/<collection_name>_<timestamp>/`` and writes
    ``collection_info.json``, ``points.json`` and ``backup_summary.json``
    into it. All points are held in memory before being written.

    Args:
        client: QdrantClient instance.
        collection_name: Name of collection to backup.
        backup_dir: Directory to save backup files.
        batch_size: Number of points requested per scroll call.

    Returns:
        The path of the created backup directory, or None if the backup
        failed (the error is printed). A partially written directory may
        remain on disk in that case.

    Raises:
        ValueError: If ``batch_size`` is less than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    # Create backup directory
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    collection_backup_dir = os.path.join(backup_dir, f"{collection_name}_{timestamp}")
    os.makedirs(collection_backup_dir, exist_ok=True)

    print(f"Starting backup of collection '{collection_name}'...")

    try:
        # 1. Backup collection info. The dict is built before the file is
        # opened so a failure here does not leave an empty JSON file behind.
        collection_info = client.get_collection(collection_name)
        info_dict = _build_collection_info(collection_name, collection_info)
        _write_json(os.path.join(collection_backup_dir, "collection_info.json"), info_dict)
        print("✓ Collection metadata backed up")

        # 2. Backup all points
        all_points = _scroll_all_points(client, collection_name, batch_size)
        _write_json(os.path.join(collection_backup_dir, "points.json"), all_points)
        print(f"✓ {len(all_points)} points backed up")

        # 3. Create backup summary
        summary = {
            "collection_name": collection_name,
            "backup_timestamp": timestamp,
            "total_points": len(all_points),
            "backup_files": ["collection_info.json", "points.json"],
        }
        _write_json(os.path.join(collection_backup_dir, "backup_summary.json"), summary)

        print("✓ Backup completed successfully!")
        print(f"Backup location: {collection_backup_dir}")
        print("Files created:")
        print(" - collection_info.json (metadata)")
        print(f" - points.json ({len(all_points)} points)")
        print(" - backup_summary.json (summary)")

        return collection_backup_dir

    except Exception as e:
        # Script-level boundary: report the failure and signal it to the
        # caller through the None return value.
        print(f"❌ Backup failed: {e}")
        return None


def main():
    """List the available collections, then back up QDRANT_COLLECTION.

    Returns:
        0 if the backup succeeded, 1 otherwise (used as the exit status).
    """
    # Initialize client
    client = get_qdrant()

    # List available collections
    collections = client.get_collections()
    print("Available collections:")
    for collection in collections.collections:
        print(f" - {collection.name}")

    # Backup the specified collection
    backup_dir = backup_collection(client, QDRANT_COLLECTION)

    if backup_dir:
        print(f"\n🎉 Backup successful! Files saved to: {backup_dir}")
        return 0

    print("\n❌ Backup failed!")
    return 1


if __name__ == "__main__":
    sys.exit(main())