Spaces:
Configuration error
Configuration error
| #!/usr/bin/env python3 | |
| """ | |
| Simple Qdrant Collection Backup Script | |
| Backs up collection metadata and all points to JSON files | |
| """ | |
| from qdrant_client import QdrantClient | |
| from core.clients import get_qdrant | |
| from core.config import QDRANT_COLLECTION | |
| from qdrant_client.http.models import PointStruct | |
| import json | |
| import os | |
| from datetime import datetime | |
| from qdrant_client import QdrantClient | |
| from qdrant_client.models import Filter | |
def _distance_name(distance):
    """Return a JSON-friendly name for a distance setting.

    Uses the object's ``value`` attribute when it has one, otherwise falls
    back to ``str(distance)``.
    """
    return distance.value if hasattr(distance, "value") else str(distance)


def _describe_vectors(vectors_config):
    """Map each vector name to its ``size`` and ``distance``.

    A dict config is treated as named vectors; any other non-None config is
    stored under the key ``"default"``.
    """
    if vectors_config is None:
        # Without this guard the attribute access below raises AttributeError
        # and the whole backup is reported as failed.
        # NOTE(review): presumably this is a collection without dense
        # vectors -- confirm against the client's collection models.
        return {}
    named = vectors_config if isinstance(vectors_config, dict) else {"default": vectors_config}
    return {
        name: {"size": params.size, "distance": _distance_name(params.distance)}
        for name, params in named.items()
    }


def _as_list(vector_data):
    """Materialize an iterable vector as a list; pass non-iterables through."""
    return list(vector_data) if hasattr(vector_data, "__iter__") else vector_data


def _point_to_dict(point):
    """Convert one scrolled point into a JSON-serializable dict."""
    vector = point.vector
    if isinstance(vector, dict):
        # Named vectors: entries whose data is None are omitted.
        serialized = {
            name: _as_list(data) for name, data in vector.items() if data is not None
        }
    elif vector is not None:
        # Single unnamed vector.
        serialized = _as_list(vector)
    else:
        # No vector at all: keep the empty-dict placeholder.
        serialized = {}
    return {"id": point.id, "payload": point.payload, "vector": serialized}


def _write_json(path, data):
    """Write ``data`` to ``path`` as indented JSON."""
    with open(path, "w", encoding="utf-8") as f:
        json.dump(data, f, indent=2)


def backup_collection(client, collection_name, backup_dir="./backups"):
    """
    Backup a Qdrant collection to JSON files.

    Creates ``<backup_dir>/<collection_name>_<YYYYmmdd_HHMMSS>/`` and writes
    three files into it: ``collection_info.json`` (vector configuration and
    basic collection parameters), ``points.json`` (every point with its id,
    payload and vectors) and ``backup_summary.json``.

    All points are accumulated in memory before ``points.json`` is written.

    Args:
        client: QdrantClient instance (only ``get_collection`` and ``scroll``
            are called on it)
        collection_name: Name of collection to backup
        backup_dir: Directory to save backup files

    Returns:
        The path of the directory holding the backup files, or ``None`` when
        any step after the directory was created raised an exception (the
        error is printed, not re-raised).

    Raises:
        OSError: If the backup directory itself cannot be created; this
            happens before the guarded section.
    """
    # Create backup directory
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    collection_backup_dir = os.path.join(backup_dir, f"{collection_name}_{timestamp}")
    os.makedirs(collection_backup_dir, exist_ok=True)
    print(f"Starting backup of collection '{collection_name}'...")
    try:
        # 1. Backup collection info. The dict is fully assembled before the
        # file is opened so a failure here does not leave an empty file.
        collection_info = client.get_collection(collection_name)
        params = collection_info.config.params
        info_dict = {
            "collection_name": collection_name,
            "vectors_config": _describe_vectors(params.vectors),
            "config": {
                "replication_factor": params.replication_factor,
                "write_consistency_factor": params.write_consistency_factor,
                "shard_number": params.shard_number,
            },
        }
        _write_json(os.path.join(collection_backup_dir, "collection_info.json"), info_dict)
        print("β Collection metadata backed up")

        # 2. Backup all points, one scroll page at a time.
        batch_size = 100
        offset = None
        all_points = []
        while True:
            points, next_offset = client.scroll(
                collection_name=collection_name,
                limit=batch_size,
                offset=offset,
                with_payload=True,
                with_vectors=True,
            )
            if not points:
                break
            all_points.extend(_point_to_dict(point) for point in points)
            print(f"Backed up {len(all_points)} points...")
            # A missing next offset means this was the last page.
            if next_offset is None:
                break
            offset = next_offset
        _write_json(os.path.join(collection_backup_dir, "points.json"), all_points)
        print(f"β {len(all_points)} points backed up")

        # 3. Create backup summary
        summary = {
            "collection_name": collection_name,
            "backup_timestamp": timestamp,
            "total_points": len(all_points),
            "backup_files": ["collection_info.json", "points.json"],
        }
        _write_json(os.path.join(collection_backup_dir, "backup_summary.json"), summary)
        print("β Backup completed successfully!")
        print(f"Backup location: {collection_backup_dir}")
        print("Files created:")
        print(" - collection_info.json (metadata)")
        print(f" - points.json ({len(all_points)} points)")
        print(" - backup_summary.json (summary)")
        return collection_backup_dir
    except Exception as e:
        # Top-level boundary for the script: report the error and signal
        # failure through the return value, which main() checks.
        print(f"β Backup failed: {e}")
        return None
def main():
    """List the available collections, then back up ``QDRANT_COLLECTION``.

    Obtains a client from ``get_qdrant()``, prints the name of every
    collection it reports, runs ``backup_collection`` for the configured
    collection and prints whether the backup succeeded.
    """
    qdrant = get_qdrant()

    # Show what exists on the server before starting the backup.
    listing = qdrant.get_collections()
    print("Available collections:")
    for entry in listing.collections:
        print(f" - {entry.name}")

    # backup_collection returns a falsy value when the backup did not succeed.
    saved_to = backup_collection(qdrant, QDRANT_COLLECTION)
    if not saved_to:
        print("\nβ Backup failed!")
        return
    print(f"\nπ Backup successful! Files saved to: {saved_to}")
# Run the backup only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()