#!/usr/bin/env python3
"""
Example script to test the face clustering system
Demonstrates how to use the API programmatically
"""

import json
from collections import defaultdict
from pathlib import Path
from typing import Any, Dict, List

try:
    from face_clustering import MemoryEfficientFaceClustering
except ImportError:
    # The helpers below only read database.json, so they stay usable on a
    # machine where the clustering package (and its dependencies) is absent.
    # Only the commented-out example in main() needs this name.
    MemoryEfficientFaceClustering = None

# Ratios of clusters to images-with-faces used by the tuning hints.
_MANY_CLUSTERS_RATIO = 0.7
_FEW_CLUSTERS_RATIO = 0.2


def _load_database(database_path: str) -> Dict[str, Any]:
    """Read and parse the clustering database.

    The file is decoded as UTF-8 explicitly so that non-ASCII filenames are
    read correctly regardless of the platform's default encoding.
    """
    with open(database_path, "r", encoding="utf-8") as f:
        return json.load(f)


def _recommendation_lines(num_clusters: int, num_faces: int) -> List[str]:
    """Return the tuning hint lines for the given cluster/face counts."""
    if num_faces <= 0:
        # Without any faces the ratio checks below are meaningless; the
        # previous behaviour reported this case as a good clustering.
        return [
            "⚠️ No faces were detected, so there is nothing to cluster",
            " Check that the source images contain detectable faces",
        ]
    if num_clusters == 0:
        return [
            "⚠️ All faces clustered as noise/outliers",
            " Try INCREASING eps value (e.g., 0.6 or 0.65)",
        ]
    if num_clusters > num_faces * _MANY_CLUSTERS_RATIO:
        return [
            "⚠️ Too many clusters (many single-image clusters)",
            " Try INCREASING eps value to merge similar faces",
        ]
    if num_clusters < num_faces * _FEW_CLUSTERS_RATIO:
        return [
            "⚠️ Very few clusters (might be merging different people)",
            " Try DECREASING eps value for stricter matching",
        ]
    return ["✅ Clustering looks good!"]


def analyze_results(database_path: str = "database.json") -> None:
    """
    Analyze and display clustering results from database.json

    Prints the metadata counters, the images grouped by cluster name, and a
    hint on how to adjust the eps value.

    Args:
        database_path: Path to the database.json file
    """
    data = _load_database(database_path)

    print("\n" + "=" * 60)
    print("CLUSTERING ANALYSIS")
    print("=" * 60)

    # Display metadata
    metadata = data.get("metadata", {})
    print("\n📊 OVERALL STATISTICS:")
    print(f" Total images processed: {metadata.get('total_images', 0)}")
    print(f" Images with faces: {metadata.get('images_with_faces', 0)}")
    print(f" Unique people identified: {metadata.get('num_clusters', 0)}")
    print(f" Clustering threshold (eps): {metadata.get('dbscan_eps', 'N/A')}")

    # Group by cluster
    clusters = defaultdict(list)
    for filename, info in data.get("images", {}).items():
        clusters[info.get("cluster_name", "Unknown")].append(filename)

    # Display cluster breakdown
    print("\n👥 CLUSTER BREAKDOWN:")
    for cluster_name in sorted(clusters):
        images = clusters[cluster_name]
        print(f"\n 📁 {cluster_name} ({len(images)} images):")
        for img in sorted(images):
            print(f" • {img}")

    # Show recommendations
    print("\n" + "=" * 60)
    print("💡 RECOMMENDATIONS")
    print("=" * 60)

    hints = _recommendation_lines(
        metadata.get("num_clusters", 0),
        metadata.get("images_with_faces", 0),
    )
    for line in hints:
        print(line)

    print("\n")


def rename_files_from_database(
    database_path: str = "database.json",
    dry_run: bool = True,
    output_dir: str = "clustered_faces",
) -> None:
    """
    Example: Rename files based on cluster assignments

    Each file with a non-negative cluster_id is expected at
    ``<output_dir>/<cluster_name>/<filename>`` and is renamed in place to
    ``<cluster_name>_<filename>``. Entries with a negative cluster_id are
    reported as skipped.

    Args:
        database_path: Path to database.json
        dry_run: If True, only print what would be renamed (don't actually rename)
        output_dir: Directory that holds the per-cluster folders
    """
    data = _load_database(database_path)

    print("\n" + "=" * 60)
    print("FILE RENAMING" + (" (DRY RUN)" if dry_run else ""))
    print("=" * 60)

    for filename, info in data.get("images", {}).items():
        cluster_id = info.get("cluster_id", -1)
        cluster_name = info.get("cluster_name", "Unknown")

        if cluster_id < 0:
            print(f"Skipped: {filename} (cluster_id: {cluster_id})")
            continue

        # Generate new name: Person_0_image1.jpg
        old_path = Path(output_dir) / cluster_name / filename
        new_path = old_path.parent / f"{cluster_name}_{Path(filename).name}"

        if dry_run:
            print(f"Would rename: {old_path} -> {new_path.name}")
        elif not old_path.exists():
            print(f"Not found: {old_path}")
        elif new_path.exists():
            # Path.rename() silently replaces an existing file on POSIX, so
            # refuse rather than destroy data.
            print(f"Skipped: {old_path.name} (target {new_path.name} already exists)")
        else:
            old_path.rename(new_path)
            print(f"Renamed: {old_path.name} -> {new_path.name}")

    if dry_run:
        print("\nℹ️ This was a dry run. Set dry_run=False to actually rename files.")


def export_cluster_to_list(
    database_path: str = "database.json", cluster_id: int = 0
) -> List[str]:
    """
    Export all filenames from a specific cluster

    Args:
        database_path: Path to database.json
        cluster_id: Which cluster to export (0, 1, 2, etc.)

    Returns:
        List of filenames in that cluster, in database order
    """
    data = _load_database(database_path)

    cluster_images = [
        filename
        for filename, info in data.get("images", {}).items()
        if info.get("cluster_id") == cluster_id
    ]

    print(f"\n📋 Images in cluster {cluster_id} (Person_{cluster_id}):")
    for img in cluster_images:
        print(f" • {img}")

    return cluster_images


def main() -> None:
    """
    Example usage of the clustering system
    """
    print("=" * 60)
    print("FACE CLUSTERING - EXAMPLE USAGE")
    print("=" * 60)

    # Option 1: Run clustering with custom settings
    print("\n1️⃣ Running face clustering...")
    print(" (Make sure you have images in the current directory)")

    # Uncomment to actually run (requires the face_clustering package):
    # clusterer = MemoryEfficientFaceClustering(
    #     source_dir=".",
    #     output_dir="clustered_faces"
    # )
    # clusterer.run()

    print(" [Skipped - uncomment code to run]")

    # Option 2: Analyze results
    db_path = "database.json"
    if Path(db_path).exists():
        print(f"\n2️⃣ Analyzing results from {db_path}...")
        analyze_results(db_path)

        # Option 3: Demonstrate renaming
        print("\n3️⃣ Demonstrating file renaming...")
        rename_files_from_database(db_path, dry_run=True)

        # Option 4: Export specific cluster
        print("\n4️⃣ Exporting cluster 0...")
        export_cluster_to_list(db_path, cluster_id=0)
    else:
        print(f"\n⚠️ {db_path} not found. Run clustering first!")

    print("\n" + "=" * 60)
    print("Example complete!")
    print("=" * 60)


if __name__ == "__main__":
    main()