File size: 6,094 Bytes
a0368fa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
#!/usr/bin/env python3
"""
Example script to test the face clustering system
Demonstrates how to use the API programmatically
"""

import json
from pathlib import Path
from face_clustering import MemoryEfficientFaceClustering


def analyze_results(database_path: str = "database.json"):
    """
    Analyze and display clustering results from database.json

    Prints the run metadata, the images grouped by cluster name, and a
    recommendation for tuning eps based on how the number of clusters
    compares with the number of images that contain faces.

    Args:
        database_path: Path to the database.json file

    Raises:
        OSError: If the database file cannot be opened
        json.JSONDecodeError: If the file is not valid JSON
    """
    # Read as UTF-8 explicitly so non-ASCII filenames load identically on
    # every platform instead of depending on the locale encoding.
    with open(database_path, 'r', encoding="utf-8") as f:
        data = json.load(f)

    print("\n" + "=" * 60)
    print("CLUSTERING ANALYSIS")
    print("=" * 60)

    # Display metadata
    metadata = data.get("metadata", {})
    print("\n📊 OVERALL STATISTICS:")
    print(f"  Total images processed: {metadata.get('total_images', 0)}")
    print(f"  Images with faces: {metadata.get('images_with_faces', 0)}")
    print(f"  Unique people identified: {metadata.get('num_clusters', 0)}")
    print(f"  Clustering threshold (eps): {metadata.get('dbscan_eps', 'N/A')}")

    # Group by cluster
    clusters = {}
    for filename, info in data.get("images", {}).items():
        # A missing or null cluster name is filed under "Unknown"; keeping
        # every key a string also keeps the sorted() call below safe.
        cluster_name = info.get("cluster_name") or "Unknown"
        clusters.setdefault(cluster_name, []).append(filename)

    # Display cluster breakdown
    print("\n👥 CLUSTER BREAKDOWN:")
    for cluster_name in sorted(clusters):
        images = clusters[cluster_name]
        print(f"\n  📁 {cluster_name} ({len(images)} images):")
        for img in sorted(images):
            print(f"     • {img}")

    # Show recommendations
    print("\n" + "=" * 60)
    print("💡 RECOMMENDATIONS")
    print("=" * 60)

    num_clusters = metadata.get('num_clusters', 0)
    num_faces = metadata.get('images_with_faces', 0)

    if num_faces == 0:
        # Without any detected faces the ratio checks below are meaningless
        # and would otherwise fall through to the "looks good" message.
        print("⚠️  No images with faces were found - nothing to evaluate")
        print("   Check that the source directory contains photos with visible faces")
    elif num_clusters == 0:
        print("⚠️  All faces clustered as noise/outliers")
        print("   Try INCREASING eps value (e.g., 0.6 or 0.65)")
    elif num_clusters > num_faces * 0.7:
        print("⚠️  Too many clusters (many single-image clusters)")
        print("   Try INCREASING eps value to merge similar faces")
    elif num_clusters < num_faces * 0.2:
        print("⚠️  Very few clusters (might be merging different people)")
        print("   Try DECREASING eps value for stricter matching")
    else:
        print("✅ Clustering looks good!")

    print("\n")


def rename_files_from_database(database_path: str = "database.json", dry_run: bool = True,
                               output_dir: str = "clustered_faces"):
    """
    Example: Rename files based on cluster assignments

    Each image with a non-negative cluster_id is expected at
    <output_dir>/<cluster_name>/<filename> and is renamed in place to
    <cluster_name>_<filename>. Images with a negative or null cluster_id
    are reported as skipped.

    Args:
        database_path: Path to database.json
        dry_run: If True, only print what would be renamed (don't actually rename)
        output_dir: Directory that holds the per-cluster folders

    Raises:
        OSError: If the database file cannot be opened or a rename fails
        json.JSONDecodeError: If the file is not valid JSON
    """
    # Read as UTF-8 explicitly so non-ASCII filenames load identically on
    # every platform instead of depending on the locale encoding.
    with open(database_path, 'r', encoding="utf-8") as f:
        data = json.load(f)

    print("\n" + "=" * 60)
    print("FILE RENAMING" + (" (DRY RUN)" if dry_run else ""))
    print("=" * 60)

    clustered_root = Path(output_dir)

    for filename, info in data.get("images", {}).items():
        cluster_id = info.get("cluster_id", -1)
        cluster_name = info.get("cluster_name", "Unknown")

        # A JSON null would make the numeric comparison raise, so treat it
        # the same as a negative id: nothing to rename.
        if cluster_id is None or cluster_id < 0:
            print(f"Skipped: {filename} (cluster_id: {cluster_id})")
            continue

        # Generate new name: Person_0_image1.jpg
        old_path = clustered_root / cluster_name / filename
        new_path = old_path.with_name(f"{cluster_name}_{old_path.name}")

        if dry_run:
            print(f"Would rename: {old_path} -> {new_path.name}")
            continue

        if not old_path.exists():
            # Report instead of silently ignoring, so a wrong output_dir or
            # an already-renamed file is visible to the user.
            print(f"Not found: {old_path}")
            continue

        if new_path.exists():
            # Path.rename silently replaces an existing file on POSIX;
            # refuse rather than destroy data.
            print(f"Skipped: {old_path.name} (target {new_path.name} already exists)")
            continue

        old_path.rename(new_path)
        print(f"Renamed: {old_path.name} -> {new_path.name}")

    if dry_run:
        print("\nℹ️  This was a dry run. Set dry_run=False to actually rename files.")


def export_cluster_to_list(database_path: str = "database.json", cluster_id: int = 0):
    """
    Collect and print the filenames assigned to one cluster

    Args:
        database_path: Path to database.json
        cluster_id: Cluster whose images should be listed (0, 1, 2, etc.)

    Returns:
        List of filenames whose "cluster_id" equals cluster_id, in the
        order they appear in the database
    """
    with open(database_path, 'r') as db_file:
        database = json.load(db_file)

    # Walk every image record and keep the names that belong to the cluster
    members = []
    for name, record in database.get("images", {}).items():
        if record.get("cluster_id") != cluster_id:
            continue
        members.append(name)

    print(f"\n📋 Images in cluster {cluster_id} (Person_{cluster_id}):")
    for name in members:
        print(f"  • {name}")

    return members


def main():
    """
    Walk through the example workflow: clustering (disabled by default),
    then analysis, a dry-run rename and a cluster export when
    database.json is present in the current directory
    """
    banner = "=" * 60
    db_path = "database.json"

    print(banner)
    print("FACE CLUSTERING - EXAMPLE USAGE")
    print(banner)

    # Step 1: the clustering run itself is left disabled in this example
    print("\n1️⃣  Running face clustering...")
    print("   (Make sure you have images in the current directory)")

    # Uncomment to actually run:
    # clusterer = MemoryEfficientFaceClustering(
    #     source_dir=".",
    #     output_dir="clustered_faces"
    # )
    # clusterer.run()

    print("   [Skipped - uncomment code to run]")

    # Steps 2-4 all read the database, so they only run when it exists
    if not Path(db_path).exists():
        print(f"\n⚠️  {db_path} not found. Run clustering first!")
    else:
        # Step 2: summarize the clustering results
        print(f"\n2️⃣  Analyzing results from {db_path}...")
        analyze_results(db_path)

        # Step 3: show the renames without touching any file
        print("\n3️⃣  Demonstrating file renaming...")
        rename_files_from_database(db_path, dry_run=True)

        # Step 4: list the members of the first cluster
        print("\n4️⃣  Exporting cluster 0...")
        export_cluster_to_list(db_path, cluster_id=0)

    print("\n" + banner)
    print("Example complete!")
    print(banner)


# Run the example workflow only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()