#!/usr/bin/env python3
# Snapshot metadata: commit a0368fa, file size 6,094 bytes.
"""
Example script to test the face clustering system
Demonstrates how to use the API programmatically
"""
import json
from pathlib import Path
from face_clustering import MemoryEfficientFaceClustering
def analyze_results(database_path: str = "database.json") -> None:
    """Print a human-readable summary of the clustering database.

    Reads the JSON database, prints the statistics stored under
    ``metadata``, lists image filenames grouped by their ``cluster_name``
    and ends with a hint on how to tune the DBSCAN ``eps`` value.

    Args:
        database_path: Path to the database.json file.

    Raises:
        OSError: If the database file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # JSON is UTF-8 by specification; do not rely on the locale default,
    # otherwise non-ASCII filenames break on some platforms.
    with open(database_path, "r", encoding="utf-8") as f:
        data = json.load(f)

    separator = "=" * 60
    print("\n" + separator)
    print("CLUSTERING ANALYSIS")
    print(separator)

    # Display metadata
    metadata = data.get("metadata", {})
    print("\n📊 OVERALL STATISTICS:")
    print(f" Total images processed: {metadata.get('total_images', 0)}")
    print(f" Images with faces: {metadata.get('images_with_faces', 0)}")
    print(f" Unique people identified: {metadata.get('num_clusters', 0)}")
    print(f" Clustering threshold (eps): {metadata.get('dbscan_eps', 'N/A')}")

    # Group filenames by the cluster they were assigned to
    clusters = {}
    for filename, info in data.get("images", {}).items():
        cluster_name = info.get("cluster_name", "Unknown")
        clusters.setdefault(cluster_name, []).append(filename)

    # Display cluster breakdown
    print("\n👥 CLUSTER BREAKDOWN:")
    for cluster_name in sorted(clusters):
        images = clusters[cluster_name]
        print(f"\n 📁 {cluster_name} ({len(images)} images):")
        for img in sorted(images):
            print(f" • {img}")

    # Show recommendations
    print("\n" + separator)
    print("💡 RECOMMENDATIONS")
    print(separator)
    num_clusters = metadata.get("num_clusters", 0)
    num_faces = metadata.get("images_with_faces", 0)
    if num_faces <= 0:
        # Without any detected face the ratio checks below are meaningless
        # and would otherwise fall through to "looks good".
        print("⚠️ No faces detected - nothing to evaluate")
        print(" Check that the source directory contains images with faces")
    elif num_clusters == 0:
        print("⚠️ All faces clustered as noise/outliers")
        print(" Try INCREASING eps value (e.g., 0.6 or 0.65)")
    elif num_clusters > num_faces * 0.7:
        # More than 70% as many clusters as faces: mostly singletons.
        print("⚠️ Too many clusters (many single-image clusters)")
        print(" Try INCREASING eps value to merge similar faces")
    elif num_clusters < num_faces * 0.2:
        # Fewer than 20% as many clusters as faces: likely over-merged.
        print("⚠️ Very few clusters (might be merging different people)")
        print(" Try DECREASING eps value for stricter matching")
    else:
        print("✅ Clustering looks good!")
    print("\n")
def rename_files_from_database(
    database_path: str = "database.json", dry_run: bool = True
) -> None:
    """Prefix clustered image files with the name of their cluster.

    For every image whose ``cluster_id`` is non-negative, the file
    ``clustered_faces/<cluster_name>/<filename>`` (relative to the current
    working directory) is renamed to ``<cluster_name>_<filename>`` inside
    the same directory. Images with a negative ``cluster_id`` are reported
    as skipped.

    Args:
        database_path: Path to database.json.
        dry_run: If True, only print what would be renamed (don't actually
            rename).

    Raises:
        OSError: If the database cannot be opened or a rename fails.
        json.JSONDecodeError: If the database is not valid JSON.
    """
    # JSON is UTF-8 by specification; do not rely on the locale default.
    with open(database_path, "r", encoding="utf-8") as f:
        data = json.load(f)

    print("\n" + "=" * 60)
    print("FILE RENAMING" + (" (DRY RUN)" if dry_run else ""))
    print("=" * 60)

    for filename, info in data.get("images", {}).items():
        cluster_id = info.get("cluster_id", -1)
        if cluster_id < 0:
            print(f"Skipped: {filename} (cluster_id: {cluster_id})")
            continue

        cluster_name = info.get("cluster_name", "Unknown")
        # Generate new name: Person_0_image1.jpg
        old_path = Path("clustered_faces") / cluster_name / filename
        new_path = old_path.parent / f"{cluster_name}_{Path(filename).name}"

        if dry_run:
            print(f"Would rename: {old_path} -> {new_path.name}")
        elif not old_path.exists():
            # Report instead of silently ignoring, e.g. on a second run.
            print(f"Not found: {old_path}")
        elif new_path.exists():
            # Path.rename() silently replaces an existing file on POSIX;
            # refuse rather than destroy data.
            print(
                f"Skipped: {old_path.name} "
                f"(target {new_path.name} already exists)"
            )
        else:
            old_path.rename(new_path)
            print(f"Renamed: {old_path.name} -> {new_path.name}")

    if dry_run:
        print("\nℹ️ This was a dry run. Set dry_run=False to actually rename files.")
def export_cluster_to_list(database_path: str = "database.json", cluster_id: int = 0) -> list:
    """Collect and print the filenames assigned to one cluster.

    Args:
        database_path: Path to database.json.
        cluster_id: Which cluster to export (0, 1, 2, etc.).

    Returns:
        List of filename strings whose ``cluster_id`` equals the requested
        one, in the order they appear in the database. Empty if none match.

    Raises:
        OSError: If the database file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # JSON is UTF-8 by specification; do not rely on the locale default,
    # otherwise non-ASCII filenames break on some platforms.
    with open(database_path, "r", encoding="utf-8") as f:
        data = json.load(f)

    cluster_images = [
        filename
        for filename, info in data.get("images", {}).items()
        if info.get("cluster_id") == cluster_id
    ]

    print(f"\n📋 Images in cluster {cluster_id} (Person_{cluster_id}):")
    for img in cluster_images:
        print(f" • {img}")
    return cluster_images
def main():
    """Walk through the example workflow step by step.

    Prints a banner, shows (as a commented-out snippet) how clustering is
    started, and - if ``database.json`` exists in the current directory -
    runs the analysis, a dry-run rename and an export of cluster 0.
    """
    banner = "=" * 60
    print(banner)
    print("FACE CLUSTERING - EXAMPLE USAGE")
    print(banner)

    # Step 1: the clustering run itself (left disabled in this example)
    print("\n1️⃣ Running face clustering...")
    print(" (Make sure you have images in the current directory)")
    # Uncomment to actually run:
    # clusterer = MemoryEfficientFaceClustering(
    #     source_dir=".",
    #     output_dir="clustered_faces"
    # )
    # clusterer.run()
    print(" [Skipped - uncomment code to run]")

    # Steps 2-4 all need the database produced by a clustering run
    db_path = "database.json"
    if not Path(db_path).exists():
        print(f"\n⚠️ {db_path} not found. Run clustering first!")
    else:
        # Step 2: summarize what the clustering produced
        print(f"\n2️⃣ Analyzing results from {db_path}...")
        analyze_results(db_path)

        # Step 3: preview the renames without touching any file
        print("\n3️⃣ Demonstrating file renaming...")
        rename_files_from_database(db_path, dry_run=True)

        # Step 4: list the members of the first cluster
        print("\n4️⃣ Exporting cluster 0...")
        export_cluster_to_list(db_path, cluster_id=0)

    print("\n" + banner)
    print("Example complete!")
    print(banner)
# Run the example only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|