File size: 4,699 Bytes
00e6a0c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
"""

Test script: Select best 40 photos from a single month folder.

Usage: python test_single_month.py <folder_path> [target_count]



Example:

    python test_single_month.py "C:/Photos/2024/January" 40

"""

import sys
import os
from pathlib import Path

# Add project to path
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

from photo_selector.monthly_selector import MonthlyPhotoSelector


# Lower-case file suffixes that count as photos when scanning a folder.
_PHOTO_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.heic', '.heif', '.webp'}

# Horizontal rule used to frame each section of the console report.
_RULE = '=' * 60


def _find_photos(folder):
    """Return the photo files directly inside *folder*, sorted by path.

    Only regular files are kept, so a sub-directory that happens to be named
    like ``album.jpg`` is not mistaken for a photo. Sorting makes the run
    independent of the arbitrary order ``iterdir()`` yields entries in.
    """
    return sorted(
        entry for entry in folder.iterdir()
        if entry.is_file() and entry.suffix.lower() in _PHOTO_EXTENSIONS
    )


def _score_photos(scorer, photo_paths):
    """Score every path with ``scorer.score_photo`` and return flat records.

    Each record is a dict carrying the filename, the full path and the score
    fields read from the scorer's result (missing fields fall back to the
    defaults below). Progress is printed every 10 photos.
    """
    total = len(photo_paths)
    records = []
    for index, photo_path in enumerate(photo_paths, 1):
        if index % 10 == 0:
            print(f"  Scoring {index}/{total}...")

        # NOTE(review): score_photo is assumed to return a dict-like object;
        # only its .get() method is used here.
        scores = scorer.score_photo(photo_path)

        records.append({
            'filename': Path(photo_path).name,
            'filepath': photo_path,
            'total': scores.get('total', 0),
            'face_quality': scores.get('face_quality', 0),
            'aesthetic_quality': scores.get('aesthetic_quality', 0),
            'emotional_signal': scores.get('emotional_signal', 0),
            'uniqueness': scores.get('uniqueness', 0.5),
            'num_faces': scores.get('num_faces', 0),
        })
    return records


def _print_selection_table(selected):
    """Print one table row per selected photo, in the order given.

    The rows are not re-sorted here; the "ranked by score" heading relies on
    the order the selector returned them in.
    """
    print("Selected photos (ranked by score):\n")
    print(f"{'#':<4} {'Score':>6} {'Faces':>6} {'Cluster':>8} {'Similarity':>10} {'Filename':<40}")
    print("-" * 80)

    for rank, photo in enumerate(selected, 1):
        score = photo.get('total', 0) * 100
        faces = photo.get('num_faces', 0)
        cluster = photo.get('cluster_id', -1)
        # Negative (or missing) cluster ids are reported as "Fallback".
        cluster_label = f"C{cluster}" if cluster >= 0 else "Fallback"
        similarity = photo.get('max_similarity', 0) * 100
        # Truncate long names so the table columns stay aligned.
        filename = photo.get('filename', '?')[:38]

        print(f"{rank:<4} {score:>5.1f}% {faces:>6} {cluster_label:>8} {similarity:>9.1f}% {filename:<40}")


def _print_cluster_distribution(selected):
    """Print a bar chart of how many selected photos fall in each cluster."""
    from collections import Counter

    counts = Counter(photo.get('cluster_id', -1) for photo in selected)
    for cid in sorted(counts):
        label = f"Cluster {cid}" if cid >= 0 else "Fallback"
        count = counts[cid]
        bar = "█" * count
        print(f"  {label:<12}: {count:>3} {bar}")


def test_single_month(folder_path: str, target: int = 40):
    """Run the photo-selection pipeline on one folder and print a report.

    Steps: scan the folder for photo files, generate embeddings with
    ``MonthlyPhotoSelector.generate_embeddings``, score every photo with
    ``PhotoScorer.score_photo``, then pick photos with
    ``MonthlyPhotoSelector.select_hybrid_hdbscan``.

    Args:
        folder_path: Path to the folder containing photos (not recursive).
        target: Number of photos to select (default 40).

    Returns:
        The value returned by ``select_hybrid_hdbscan`` (iterated here as a
        sequence of dict-like photo records), or ``None`` when the path is
        not a directory or contains no photo files.
    """
    folder = Path(folder_path)

    # is_dir() rather than exists(): a path to a regular file must be
    # rejected here instead of crashing later inside iterdir().
    if not folder.is_dir():
        print(f"Error: Folder not found: {folder}")
        return None

    photos = _find_photos(folder)

    print(f"\n{_RULE}")
    print("SINGLE MONTH TEST")
    print(_RULE)
    print(f"Folder: {folder}")
    print(f"Photos found: {len(photos)}")
    print(f"Target selection: {target}")
    print(f"{_RULE}\n")

    if not photos:
        print("No photos found in folder!")
        return None

    # Initialize selector
    print("Initializing selector (loading CLIP model)...")
    selector = MonthlyPhotoSelector()

    # Step 1: Generate embeddings
    print(f"\n[Step 1] Generating CLIP embeddings for {len(photos)} photos...")
    photo_paths = [str(p) for p in photos]
    embeddings = selector.generate_embeddings(photo_paths)
    print(f"Generated embeddings for {len(embeddings)} photos")

    # Step 2: Score photos
    print("\n[Step 2] Scoring photos...")
    # Imported lazily so the scorer module is only loaded once there is
    # actually something to score.
    from photo_selector.scoring import PhotoScorer
    scored_photos = _score_photos(PhotoScorer(), photo_paths)
    print(f"Scored {len(scored_photos)} photos")

    # Step 3: Cluster and select using HDBSCAN
    print("\n[Step 3] Running HDBSCAN clustering and selection...")
    selected = selector.select_hybrid_hdbscan(
        scored_photos,
        embeddings,
        target=target
    )

    # Results
    print(f"\n{_RULE}")
    print("RESULTS")
    print(_RULE)
    print(f"Total photos: {len(photos)}")
    print(f"Selected: {len(selected)}")
    print(f"{_RULE}\n")

    _print_selection_table(selected)

    # Cluster distribution
    print(f"\n{_RULE}")
    print("CLUSTER DISTRIBUTION")
    print(_RULE)
    _print_cluster_distribution(selected)
    print(f"\n{_RULE}")

    return selected


# The name starts with "test_" but this is a CLI entry point that needs a
# real folder argument; opt out of pytest collection so a test run does not
# fail looking for a "folder_path" fixture.
test_single_month.__test__ = False


if __name__ == "__main__":
    # Command-line entry point: <folder_path> is required, [target_count]
    # is optional and defaults to 40.
    if len(sys.argv) < 2:
        print(__doc__)
        print("\nNo folder provided. Please specify a folder path.")
        sys.exit(1)

    folder_path = sys.argv[1]

    target = 40
    if len(sys.argv) > 2:
        # Report a bad count as a usage error instead of a raw traceback.
        try:
            target = int(sys.argv[2])
        except ValueError:
            print(f"Error: target_count must be an integer, got {sys.argv[2]!r}")
            sys.exit(1)
        if target < 1:
            print(f"Error: target_count must be at least 1, got {target}")
            sys.exit(1)

    # test_single_month returns None when it aborted (folder missing or no
    # photos); surface that to the shell as a non-zero exit status.
    if test_single_month(folder_path, target) is None:
        sys.exit(1)