rethinks committed on
Commit
00e6a0c
·
verified ·
1 Parent(s): d072264

Upload 6 files

Browse files
Files changed (6) hide show
  1. Dockerfile +38 -0
  2. README.md +18 -10
  3. app.py +0 -0
  4. requirements.txt +34 -0
  5. supabase_storage.py +324 -0
  6. test_single_month.py +149 -0
Dockerfile ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.11-slim

# Set working directory
WORKDIR /app

# Install system dependencies: OpenCV runtime libraries (libgl1, libglib,
# libsm6, libxext6, libxrender), OpenMP runtime for hdbscan/sklearn,
# build-essential for compiling wheels, and git for pip VCS installs (CLIP).
RUN apt-get update && apt-get install -y \
    libgl1 \
    libglib2.0-0 \
    libsm6 \
    libxext6 \
    libxrender-dev \
    libgomp1 \
    build-essential \
    git \
    && rm -rf /var/lib/apt/lists/*

# Upgrade pip
RUN pip install --upgrade pip

# Copy requirements first (for Docker layer caching)
COPY requirements.txt .

# Install Python dependencies
# NOTE(review): Flask and gunicorn are already pinned in requirements.txt,
# so this pre-install looks redundant -- confirm before removing.
RUN pip install --no-cache-dir flask gunicorn && \
    pip install --no-cache-dir -r requirements.txt

# Copy app code
COPY . .

# Create necessary runtime directories expected by app.py
RUN mkdir -p uploads results references selected_photos thumbnails

# Expose port (7860 is the Hugging Face Spaces default)
EXPOSE 7860

# Run the app
CMD ["python", "app.py"]
README.md CHANGED
@@ -1,10 +1,18 @@
1
- ---
2
- title: ChildYb
3
- emoji: 🏆
4
- colorFrom: indigo
5
- colorTo: purple
6
- sdk: docker
7
- pinned: false
8
- ---
9
-
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: CustomYB Photo Selector
3
+ emoji: 📸
4
+ colorFrom: blue
5
+ colorTo: purple
6
+ sdk: docker
7
+ pinned: false
8
+ ---
9
+
10
+ # CustomYB - Smart Photo Selection
11
+
12
+ AI-powered photo selection for yearbooks. Upload photos and let AI select the best ones featuring your child.
13
+
14
+ ## Features
15
+ - Face recognition to find your child
16
+ - Quality scoring
17
+ - Duplicate removal
18
+ - Category detection (portrait, group, candid)
app.py ADDED
The diff for this file is too large to render. See raw diff
 
requirements.txt ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Core dependencies
2
+ Flask>=3.0.0
3
+ Werkzeug>=3.0.1
4
+ gunicorn>=21.2.0
5
+
6
+ # Image processing
7
+ Pillow>=10.0.0
8
+ pillow-heif>=0.14.0
9
+ opencv-python-headless>=4.8.0
10
+
11
+ # Machine Learning
12
+ sentence-transformers>=2.2.2
13
+ torch>=2.1.0
14
+ torchvision>=0.16.0
15
+ hdbscan>=0.8.33
16
+ scikit-learn>=1.3.0
17
+ numpy>=1.26.0
18
+
19
+ # Face Recognition
20
+ insightface>=0.7.3
21
+ onnxruntime>=1.16.0
22
+
23
+ # Utilities
24
+ tqdm>=4.66.0
25
+ python-dotenv>=1.0.0
26
+
27
+ # Cloud Storage
28
+ supabase>=2.0.0
29
+
30
+ # CLIP
31
+ ftfy
32
+ regex
33
+ git+https://github.com/openai/CLIP.git
34
+
supabase_storage.py ADDED
@@ -0,0 +1,324 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Supabase Storage Integration for Photo Selection App
3
+ Handles persistent storage of dataset metadata (not photos) in Supabase.
4
+ """
5
+
6
+ import os
7
+ import json
8
+ from typing import Optional, List, Dict, Any
9
+
10
+ # Supabase credentials
11
+ SUPABASE_URL = os.environ.get('SUPABASE_URL', 'https://cqnyibiopjcwuxmyqbgy.supabase.co')
12
+ SUPABASE_KEY = os.environ.get('SUPABASE_KEY', '')
13
+ BUCKET_NAME = 'datasets'
14
+
15
+ # Initialize Supabase client (lazy loading)
16
+ _supabase_client = None
17
+
18
def get_supabase_client():
    """Return a cached Supabase client, creating it on first use.

    Returns None when no SUPABASE_KEY is configured, when the supabase
    package is not installed, or when the connection attempt fails.
    """
    global _supabase_client

    # Without a key there is nothing to connect with.
    if not SUPABASE_KEY:
        print("[Supabase] No SUPABASE_KEY found in environment")
        return None

    # Reuse the cached client once it has been created.
    if _supabase_client is not None:
        return _supabase_client

    try:
        from supabase import create_client
        _supabase_client = create_client(SUPABASE_URL, SUPABASE_KEY)
        print(f"[Supabase] Connected to {SUPABASE_URL}")
        return _supabase_client
    except ImportError:
        print("[Supabase] supabase-py not installed. Run: pip install supabase")
    except Exception as e:
        print(f"[Supabase] Connection error: {e}")
    return None
39
+
40
+
41
def is_supabase_available() -> bool:
    """Report whether a usable Supabase client can be obtained."""
    client = get_supabase_client()
    return client is not None
44
+
45
+
46
+ def _get_dataset_registry(client) -> List[str]:
47
+ """Get the list of dataset names from the registry file."""
48
+ try:
49
+ storage = client.storage.from_(BUCKET_NAME)
50
+ response = storage.download("_registry.json")
51
+ registry = json.loads(response.decode('utf-8'))
52
+ return registry.get('datasets', [])
53
+ except Exception:
54
+ # Registry doesn't exist yet
55
+ return []
56
+
57
+
58
+ def _update_dataset_registry(client, dataset_name: str, action: str = 'add'):
59
+ """Update the registry file with dataset names."""
60
+ try:
61
+ storage = client.storage.from_(BUCKET_NAME)
62
+
63
+ # Get current registry
64
+ datasets = _get_dataset_registry(client)
65
+
66
+ if action == 'add' and dataset_name not in datasets:
67
+ datasets.append(dataset_name)
68
+ elif action == 'remove' and dataset_name in datasets:
69
+ datasets.remove(dataset_name)
70
+ else:
71
+ return # No changes needed
72
+
73
+ # Save updated registry
74
+ registry_data = json.dumps({'datasets': datasets}, indent=2).encode('utf-8')
75
+
76
+ # Try to update (upsert)
77
+ try:
78
+ storage.update(
79
+ path="_registry.json",
80
+ file=registry_data,
81
+ file_options={"content-type": "application/json"}
82
+ )
83
+ except Exception:
84
+ # File doesn't exist, create it
85
+ storage.upload(
86
+ path="_registry.json",
87
+ file=registry_data,
88
+ file_options={"content-type": "application/json"}
89
+ )
90
+
91
+ print(f"[Supabase] Registry updated: {action} '{dataset_name}'")
92
+ except Exception as e:
93
+ print(f"[Supabase] Error updating registry: {e}")
94
+
95
+
96
def _upload_or_update(storage, path: str, data: bytes, content_type: str):
    """Upload *data* to *path* in the bucket, overwriting any existing object.

    Supabase's upload() raises when the object already exists, so fall back
    to update() -- the same pattern _update_dataset_registry uses. This makes
    re-saving a dataset under an existing name work instead of failing.
    """
    options = {"content-type": content_type}
    try:
        return storage.upload(path=path, file=data, file_options=options)
    except Exception:
        # Object already exists -- overwrite it in place.
        return storage.update(path=path, file=data, file_options=options)


def save_dataset_to_supabase(
    dataset_name: str,
    embeddings_data: bytes,
    face_results: dict,
    metadata: dict
) -> bool:
    """
    Save dataset files to Supabase Storage.

    Re-saving an existing dataset overwrites its files instead of failing.

    Args:
        dataset_name: Unique name for the dataset (folder name)
        embeddings_data: Binary data of reference_embeddings.npz
        face_results: Dictionary of face detection results
        metadata: Dataset metadata dictionary

    Returns:
        True if successful, False otherwise
    """
    client = get_supabase_client()
    if not client:
        print("[Supabase] Client not available, skipping cloud save")
        return False

    try:
        storage = client.storage.from_(BUCKET_NAME)

        # 1. Upload reference embeddings (.npz binary blob)
        embeddings_path = f"{dataset_name}/reference_embeddings.npz"
        result = _upload_or_update(
            storage, embeddings_path, embeddings_data, "application/octet-stream"
        )
        print(f"[Supabase] Uploaded {embeddings_path}: {result}")

        # 2. Upload face detection results (JSON)
        face_results_path = f"{dataset_name}/face_results.json"
        result = _upload_or_update(
            storage, face_results_path,
            json.dumps(face_results, indent=2).encode('utf-8'),
            "application/json"
        )
        print(f"[Supabase] Uploaded {face_results_path}: {result}")

        # 3. Upload dataset metadata (JSON)
        metadata_path = f"{dataset_name}/metadata.json"
        _upload_or_update(
            storage, metadata_path,
            json.dumps(metadata, indent=2).encode('utf-8'),
            "application/json"
        )
        print(f"[Supabase] Uploaded {metadata_path}")

        # 4. Record the dataset name in the registry index file.
        _update_dataset_registry(client, dataset_name, action='add')

        print(f"[Supabase] Dataset '{dataset_name}' saved successfully")
        return True

    except Exception as e:
        print(f"[Supabase] Error saving dataset: {e}")
        return False
158
+
159
+
160
def load_dataset_from_supabase(dataset_name: str) -> Optional[Dict[str, Any]]:
    """
    Load dataset files from Supabase Storage.

    Args:
        dataset_name: Name of the dataset to load

    Returns:
        Dictionary with 'embeddings_data', 'face_results', 'metadata' or None if failed
    """
    client = get_supabase_client()
    if not client:
        print("[Supabase] Client not available")
        return None

    def _fetch(relative: str) -> bytes:
        """Download one file from the dataset folder and log it."""
        path = f"{dataset_name}/{relative}"
        data = client.storage.from_(BUCKET_NAME).download(path)
        print(f"[Supabase] Downloaded {path}")
        return data

    try:
        result = {}

        # 1. Raw embeddings bytes (.npz is kept binary for the caller).
        result['embeddings_data'] = _fetch("reference_embeddings.npz")

        # 2. Face detection results, parsed from JSON.
        result['face_results'] = json.loads(
            _fetch("face_results.json").decode('utf-8'))

        # 3. Dataset metadata, parsed from JSON.
        result['metadata'] = json.loads(
            _fetch("metadata.json").decode('utf-8'))

        print(f"[Supabase] Dataset '{dataset_name}' loaded successfully")
        return result

    except Exception as e:
        print(f"[Supabase] Error loading dataset: {e}")
        return None
202
+
203
+
204
def list_datasets_from_supabase() -> List[Dict[str, Any]]:
    """
    List all datasets stored in Supabase.

    Returns:
        List of dataset metadata dictionaries
    """
    client = get_supabase_client()
    if not client:
        print("[Supabase] Client not available")
        return []

    try:
        storage = client.storage.from_(BUCKET_NAME)

        # The registry file is the authoritative list of dataset folders.
        dataset_names = _get_dataset_registry(client)
        print(f"[Supabase] Registry contains: {dataset_names}")

        # Legacy fallback: datasets saved before the registry existed are
        # discovered by probing a short list of known folder names.
        if not dataset_names:
            print("[Supabase] Registry empty, checking for existing datasets...")
            for candidate in ['testing']:
                try:
                    storage.download(f"{candidate}/metadata.json")
                except Exception:
                    continue
                dataset_names.append(candidate)
                print(f"[Supabase] Found existing dataset: {candidate}")

        datasets = []
        for folder_name in dataset_names:
            try:
                raw = storage.download(f"{folder_name}/metadata.json")
                entry = json.loads(raw.decode('utf-8'))
                entry['folder_name'] = folder_name
                entry['source'] = 'supabase'
                datasets.append(entry)
                print(f"[Supabase] Loaded metadata for {folder_name}")
            except Exception as e:
                print(f"[Supabase] Could not load metadata for {folder_name}: {e}")
                # Metadata unreadable -- still list the dataset with a stub.
                datasets.append({
                    'name': folder_name,
                    'folder_name': folder_name,
                    'source': 'supabase',
                    'total_photos': 0,
                    'created_at': None
                })

        print(f"[Supabase] Found {len(datasets)} datasets")
        return datasets

    except Exception as e:
        print(f"[Supabase] Error listing datasets: {e}")
        import traceback
        traceback.print_exc()
        return []
266
+
267
+
268
def delete_dataset_from_supabase(dataset_name: str) -> bool:
    """
    Delete a dataset from Supabase Storage.

    Args:
        dataset_name: Name of the dataset to delete

    Returns:
        True if successful, False otherwise
    """
    client = get_supabase_client()
    if not client:
        print("[Supabase] Client not available")
        return False

    try:
        bucket = client.storage.from_(BUCKET_NAME)

        # Storage has no recursive folder delete: enumerate the folder's
        # objects and remove them in one batch call.
        entries = bucket.list(dataset_name)
        targets = [
            f"{dataset_name}/{entry['name']}"
            for entry in entries
            if entry.get('name')
        ]

        if targets:
            bucket.remove(targets)
            print(f"[Supabase] Deleted {len(targets)} files from '{dataset_name}'")

        # Drop the dataset from the registry index as well.
        _update_dataset_registry(client, dataset_name, action='remove')

        print(f"[Supabase] Dataset '{dataset_name}' deleted successfully")
        return True

    except Exception as e:
        print(f"[Supabase] Error deleting dataset: {e}")
        return False
303
+
304
+
305
def check_dataset_exists_in_supabase(dataset_name: str) -> bool:
    """
    Check if a dataset exists in Supabase.

    Args:
        dataset_name: Name of the dataset to check

    Returns:
        True if exists, False otherwise
    """
    client = get_supabase_client()
    if not client:
        return False

    try:
        # A dataset "exists" when its folder holds at least one object.
        files = client.storage.from_(BUCKET_NAME).list(dataset_name)
        return len(files) > 0
    except Exception:
        # Was a bare `except:`, which also swallows KeyboardInterrupt and
        # SystemExit; narrowed to Exception so those still propagate.
        return False
test_single_month.py ADDED
@@ -0,0 +1,149 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Test script: Select best 40 photos from a single month folder.
3
+ Usage: python test_single_month.py <folder_path> [target_count]
4
+
5
+ Example:
6
+ python test_single_month.py "C:/Photos/2024/January" 40
7
+ """
8
+
9
+ import sys
10
+ import os
11
+ from pathlib import Path
12
+
13
+ # Add project to path
14
+ sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
15
+
16
+ from photo_selector.monthly_selector import MonthlyPhotoSelector
17
+
18
+
19
def test_single_month(folder_path: str, target: int = 40):
    """
    Test photo selection on a single folder.

    Runs the full pipeline -- CLIP embeddings, per-photo scoring, HDBSCAN
    clustering/selection -- and prints a ranked report plus the cluster
    distribution of the selected photos.

    Args:
        folder_path: Path to folder containing photos
        target: Number of photos to select (default 40)

    Returns:
        The list of selected photo dicts, or None when the folder is
        missing or contains no photos.
    """
    folder = Path(folder_path)

    if not folder.exists():
        print(f"Error: Folder not found: {folder}")
        return

    # Collect photos by extension (case-insensitive).
    extensions = {'.jpg', '.jpeg', '.png', '.heic', '.heif', '.webp'}
    photos = [f for f in folder.iterdir() if f.suffix.lower() in extensions]

    print(f"\n{'='*60}")
    print(f"SINGLE MONTH TEST")
    print(f"{'='*60}")
    print(f"Folder: {folder}")
    print(f"Photos found: {len(photos)}")
    print(f"Target selection: {target}")
    print(f"{'='*60}\n")

    if len(photos) == 0:
        print("No photos found in folder!")
        return

    # Initialize selector (loads the CLIP model -- slow on first call).
    print("Initializing selector (loading CLIP model)...")
    selector = MonthlyPhotoSelector()

    # Step 1: Generate embeddings
    print(f"\n[Step 1] Generating CLIP embeddings for {len(photos)} photos...")
    photo_paths = [str(p) for p in photos]
    embeddings = selector.generate_embeddings(photo_paths)
    print(f"Generated embeddings for {len(embeddings)} photos")

    # Step 2: Score photos
    print(f"\n[Step 2] Scoring photos...")
    from photo_selector.scoring import PhotoScorer
    scorer = PhotoScorer()

    scored_photos = []
    for i, photo_path in enumerate(photo_paths):
        # Lightweight progress indicator every 10 photos.
        if (i + 1) % 10 == 0:
            print(f" Scoring {i + 1}/{len(photo_paths)}...")

        # Per-photo quality scores; missing keys default to 0
        # (uniqueness defaults to a neutral 0.5).
        # Fixed: removed dead `emb = embeddings.get(filename)` lookup --
        # the embedding was fetched here but never used.
        scores = scorer.score_photo(photo_path)

        scored_photos.append({
            'filename': Path(photo_path).name,
            'filepath': photo_path,
            'total': scores.get('total', 0),
            'face_quality': scores.get('face_quality', 0),
            'aesthetic_quality': scores.get('aesthetic_quality', 0),
            'emotional_signal': scores.get('emotional_signal', 0),
            'uniqueness': scores.get('uniqueness', 0.5),
            'num_faces': scores.get('num_faces', 0)
        })

    print(f"Scored {len(scored_photos)} photos")

    # Step 3: Cluster and select using HDBSCAN
    print(f"\n[Step 3] Running HDBSCAN clustering and selection...")
    selected = selector.select_hybrid_hdbscan(
        scored_photos,
        embeddings,
        target=target
    )

    # Results summary
    print(f"\n{'='*60}")
    print(f"RESULTS")
    print(f"{'='*60}")
    print(f"Total photos: {len(photos)}")
    print(f"Selected: {len(selected)}")
    print(f"{'='*60}\n")

    # Ranked table of selected photos.
    print("Selected photos (ranked by score):\n")
    print(f"{'#':<4} {'Score':>6} {'Faces':>6} {'Cluster':>8} {'Similarity':>10} {'Filename':<40}")
    print("-" * 80)

    for i, photo in enumerate(selected, 1):
        score = photo.get('total', 0) * 100
        faces = photo.get('num_faces', 0)
        cluster = photo.get('cluster_id', -1)
        # cluster_id < 0 marks photos chosen outside any HDBSCAN cluster.
        cluster_label = f"C{cluster}" if cluster >= 0 else "Fallback"
        similarity = photo.get('max_similarity', 0) * 100
        filename = photo.get('filename', '?')[:38]

        print(f"{i:<4} {score:>5.1f}% {faces:>6} {cluster_label:>8} {similarity:>9.1f}% {filename:<40}")

    # Histogram of how many selections came from each cluster.
    print(f"\n{'='*60}")
    print("CLUSTER DISTRIBUTION")
    print(f"{'='*60}")

    cluster_counts = {}
    for photo in selected:
        cid = photo.get('cluster_id', -1)
        cluster_counts[cid] = cluster_counts.get(cid, 0) + 1

    for cid in sorted(cluster_counts.keys()):
        label = f"Cluster {cid}" if cid >= 0 else "Fallback"
        count = cluster_counts[cid]
        bar = "█" * count
        print(f" {label:<12}: {count:>3} {bar}")

    print(f"\n{'='*60}")

    return selected
138
+
139
+
140
+ if __name__ == "__main__":
141
+ if len(sys.argv) < 2:
142
+ print(__doc__)
143
+ print("\nNo folder provided. Please specify a folder path.")
144
+ sys.exit(1)
145
+
146
+ folder_path = sys.argv[1]
147
+ target = int(sys.argv[2]) if len(sys.argv) > 2 else 40
148
+
149
+ test_single_month(folder_path, target)