|
|
"""
|
|
|
Photo Selection Web App
|
|
|
Flask-based frontend for testing the photo selection pipeline
|
|
|
Now with AUTOMATIC selection - no target number needed!
|
|
|
|
|
|
Two-Stage Workflow with Review Step:
|
|
|
1. Upload reference photos of your child (2-3 photos)
|
|
|
2. Upload all event photos (e.g., 1000 photos)
|
|
|
3. System filters to find photos containing your child
|
|
|
4. USER REVIEWS filtered photos (can remove false positives)
|
|
|
5. Quality-based selection runs on confirmed photos
|
|
|
6. Final results shown
|
|
|
"""
|
|
|
|
|
|
import os
|
|
|
import json
|
|
|
import uuid
|
|
|
import shutil
|
|
|
from pathlib import Path
|
|
|
from datetime import datetime
|
|
|
|
|
|
|
|
|
try:
|
|
|
from dotenv import load_dotenv
|
|
|
load_dotenv()
|
|
|
except ImportError:
|
|
|
pass
|
|
|
|
|
|
from flask import Flask, render_template, request, jsonify, send_from_directory, send_file, session, redirect, Response
|
|
|
from werkzeug.utils import secure_filename
|
|
|
from werkzeug.exceptions import RequestEntityTooLarge
|
|
|
import numpy as np
|
|
|
from PIL import Image
|
|
|
import threading
|
|
|
import time
|
|
|
|
|
|
|
|
|
from supabase_storage import (
|
|
|
is_supabase_available,
|
|
|
save_dataset_to_supabase,
|
|
|
load_dataset_from_supabase,
|
|
|
list_datasets_from_supabase,
|
|
|
delete_dataset_from_supabase
|
|
|
)
|
|
|
|
|
|
|
|
|
try:
|
|
|
from pillow_heif import register_heif_opener
|
|
|
register_heif_opener()
|
|
|
except ImportError:
|
|
|
pass
|
|
|
|
|
|
app = Flask(__name__, static_folder='static', template_folder='templates')

# Session secret: prefer the environment so production deployments don't ship
# the hard-coded development key (kept only as a backward-compatible fallback).
app.secret_key = os.environ.get('SECRET_KEY', 'photo_selector_secret_key_2024')

# All working directories live next to this file.
_BASE_DIR = os.path.dirname(os.path.abspath(__file__))
UPLOAD_FOLDER = os.path.join(_BASE_DIR, 'uploads')          # raw event photos, one subdir per job
RESULTS_FOLDER = os.path.join(_BASE_DIR, 'results')         # per-job JSON result/review files
REFERENCE_FOLDER = os.path.join(_BASE_DIR, 'references')    # uploaded reference photos of the child
OUTPUT_FOLDER = os.path.join(_BASE_DIR, 'selected_photos')  # final month-organized output
DATASETS_FOLDER = os.path.join(_BASE_DIR, 'datasets')       # saved datasets
ALLOWED_EXTENSIONS = {'jpg', 'jpeg', 'png', 'heic', 'heif', 'webp'}
MAX_CONTENT_LENGTH = 5 * 1024 * 1024 * 1024  # 5 GB per HTTP request

app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
app.config['MAX_CONTENT_LENGTH'] = MAX_CONTENT_LENGTH
app.config['MAX_FORM_MEMORY_SIZE'] = 5 * 1024 * 1024 * 1024  # allow very large multipart forms
app.config['MAX_FORM_PARTS'] = 10000  # allow many files per upload batch

# Create working directories up front. OUTPUT_FOLDER is intentionally not
# created here; save_photos_by_month() creates it per run.
os.makedirs(UPLOAD_FOLDER, exist_ok=True)
os.makedirs(RESULTS_FOLDER, exist_ok=True)
os.makedirs(REFERENCE_FOLDER, exist_ok=True)
os.makedirs(DATASETS_FOLDER, exist_ok=True)

# In-memory state shared by the request handlers and background threads
# (presumably a single-process server — verify before scaling out):
processing_jobs = {}   # job_id -> progress/status dict polled by the frontend

face_matchers = {}     # session_id -> matcher holding loaded reference photos

upload_sessions = {}   # session-scoped upload bookkeeping
|
|
|
|
|
|
|
|
|
|
|
|
@app.errorhandler(RequestEntityTooLarge)
def handle_large_upload(error):
    """Return a JSON 413 response when a request exceeds MAX_CONTENT_LENGTH."""
    payload = {
        'error': 'Upload too large. Try uploading fewer files at once (max ~500 files per batch).'
    }
    return jsonify(payload), 413
|
|
|
|
|
|
|
|
|
def allowed_file(filename):
    """Return True if *filename* has an extension listed in ALLOWED_EXTENSIONS."""
    if '.' not in filename:
        return False
    extension = filename.rsplit('.', 1)[1]
    return extension.lower() in ALLOWED_EXTENSIONS
|
|
|
|
|
|
|
|
|
def create_thumbnail(image_path, thumb_path, size=(300, 300)):
    """Create a JPEG thumbnail at *thumb_path*, honoring EXIF orientation.

    Args:
        image_path: Source image (any format Pillow can open, including
            HEIC/HEIF when pillow-heif has been registered at import time).
        thumb_path: Destination path for the JPEG thumbnail.
        size: Maximum (width, height); aspect ratio is preserved.

    Returns:
        True on success, False if the image could not be processed.
    """
    from PIL import ImageOps
    try:
        with Image.open(image_path) as img:
            # exif_transpose handles all 8 EXIF orientation values; the
            # previous manual code only handled 3/6/8 (plain rotations) and
            # silently dropped the mirrored orientations 2/4/5/7, and it
            # relied on the private Image._getexif() API.
            img = ImageOps.exif_transpose(img)

            # JPEG cannot store alpha/palette modes.
            if img.mode != 'RGB':
                img = img.convert('RGB')
            img.thumbnail(size, Image.Resampling.LANCZOS)
            img.save(thumb_path, 'JPEG', quality=85)
        return True
    except Exception as e:
        # Best-effort: a failed thumbnail must not abort the whole job.
        print(f"Error creating thumbnail: {e}")
        return False
|
|
|
|
|
|
|
|
|
def get_thumbnail_name(filename):
    """
    Generate thumbnail name that includes the original extension to avoid collisions.

    Example: IMG_5801.HEIC -> thumb_IMG_5801_HEIC.jpg
             IMG_5801.jpg  -> thumb_IMG_5801_jpg.jpg
             README        -> thumb_README.jpg

    Embedding the original extension keeps e.g. IMG_1.HEIC and IMG_1.jpg
    from mapping to the same thumbnail file.
    """
    if '.' in filename:
        name, ext = filename.rsplit('.', 1)
        return f"thumb_{name}_{ext}.jpg"
    # Bug fix: previously every extension-less file mapped to the same
    # literal placeholder name, so their thumbnails overwrote each other.
    return f"thumb_{filename}.jpg"
|
|
|
|
|
|
|
|
|
def _safe_photo_timestamp(path):
    """Best-effort photo timestamp for *path* as a Unix float, or None.

    The import is done lazily (matching the original inline imports) and all
    failures are deliberately swallowed: a missing timestamp only affects the
    sort order of the review list. Narrowed from a bare ``except:`` so
    SystemExit/KeyboardInterrupt still propagate.
    """
    try:
        from photo_selector.utils import get_photo_timestamp
        dt = get_photo_timestamp(path)
        return dt.timestamp() if dt else None
    except Exception:
        return None


def process_photos_face_filter_only(job_id, upload_dir, session_id=None):
    """
    Phase 1: Face filtering only.
    Scans all photos to find ones containing the target person.
    Returns filtered photos for user review before quality selection.

    Progress/status is reported through the global ``processing_jobs[job_id]``
    dict; the final review payload is written to
    ``RESULTS_FOLDER/<job_id>_review.json`` and status becomes
    ``review_pending`` (or ``error`` on failure).
    """
    try:
        print(f"\n{'='*60}")
        print(f"[Job {job_id}] PHASE 1: Face Filtering Started")
        print(f"{'='*60}")

        processing_jobs[job_id]['status'] = 'processing'
        processing_jobs[job_id]['progress'] = 5
        processing_jobs[job_id]['message'] = 'Loading face recognition AI...'

        print(f"[Job {job_id}] Loading InsightFace face recognition model...")

        # Imported for its side effect of loading the face-matching module;
        # the matcher instance itself comes from the session cache below.
        from photo_selector.face_matcher import FaceMatcher

        # Reuse the session's matcher only if it actually holds references.
        face_matcher = None
        if session_id and session_id in face_matchers:
            face_matcher = face_matchers[session_id]
            if face_matcher.get_reference_count() == 0:
                face_matcher = None

        if face_matcher is None:
            print(f"[Job {job_id}] ERROR: No reference photos loaded!")
            processing_jobs[job_id]['status'] = 'error'
            processing_jobs[job_id]['message'] = 'No reference photos loaded'
            return

        ref_count = face_matcher.get_reference_count()
        print(f"[Job {job_id}] Reference photos loaded: {ref_count}")

        processing_jobs[job_id]['progress'] = 10
        processing_jobs[job_id]['message'] = 'Scanning photos for your child using InsightFace...'

        # Candidate photos: allowed extensions, skipping generated thumbnails.
        photo_files = [
            f for f in os.listdir(upload_dir)
            if allowed_file(f) and not f.startswith('thumb_')
        ]

        total_photos = len(photo_files)
        print(f"[Job {job_id}] Total photos to scan: {total_photos}")
        processing_jobs[job_id]['total_photos'] = total_photos
        processing_jobs[job_id]['message'] = f'Scanning {total_photos} photos for your child...'

        # Local-folder jobs keep thumbnails under the job's upload dir so the
        # user's original folder is never written to.
        is_local_folder = processing_jobs[job_id].get('is_local_folder', False)
        if is_local_folder:
            thumbs_dir = os.path.join(UPLOAD_FOLDER, job_id, 'thumbnails')
        else:
            thumbs_dir = os.path.join(upload_dir, 'thumbnails')
        os.makedirs(thumbs_dir, exist_ok=True)

        photo_paths = [os.path.join(upload_dir, fn) for fn in photo_files]

        def progress_callback(current, total, message):
            # Face matching owns the 30-80% range of the progress bar.
            processing_jobs[job_id]['photos_checked'] = current
            processing_jobs[job_id]['message'] = f'Checked {current}/{total} photos...'
            progress_pct = 30 + int((current / total) * 50) if total > 0 else 30
            processing_jobs[job_id]['progress'] = progress_pct

        print(f"[Job {job_id}] Starting face detection and matching...")
        processing_jobs[job_id]['progress'] = 30
        filter_results = face_matcher.filter_photos(photo_paths, progress_callback=progress_callback)

        if 'error' in filter_results:
            print(f"[Job {job_id}] ERROR: Face matching failed - {filter_results['error']}")
            processing_jobs[job_id]['status'] = 'error'
            processing_jobs[job_id]['message'] = f"Face matching error: {filter_results['error']}"
            return

        stats = filter_results.get('statistics', {})
        matched_count = len(filter_results.get('matched_photos', []))
        unmatched_count = len(filter_results.get('unmatched_photos', []))

        print(f"\n[Job {job_id}] Face Filtering Results:")
        print(f" - Photos with your child: {matched_count}")
        print(f" - Photos without match: {unmatched_count}")
        print(f" - Photos with no faces: {stats.get('no_faces', 0)}")

        # match_rate may arrive pre-formatted (a string) or as a float ratio.
        match_rate = stats.get('match_rate', 0)
        if isinstance(match_rate, str):
            print(f" - Match rate: {match_rate}")
        else:
            print(f" - Match rate: {match_rate:.1%}")

        processing_jobs[job_id]['progress'] = 70
        processing_jobs[job_id]['message'] = f'Creating thumbnails: 0/{matched_count}'

        print(f"[Job {job_id}] Creating thumbnails for {matched_count} matched photos...")

        # Build review entries (with thumbnails) for every matched photo.
        filtered_photos = []
        for i, match in enumerate(filter_results['matched_photos']):
            filename = os.path.basename(match['path'])
            thumb_name = get_thumbnail_name(filename)
            thumb_path = os.path.join(thumbs_dir, thumb_name)

            create_thumbnail(match['path'], thumb_path)

            filtered_photos.append({
                'filename': filename,
                'thumbnail': thumb_name,
                'face_match_score': match['similarity'],
                'num_faces': match['num_faces'],
                'matched_face_idx': match.get('matched_face_idx', 0),
                'face_bboxes': match.get('face_bboxes', [])
            })

            # Progress update every 10 thumbnails, and always on the last one.
            if (i + 1) % 10 == 0 or (i + 1) == matched_count:
                progress = 70 + int((i / matched_count) * 25)
                processing_jobs[job_id]['progress'] = progress
                processing_jobs[job_id]['message'] = f'Creating thumbnails: {i + 1}/{matched_count}'
                print(f"[Job {job_id}] Thumbnails created: {i + 1}/{matched_count}")

        # Best matches first for the review UI.
        filtered_photos.sort(key=lambda x: x['face_match_score'], reverse=True)

        # Non-matching photos (below threshold, no faces, or processing errors)
        # are all surfaced so the user can rescue false negatives in review.
        unmatched_photos = []
        for unmatch in filter_results.get('unmatched_photos', []):
            unmatched_photos.append({
                'filename': os.path.basename(unmatch['path']),
                'best_similarity': unmatch.get('best_similarity', 0),
                'num_faces': unmatch.get('num_faces', 0),
                'timestamp': _safe_photo_timestamp(unmatch['path'])
            })

        for no_face in filter_results.get('no_faces_photos', []):
            unmatched_photos.append({
                'filename': os.path.basename(no_face['path']),
                'best_similarity': 0,
                'num_faces': 0,
                'timestamp': _safe_photo_timestamp(no_face['path'])
            })

        for error_photo in filter_results.get('error_photos', []):
            unmatched_photos.append({
                'filename': os.path.basename(error_photo['path']),
                'best_similarity': 0,
                'num_faces': 0,
                'timestamp': _safe_photo_timestamp(error_photo['path']),
                'error': error_photo.get('error', 'Processing error')
            })

        # Chronological order; photos with no timestamp sort first.
        unmatched_photos.sort(key=lambda x: x.get('timestamp') or 0)

        review_data = {
            'total_uploaded': total_photos,
            'filtered_photos': filtered_photos,
            'unmatched_photos': unmatched_photos,
            'statistics': filter_results['statistics'],
            'reference_count': face_matcher.get_reference_count()
        }

        # Persist so the review step survives a page reload / server restart.
        review_file = os.path.join(RESULTS_FOLDER, f"{job_id}_review.json")
        with open(review_file, 'w') as f:
            json.dump(review_data, f, indent=2, default=str)

        processing_jobs[job_id]['progress'] = 100
        processing_jobs[job_id]['status'] = 'review_pending'
        processing_jobs[job_id]['message'] = f'Found your child in {len(filtered_photos)} of {total_photos} photos!'
        processing_jobs[job_id]['review_data'] = review_data

        print(f"\n[Job {job_id}] PHASE 1 COMPLETE!")
        print(f" - Found {len(filtered_photos)} photos of your child")
        print(f" - Status: review_pending (waiting for user to confirm)")
        print(f" - Review data saved to: {review_file}")
        print(f"{'='*60}\n")

    except Exception as e:
        print(f"[Job {job_id}] EXCEPTION: {str(e)}")
        processing_jobs[job_id]['status'] = 'error'
        processing_jobs[job_id]['message'] = str(e)
        import traceback
        traceback.print_exc()
|
|
|
|
|
|
|
|
|
def process_drive_with_parallel_face_detection(job_id, folder_id, upload_dir, face_matcher):
    """
    HYBRID APPROACH: Download files from Google Drive while running face detection in parallel.

    This overlaps network I/O (downloading) with GPU compute (face detection) for faster processing.

    Flow:
    - Download (this thread): downloads files and adds paths to a queue
    - Face detection workers: process files from the queue as they become ready
    - Both run simultaneously for maximum efficiency

    Progress is reported via ``processing_jobs[job_id]``; the final review
    payload is written to ``RESULTS_FOLDER/<job_id>_review.json`` and the job
    status becomes ``review_pending`` (or ``error`` on failure).
    """
    import queue
    import threading

    print(f"\n{'='*60}")
    print(f"[Job {job_id}] HYBRID MODE: Parallel Download + Face Detection")
    print(f"{'='*60}")

    # Work queue feeding downloaded file paths to the detection workers.
    file_queue = queue.Queue()
    # Guards the result lists and the shared counters below.
    results_lock = threading.Lock()
    matched_photos = []
    unmatched_photos = []
    no_faces_photos = []
    error_photos = []

    # Set once download_folder() returns, so workers know the queue won't grow.
    download_complete = threading.Event()
    # One-element lists serve as mutable integer cells shared by the closures.
    total_files = [0]
    downloaded_count = [0]
    processed_count = [0]

    def face_detection_worker():
        """Process files from queue as they become available."""
        while True:
            # Poll with a timeout so the worker can notice when downloading
            # has finished and the queue has drained.
            try:
                filepath = file_queue.get(timeout=1.0)
            except queue.Empty:
                if download_complete.is_set() and file_queue.empty():
                    break
                continue

            if filepath is None:
                # Shutdown sentinel (pushed only after file_queue.join()).
                break

            try:
                result = face_matcher.check_photo_for_target(filepath)

                with results_lock:
                    processed_count[0] += 1

                    if 'error' in result:
                        error_photos.append({'path': filepath, 'error': result['error']})
                    elif result['num_faces'] == 0:
                        no_faces_photos.append({'path': filepath, 'num_faces': 0})
                    elif result['contains_target']:
                        matched_photos.append({
                            'path': filepath,
                            'similarity': result['best_match_similarity'],
                            'num_faces': result['num_faces'],
                            'all_similarities': result.get('all_face_similarities', []),
                            'face_bboxes': result.get('face_bboxes', [])
                        })
                    else:
                        unmatched_photos.append({
                            'path': filepath,
                            'best_similarity': result['best_match_similarity'],
                            'num_faces': result['num_faces']
                        })

                    # Periodic status update, every 10 processed photos.
                    if processed_count[0] % 10 == 0:
                        # While downloading, download_progress() owns the bar;
                        # afterwards the scan phase drives the 30-70% range.
                        if download_complete.is_set():
                            pct = 30 + int((processed_count[0] / max(total_files[0], 1)) * 40)
                            processing_jobs[job_id]['progress'] = min(pct, 70)
                        processing_jobs[job_id]['message'] = f'Scanning faces: {processed_count[0]}/{total_files[0]}'
                        processing_jobs[job_id]['photos_checked'] = processed_count[0]
                        print(f"[Job {job_id}] [HYBRID] Downloaded: {downloaded_count[0]}, Face checked: {processed_count[0]}, Matched: {len(matched_photos)}")
            except Exception as e:
                print(f"[Job {job_id}] Face detection error: {e}")
            finally:
                # BUG FIX: task_done() must run even when face matching raises;
                # previously an exception skipped it, leaving the unfinished-task
                # count nonzero so file_queue.join() hung forever.
                file_queue.task_done()

    def on_file_ready(filepath):
        """Called by download_folder when each file is ready."""
        with results_lock:
            downloaded_count[0] += 1
        file_queue.put(filepath)

    def download_progress(current, total, _filename):
        """Progress callback for download_folder; owns the 5-30% range."""
        total_files[0] = total
        # Guard total == 0 (empty Drive folder) against ZeroDivisionError.
        pct = 5 + int((current / total) * 25) if total > 0 else 5
        processing_jobs[job_id]['progress'] = pct
        processing_jobs[job_id]['message'] = f'Downloading: {current}/{total}, Scanning: {processed_count[0]}'
        processing_jobs[job_id]['total_files'] = total

    try:
        processing_jobs[job_id]['status'] = 'processing'
        processing_jobs[job_id]['progress'] = 5
        processing_jobs[job_id]['message'] = 'Starting parallel download and face detection...'

        # Start the worker pool. Daemon threads so a wedged job can't block
        # interpreter shutdown.
        num_workers = 4
        workers = []
        for _ in range(num_workers):
            t = threading.Thread(target=face_detection_worker)
            t.daemon = True
            t.start()
            workers.append(t)

        print(f"[Job {job_id}] Started {num_workers} face detection workers")

        print(f"[Job {job_id}] Starting Google Drive download with parallel face detection...")

        # Blocks until every file is downloaded; workers consume in parallel.
        download_folder(
            folder_id,
            upload_dir,
            progress_callback=download_progress,
            file_ready_callback=on_file_ready
        )

        download_complete.set()
        print(f"[Job {job_id}] Download complete. Waiting for face detection to finish...")

        # Wait until every queued file has been face-checked.
        file_queue.join()

        # Tell each worker to exit, then reap them.
        for _ in workers:
            file_queue.put(None)

        for t in workers:
            t.join(timeout=5.0)

        print(f"\n[Job {job_id}] HYBRID Face Detection Results:")
        print(f" - Photos with your child: {len(matched_photos)}")
        print(f" - Photos without match: {len(unmatched_photos)}")
        print(f" - Photos with no faces: {len(no_faces_photos)}")
        print(f" - Photos with errors: {len(error_photos)}")
        if error_photos:
            print(f" [ERRORS] First 5 error photos:")
            for ep in error_photos[:5]:
                print(f" - {os.path.basename(ep['path'])}: {ep.get('error', 'Unknown error')}")

        processing_jobs[job_id]['progress'] = 75
        processing_jobs[job_id]['message'] = f'Creating thumbnails for {len(matched_photos)} photos...'

        thumbs_dir = os.path.join(upload_dir, 'thumbnails')
        os.makedirs(thumbs_dir, exist_ok=True)

        # Build the review entries (with thumbnails) for matched photos.
        filtered_photos = []
        for i, match in enumerate(matched_photos):
            filename = os.path.basename(match['path'])
            thumb_name = get_thumbnail_name(filename)
            thumb_path = os.path.join(thumbs_dir, thumb_name)

            create_thumbnail(match['path'], thumb_path)

            filtered_photos.append({
                'filename': filename,
                'thumbnail': thumb_name,
                'face_match_score': match['similarity'],
                'num_faces': match['num_faces'],
                'face_bboxes': match.get('face_bboxes', [])
            })

            if (i + 1) % 20 == 0:
                processing_jobs[job_id]['message'] = f'Creating thumbnails: {i + 1}/{len(matched_photos)}'

        # Best matches first for the review UI.
        filtered_photos.sort(key=lambda x: x['face_match_score'], reverse=True)

        # Everything that did not match is surfaced for manual review too.
        unmatched_data = []
        for unmatch in unmatched_photos:
            unmatched_data.append({
                'filename': os.path.basename(unmatch['path']),
                'best_similarity': unmatch.get('best_similarity', 0),
                'num_faces': unmatch.get('num_faces', 0)
            })

        for no_face in no_faces_photos:
            unmatched_data.append({
                'filename': os.path.basename(no_face['path']),
                'best_similarity': 0,
                'num_faces': 0
            })

        for error_photo in error_photos:
            unmatched_data.append({
                'filename': os.path.basename(error_photo['path']),
                'best_similarity': 0,
                'num_faces': 0,
                'error': error_photo.get('error', 'Processing error')
            })

        review_data = {
            'total_uploaded': total_files[0],
            'filtered_photos': filtered_photos,
            'unmatched_photos': unmatched_data,
            'statistics': {
                'total_scanned': total_files[0],
                'matched': len(matched_photos),
                'unmatched': len(unmatched_photos),
                'no_faces': len(no_faces_photos),
                'errors': len(error_photos),
                'match_rate': f"{(len(matched_photos) / max(total_files[0], 1) * 100):.1f}%"
            },
            'reference_count': face_matcher.get_reference_count()
        }

        # Persist so the review step survives a page reload / server restart.
        review_file = os.path.join(RESULTS_FOLDER, f"{job_id}_review.json")
        with open(review_file, 'w') as f:
            json.dump(review_data, f, indent=2, default=str)

        processing_jobs[job_id]['progress'] = 100
        processing_jobs[job_id]['status'] = 'review_pending'
        processing_jobs[job_id]['message'] = f'Found your child in {len(filtered_photos)} of {total_files[0]} photos!'
        processing_jobs[job_id]['review_data'] = review_data

        print(f"\n[Job {job_id}] HYBRID MODE COMPLETE!")
        print(f" - Found {len(filtered_photos)} photos of your child")
        print(f"{'='*60}\n")

    except Exception as e:
        print(f"[Job {job_id}] HYBRID EXCEPTION: {str(e)}")
        processing_jobs[job_id]['status'] = 'error'
        processing_jobs[job_id]['message'] = str(e)
        import traceback
        traceback.print_exc()
|
|
|
|
|
|
|
|
|
def _group_photos_by_month(photos):
    """Group photo dicts by their 'month' key ('Unknown' when missing)."""
    grouped = {}
    for photo in photos:
        grouped.setdefault(photo.get('month', 'Unknown'), []).append(photo)
    return grouped


def _copy_month_groups(groups, dest_base, upload_dir, label):
    """Copy each month's photos into dest_base/<month>/; return count copied."""
    copied = 0
    for month, photos in groups.items():
        month_folder = os.path.join(dest_base, month)
        os.makedirs(month_folder, exist_ok=True)

        print(f" [{label}/{month}] Saving {len(photos)} photos...")

        for photo in photos:
            src_path = os.path.join(upload_dir, photo['filename'])
            dst_path = os.path.join(month_folder, photo['filename'])
            # Source may have been cleaned up already; skip silently.
            if os.path.exists(src_path):
                # copy2 preserves file metadata (timestamps).
                shutil.copy2(src_path, dst_path)
                copied += 1
    return copied


def _write_selection_summary(summary_path, job_id, total_selected, total_rejected,
                             month_stats, selected_by_month, rejected_by_month):
    """Write the human-readable summary.txt for a save_photos_by_month run."""
    with open(summary_path, 'w') as f:
        f.write("=" * 60 + "\n")
        f.write(" PHOTO SELECTION SUMMARY\n")
        f.write("=" * 60 + "\n\n")
        f.write(f"Job ID: {job_id}\n")
        f.write(f"Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
        f.write(f"Total Selected: {total_selected} photos\n")
        f.write(f"Total Not Selected: {total_rejected} photos\n")
        f.write(f"Grand Total: {total_selected + total_rejected} photos\n\n")

        f.write("-" * 40 + "\n")
        f.write(" BREAKDOWN BY MONTH\n")
        f.write("-" * 40 + "\n\n")
        f.write(f"{'Month':<12} {'Selected':>10} {'Not Selected':>14} {'Total':>8}\n")
        f.write(f"{'-'*12} {'-'*10} {'-'*14} {'-'*8}\n")

        for stat in month_stats:
            month = stat['month']
            selected = stat['selected']
            total = stat['total_photos']
            not_selected = total - selected
            f.write(f"{month:<12} {selected:>10} {not_selected:>14} {total:>8}\n")

        f.write("\n" + "=" * 60 + "\n")
        f.write(" SELECTED FILES BY MONTH\n")
        f.write("=" * 60 + "\n")

        # NOTE(review): sorted() is alphabetical (Apr, Aug, Dec, ...), not
        # calendar order — kept as-is to match existing summaries.
        for month, photos in sorted(selected_by_month.items()):
            f.write(f"\n[{month}] - {len(photos)} selected photos:\n")
            for photo in sorted(photos, key=lambda x: x.get('score', 0), reverse=True):
                score = photo.get('score', 0) * 100
                cluster = photo.get('cluster_id', -1)
                f.write(f" + {photo['filename']} (Score: {score:.0f}%, Cluster: {cluster})\n")

        f.write("\n" + "=" * 60 + "\n")
        f.write(" NOT SELECTED FILES BY MONTH\n")
        f.write("=" * 60 + "\n")

        for month, photos in sorted(rejected_by_month.items()):
            f.write(f"\n[{month}] - {len(photos)} not selected photos:\n")
            for photo in sorted(photos, key=lambda x: x.get('score', 0), reverse=True):
                score = photo.get('score', 0) * 100
                cluster = photo.get('cluster_id', -1)
                f.write(f" - {photo['filename']} (Score: {score:.0f}%, Cluster: {cluster})\n")


def save_photos_by_month(job_id, upload_dir, selected_photos, rejected_photos, month_stats):
    """
    Automatically save both selected and not-selected photos organized by month.

    Creates folder structure:
        selected_photos/{job_id}_{timestamp}/
            selected/<Month>/<photo files>
            not_selected/<Month>/<photo files>
            summary.txt

    Args:
        job_id: The job identifier
        upload_dir: Source directory containing original photos
        selected_photos: List of selected photo dicts with 'filename' and 'month' keys
        rejected_photos: List of rejected photo dicts with 'filename' and 'month' keys
        month_stats: Statistics about each month's selection
            (dicts with 'month', 'selected', 'total_photos')

    Returns:
        Path to the output folder, or None on failure.
    """
    try:
        # Timestamped run folder so repeated runs never overwrite each other.
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        output_base = os.path.join(OUTPUT_FOLDER, f"{job_id}_{timestamp}")
        os.makedirs(output_base, exist_ok=True)

        print(f"\n{'='*60}")
        print(f" AUTO-SAVING PHOTOS BY MONTH (SELECTED & NOT SELECTED)")
        print(f"{'='*60}")
        print(f" Output folder: {output_base}")

        selected_base = os.path.join(output_base, "selected")
        not_selected_base = os.path.join(output_base, "not_selected")
        os.makedirs(selected_base, exist_ok=True)
        os.makedirs(not_selected_base, exist_ok=True)

        selected_by_month = _group_photos_by_month(selected_photos)
        rejected_by_month = _group_photos_by_month(rejected_photos)

        print(f"\n --- SELECTED PHOTOS ---")
        total_selected_copied = _copy_month_groups(
            selected_by_month, selected_base, upload_dir, "selected")

        print(f"\n --- NOT SELECTED PHOTOS ---")
        total_rejected_copied = _copy_month_groups(
            rejected_by_month, not_selected_base, upload_dir, "not_selected")

        summary_path = os.path.join(output_base, "summary.txt")
        _write_selection_summary(summary_path, job_id,
                                 total_selected_copied, total_rejected_copied,
                                 month_stats, selected_by_month, rejected_by_month)

        print(f"\n SUMMARY:")
        print(f" - Selected photos saved: {total_selected_copied}")
        print(f" - Not selected photos saved: {total_rejected_copied}")
        print(f" - Total photos saved: {total_selected_copied + total_rejected_copied}")
        print(f" - Summary written to: {summary_path}")
        print(f"{'='*60}\n")

        return output_base

    except Exception as e:
        # Best-effort: saving to disk must not crash the caller's pipeline.
        print(f"[ERROR] Failed to save photos by month: {str(e)}")
        import traceback
        traceback.print_exc()
        return None
|
|
|
|
|
|
|
|
|
def process_photos_quality_selection(job_id, upload_dir, quality_mode, similarity_threshold, confirmed_photos, face_data_cache=None, embedding_model='siglip'):
|
|
|
"""
|
|
|
Phase 2: Month-based category-aware photo selection.
|
|
|
Selects ~40 best photos per month with category diversity.
|
|
|
|
|
|
Args:
|
|
|
face_data_cache: Dict of filename -> {'num_faces': int, 'face_bboxes': list}
|
|
|
Cached face data from Step 2 to avoid re-detection
|
|
|
embedding_model: 'siglip' or 'clip' - which embedding model to use
|
|
|
"""
|
|
|
face_data_cache = face_data_cache or {}
|
|
|
try:
|
|
|
print(f"\n{'='*60}")
|
|
|
print(f"[Job {job_id}] PHASE 2: Monthly Category-Aware Selection Started")
|
|
|
print(f"{'='*60}")
|
|
|
print(f"[Job {job_id}] Confirmed photos: {len(confirmed_photos)}")
|
|
|
print(f"[Job {job_id}] Quality mode: {quality_mode}")
|
|
|
print(f"[Job {job_id}] Similarity threshold: {similarity_threshold}")
|
|
|
print(f"[Job {job_id}] Embedding model: {embedding_model.upper()}")
|
|
|
|
|
|
processing_jobs[job_id]['status'] = 'processing'
|
|
|
processing_jobs[job_id]['progress'] = 5
|
|
|
processing_jobs[job_id]['message'] = f'Loading {embedding_model.upper()} model...'
|
|
|
|
|
|
|
|
|
from photo_selector.monthly_selector import MonthlyPhotoSelector
|
|
|
if embedding_model == 'clip':
|
|
|
from photo_selector.clip_embeddings import CLIPEmbedder as Embedder
|
|
|
model_display_name = 'CLIP'
|
|
|
else:
|
|
|
from photo_selector.siglip_embeddings import SigLIPEmbedder as Embedder
|
|
|
model_display_name = 'SigLIP'
|
|
|
|
|
|
|
|
|
if quality_mode == 'keep_more':
|
|
|
target_per_month = 60
|
|
|
elif quality_mode == 'strict':
|
|
|
target_per_month = 25
|
|
|
else:
|
|
|
target_per_month = 40
|
|
|
|
|
|
print(f"[Job {job_id}] Target per month: {target_per_month}")
|
|
|
|
|
|
|
|
|
processing_jobs[job_id]['progress'] = 10
|
|
|
processing_jobs[job_id]['message'] = f'Checking embedding cache...'
|
|
|
|
|
|
print(f"[Job {job_id}] Processing {len(confirmed_photos)} photos for {model_display_name} embeddings...")
|
|
|
|
|
|
|
|
|
from supabase_storage import (
|
|
|
compute_file_hash,
|
|
|
get_cached_embeddings_batch,
|
|
|
save_embeddings_batch,
|
|
|
is_supabase_available
|
|
|
)
|
|
|
|
|
|
|
|
|
file_hashes = {}
|
|
|
hash_to_filename = {}
|
|
|
|
|
|
print(f"[Job {job_id}] Computing file hashes...")
|
|
|
for i, filename in enumerate(confirmed_photos):
|
|
|
filepath = os.path.join(upload_dir, filename)
|
|
|
if os.path.exists(filepath):
|
|
|
file_hash = compute_file_hash(filepath)
|
|
|
if file_hash:
|
|
|
file_hashes[filename] = file_hash
|
|
|
hash_to_filename[file_hash] = filename
|
|
|
|
|
|
|
|
|
if i % 100 == 0:
|
|
|
progress = 10 + int((i / len(confirmed_photos)) * 5)
|
|
|
processing_jobs[job_id]['progress'] = progress
|
|
|
|
|
|
print(f"[Job {job_id}] Computed {len(file_hashes)} hashes")
|
|
|
|
|
|
|
|
|
embeddings = {}
|
|
|
cached_count = 0
|
|
|
uncached_filenames = []
|
|
|
|
|
|
if is_supabase_available() and file_hashes:
|
|
|
processing_jobs[job_id]['message'] = f'Checking embedding cache...'
|
|
|
all_hashes = list(file_hashes.values())
|
|
|
|
|
|
|
|
|
cached_embeddings = {}
|
|
|
batch_size = 500
|
|
|
for i in range(0, len(all_hashes), batch_size):
|
|
|
batch_hashes = all_hashes[i:i + batch_size]
|
|
|
batch_result = get_cached_embeddings_batch(batch_hashes, embedding_model)
|
|
|
cached_embeddings.update(batch_result)
|
|
|
|
|
|
|
|
|
for filename, file_hash in file_hashes.items():
|
|
|
if file_hash in cached_embeddings:
|
|
|
embeddings[filename] = cached_embeddings[file_hash]
|
|
|
cached_count += 1
|
|
|
else:
|
|
|
uncached_filenames.append(filename)
|
|
|
|
|
|
print(f"[Job {job_id}] Cache hit: {cached_count}/{len(file_hashes)} embeddings")
|
|
|
else:
|
|
|
uncached_filenames = list(file_hashes.keys())
|
|
|
print(f"[Job {job_id}] Cache not available, computing all embeddings")
|
|
|
|
|
|
|
|
|
newly_computed = {}
|
|
|
if uncached_filenames:
|
|
|
processing_jobs[job_id]['message'] = f'Analyzing {len(uncached_filenames)} photos with {model_display_name}...'
|
|
|
print(f"[Job {job_id}] Computing {model_display_name} embeddings for {len(uncached_filenames)} uncached photos...")
|
|
|
|
|
|
embedder = Embedder()
|
|
|
|
|
|
for i, filename in enumerate(uncached_filenames):
|
|
|
filepath = os.path.join(upload_dir, filename)
|
|
|
if os.path.exists(filepath):
|
|
|
img = embedder.load_image(filepath)
|
|
|
if img is not None:
|
|
|
embedding = embedder.get_embedding(img)
|
|
|
if embedding is not None:
|
|
|
embeddings[filename] = embedding
|
|
|
newly_computed[filename] = embedding
|
|
|
img.close()
|
|
|
|
|
|
|
|
|
progress = 15 + int((i / len(uncached_filenames)) * 15)
|
|
|
processing_jobs[job_id]['progress'] = progress
|
|
|
|
|
|
print(f"[Job {job_id}] Computed {len(newly_computed)} new embeddings")
|
|
|
|
|
|
|
|
|
if newly_computed and is_supabase_available():
|
|
|
processing_jobs[job_id]['message'] = 'Saving embeddings to cache...'
|
|
|
saved = save_embeddings_batch(newly_computed, file_hashes, embedding_model)
|
|
|
print(f"[Job {job_id}] Saved {saved} embeddings to cache")
|
|
|
|
|
|
print(f"[Job {job_id}] Total embeddings: {len(embeddings)} (cached: {cached_count}, computed: {len(newly_computed)})")
|
|
|
|
|
|
|
|
|
processing_jobs[job_id]['progress'] = 35
|
|
|
processing_jobs[job_id]['message'] = 'Grouping photos by month...'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
selector = MonthlyPhotoSelector(
|
|
|
target_per_month=target_per_month,
|
|
|
duplicate_threshold=0.85,
|
|
|
diversity_threshold=0.75
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
from collections import defaultdict
|
|
|
|
|
|
MONTH_NAMES = {
|
|
|
1: "Jan", 2: "Feb", 3: "Mar", 4: "Apr",
|
|
|
5: "May", 6: "Jun", 7: "Jul", 8: "Aug",
|
|
|
9: "Sep", 10: "Oct", 11: "Nov", 12: "Dec"
|
|
|
}
|
|
|
|
|
|
photos_by_month = defaultdict(list)
|
|
|
|
|
|
|
|
|
timestamp_found = 0
|
|
|
timestamp_missing = 0
|
|
|
|
|
|
for filename in confirmed_photos:
|
|
|
filepath = os.path.join(upload_dir, filename)
|
|
|
if not os.path.exists(filepath):
|
|
|
print(f"[TIMESTAMP DEBUG] File not found: {filepath}")
|
|
|
continue
|
|
|
|
|
|
dt = selector.get_photo_date(filepath)
|
|
|
if dt:
|
|
|
timestamp_found += 1
|
|
|
else:
|
|
|
timestamp_missing += 1
|
|
|
|
|
|
|
|
|
cached_face = face_data_cache.get(filename, {})
|
|
|
|
|
|
photo_info = {
|
|
|
'filename': filename,
|
|
|
'filepath': filepath,
|
|
|
'date': dt.isoformat() if dt else None,
|
|
|
'month': MONTH_NAMES.get(dt.month, "Unknown") if dt else "Unknown",
|
|
|
'timestamp': dt.timestamp() if dt else None,
|
|
|
|
|
|
'num_faces': cached_face.get('num_faces'),
|
|
|
'face_bboxes': cached_face.get('face_bboxes', [])
|
|
|
}
|
|
|
|
|
|
photos_by_month[photo_info['month']].append(photo_info)
|
|
|
|
|
|
|
|
|
month_order = list(MONTH_NAMES.values()) + ['Unknown']
|
|
|
photos_by_month = {m: photos_by_month[m] for m in month_order if m in photos_by_month}
|
|
|
|
|
|
print(f"[TIMESTAMP DEBUG] Timestamps found: {timestamp_found}, missing: {timestamp_missing}")
|
|
|
print(f"[Job {job_id}] Photos grouped into {len(photos_by_month)} months:")
|
|
|
for month, photos in photos_by_month.items():
|
|
|
print(f" - {month}: {len(photos)} photos")
|
|
|
|
|
|
|
|
|
processing_jobs[job_id]['progress'] = 60
|
|
|
processing_jobs[job_id]['message'] = 'Selecting best photos per month...'
|
|
|
|
|
|
def progress_callback(msg):
|
|
|
processing_jobs[job_id]['message'] = msg
|
|
|
|
|
|
selection_results = selector.select_all_months(photos_by_month, embeddings, progress_callback)
|
|
|
|
|
|
selected_photos = selection_results['selected']
|
|
|
month_stats = selection_results['month_stats']
|
|
|
summary = selection_results['summary']
|
|
|
|
|
|
print(f"\n[Job {job_id}] Selection Results:")
|
|
|
print(f" - Total photos: {summary['total_photos']}")
|
|
|
print(f" - Selected: {summary['total_selected']}")
|
|
|
print(f" - Selection rate: {summary['selection_rate']*100:.1f}%")
|
|
|
|
|
|
|
|
|
processing_jobs[job_id]['progress'] = 75
|
|
|
processing_jobs[job_id]['message'] = 'Detecting categories for selected photos...'
|
|
|
|
|
|
print(f"[Job {job_id}] Detecting categories for {len(selected_photos)} selected photos...")
|
|
|
selected_paths = [p['filepath'] for p in selected_photos]
|
|
|
if selected_paths:
|
|
|
selector._ensure_category_detector()
|
|
|
categories = selector.category_detector.detect_categories_batch(selected_paths)
|
|
|
for photo in selected_photos:
|
|
|
|
|
|
cat, conf = categories.get(photo['filename'], ('unknown', 0.0))
|
|
|
photo['category'] = cat
|
|
|
photo['category_confidence'] = conf
|
|
|
|
|
|
|
|
|
for stat in month_stats:
|
|
|
month_name = stat['month']
|
|
|
month_selected = [p for p in selected_photos if p.get('month') == month_name]
|
|
|
cat_breakdown = {}
|
|
|
for p in month_selected:
|
|
|
cat = p.get('category', 'unknown')
|
|
|
cat_breakdown[cat] = cat_breakdown.get(cat, 0) + 1
|
|
|
stat['categories'] = cat_breakdown
|
|
|
|
|
|
|
|
|
|
|
|
selected_filenames = {p['filename'] for p in selected_photos}
|
|
|
rejected_photos = []
|
|
|
|
|
|
for month, photos in photos_by_month.items():
|
|
|
for photo in photos:
|
|
|
if photo['filename'] not in selected_filenames:
|
|
|
|
|
|
if not photo.get('rejection_reason'):
|
|
|
photo['rejection_reason'] = 'Not selected for month quota'
|
|
|
rejected_photos.append(photo)
|
|
|
|
|
|
|
|
|
thumbs_dir = os.path.join(upload_dir, 'thumbnails')
|
|
|
os.makedirs(thumbs_dir, exist_ok=True)
|
|
|
|
|
|
|
|
|
total_thumbnails = len(selected_photos) + len(rejected_photos)
|
|
|
thumbnails_created = 0
|
|
|
|
|
|
processing_jobs[job_id]['progress'] = 85
|
|
|
processing_jobs[job_id]['message'] = f'Creating thumbnails: 0/{total_thumbnails}'
|
|
|
|
|
|
|
|
|
results = {
|
|
|
'selected': [],
|
|
|
'rejected': [],
|
|
|
'summary': {
|
|
|
'total_photos': summary['total_photos'],
|
|
|
'selected_count': summary['total_selected'],
|
|
|
'rejected_count': len(rejected_photos),
|
|
|
'selection_rate': summary['selection_rate'],
|
|
|
'face_filtering': {
|
|
|
'total_photos': processing_jobs[job_id].get('total_uploaded', len(confirmed_photos)),
|
|
|
'after_face_filter': len(confirmed_photos),
|
|
|
'user_confirmed': len(confirmed_photos)
|
|
|
},
|
|
|
'total_processed': len(confirmed_photos)
|
|
|
},
|
|
|
'month_stats': month_stats,
|
|
|
'rejection_breakdown': {}
|
|
|
}
|
|
|
|
|
|
|
|
|
rejection_counts = defaultdict(int)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
cluster_total_counts = defaultdict(int)
|
|
|
for month, photos in photos_by_month.items():
|
|
|
for photo in photos:
|
|
|
cid = photo.get('cluster_id', -1)
|
|
|
if cid != -1:
|
|
|
cluster_total_counts[(month, cid)] += 1
|
|
|
|
|
|
|
|
|
cluster_selected_counts = defaultdict(int)
|
|
|
for photo in selected_photos:
|
|
|
month = photo.get('month', 'Unknown')
|
|
|
cid = photo.get('cluster_id', -1)
|
|
|
if cid != -1:
|
|
|
cluster_selected_counts[(month, cid)] += 1
|
|
|
|
|
|
|
|
|
for photo in selected_photos:
|
|
|
filename = photo['filename']
|
|
|
thumb_name = get_thumbnail_name(filename)
|
|
|
thumb_path = os.path.join(thumbs_dir, thumb_name)
|
|
|
|
|
|
create_thumbnail(os.path.join(upload_dir, filename), thumb_path)
|
|
|
|
|
|
|
|
|
thumbnails_created += 1
|
|
|
if thumbnails_created % 10 == 0 or thumbnails_created == total_thumbnails:
|
|
|
processing_jobs[job_id]['message'] = f'Creating thumbnails: {thumbnails_created}/{total_thumbnails}'
|
|
|
|
|
|
|
|
|
photo_embedding = embeddings.get(filename)
|
|
|
embedding_list = photo_embedding.tolist() if photo_embedding is not None else None
|
|
|
|
|
|
|
|
|
cid = photo.get('cluster_id', -1)
|
|
|
month = photo.get('month', 'Unknown')
|
|
|
cluster_total = cluster_total_counts.get((month, cid), 0) if cid != -1 else 0
|
|
|
cluster_selected = cluster_selected_counts.get((month, cid), 0) if cid != -1 else 0
|
|
|
|
|
|
results['selected'].append({
|
|
|
'filename': filename,
|
|
|
'thumbnail': thumb_name,
|
|
|
'score': float(photo.get('total', 0)),
|
|
|
'face_quality': float(photo.get('face_quality', 0)),
|
|
|
'aesthetic_quality': float(photo.get('aesthetic_quality', 0)),
|
|
|
'emotional_signal': float(photo.get('emotional_signal', 0)),
|
|
|
'uniqueness': float(photo.get('uniqueness', 0)),
|
|
|
'bucket': photo.get('month', 'unknown'),
|
|
|
'month': month,
|
|
|
'category': photo.get('category', 'unknown'),
|
|
|
'num_faces': int(photo.get('num_faces', 0)),
|
|
|
'cluster_id': cid,
|
|
|
'original_cluster_id': photo.get('original_cluster_id', cid),
|
|
|
'cluster_total': cluster_total,
|
|
|
'cluster_selected': cluster_selected,
|
|
|
'event_id': photo.get('event_id', -1),
|
|
|
'max_similarity': float(photo.get('max_similarity', 0)),
|
|
|
'embedding': embedding_list,
|
|
|
'selection_reason': f"Best in {photo.get('category', 'category')} for {month}",
|
|
|
'selection_detail': f"Selected from {month} - Category: {photo.get('category', 'unknown')}"
|
|
|
})
|
|
|
|
|
|
|
|
|
for photo in rejected_photos:
|
|
|
filename = photo['filename']
|
|
|
thumb_name = get_thumbnail_name(filename)
|
|
|
thumb_path = os.path.join(thumbs_dir, thumb_name)
|
|
|
|
|
|
create_thumbnail(os.path.join(upload_dir, filename), thumb_path)
|
|
|
|
|
|
|
|
|
thumbnails_created += 1
|
|
|
if thumbnails_created % 10 == 0 or thumbnails_created == total_thumbnails:
|
|
|
processing_jobs[job_id]['message'] = f'Creating thumbnails: {thumbnails_created}/{total_thumbnails}'
|
|
|
|
|
|
|
|
|
rejection_reason = photo.get('rejection_reason', 'Better photos selected')
|
|
|
|
|
|
|
|
|
if 'Event' in rejection_reason:
|
|
|
breakdown_category = "Same event"
|
|
|
elif 'Cluster' in rejection_reason:
|
|
|
breakdown_category = "Same cluster"
|
|
|
elif 'similar' in rejection_reason.lower():
|
|
|
breakdown_category = "Too similar"
|
|
|
elif 'Target' in rejection_reason:
|
|
|
breakdown_category = "Target reached"
|
|
|
else:
|
|
|
breakdown_category = "Other"
|
|
|
rejection_counts[breakdown_category] += 1
|
|
|
|
|
|
|
|
|
photo_embedding = embeddings.get(filename)
|
|
|
embedding_list = photo_embedding.tolist() if photo_embedding is not None else None
|
|
|
|
|
|
|
|
|
cid = photo.get('cluster_id', -1)
|
|
|
month = photo.get('month', 'Unknown')
|
|
|
cluster_total = cluster_total_counts.get((month, cid), 0) if cid != -1 else 0
|
|
|
cluster_selected = cluster_selected_counts.get((month, cid), 0) if cid != -1 else 0
|
|
|
|
|
|
results['rejected'].append({
|
|
|
'filename': filename,
|
|
|
'thumbnail': thumb_name,
|
|
|
'score': float(photo.get('total', 0)),
|
|
|
'face_quality': float(photo.get('face_quality', 0)),
|
|
|
'aesthetic_quality': float(photo.get('aesthetic_quality', 0)),
|
|
|
'bucket': photo.get('month', 'unknown'),
|
|
|
'month': month,
|
|
|
'category': photo.get('category', 'unknown'),
|
|
|
'cluster_id': cid,
|
|
|
'original_cluster_id': photo.get('original_cluster_id', cid),
|
|
|
'cluster_total': cluster_total,
|
|
|
'cluster_selected': cluster_selected,
|
|
|
'event_id': photo.get('event_id', -1),
|
|
|
'max_similarity': float(photo.get('max_similarity', 0)),
|
|
|
'embedding': embedding_list,
|
|
|
'rejection_reason': rejection_reason,
|
|
|
'reason': rejection_reason,
|
|
|
'reason_detail': f"Category: {photo.get('category', 'unknown')}"
|
|
|
})
|
|
|
|
|
|
results['rejection_breakdown'] = dict(rejection_counts)
|
|
|
|
|
|
|
|
|
face_filter_data = results['summary'].get('face_filtering', {})
|
|
|
total_uploaded = face_filter_data.get('total_photos', 0)
|
|
|
after_face_filter = face_filter_data.get('after_face_filter', 0)
|
|
|
face_filtered_out = total_uploaded - after_face_filter
|
|
|
if face_filtered_out > 0:
|
|
|
results['rejection_breakdown']['Face not detected'] = face_filtered_out
|
|
|
|
|
|
|
|
|
results['selected'].sort(key=lambda x: x['score'], reverse=True)
|
|
|
results['rejected'].sort(key=lambda x: x['score'], reverse=True)
|
|
|
|
|
|
|
|
|
results_file = os.path.join(RESULTS_FOLDER, f"{job_id}.json")
|
|
|
with open(results_file, 'w') as f:
|
|
|
json.dump(results, f, indent=2, default=str)
|
|
|
|
|
|
processing_jobs[job_id]['status'] = 'complete'
|
|
|
processing_jobs[job_id]['progress'] = 100
|
|
|
processing_jobs[job_id]['message'] = 'Selection complete!'
|
|
|
processing_jobs[job_id]['results'] = results
|
|
|
|
|
|
print(f"\n[Job {job_id}] PHASE 2 COMPLETE!")
|
|
|
print(f" - Final selection: {len(results['selected'])} photos")
|
|
|
print(f" - Filtered out: {len(results['rejected'])} photos")
|
|
|
print(f" - Results saved to: {results_file}")
|
|
|
print(f"\n=== Month Distribution ===")
|
|
|
for stat in month_stats:
|
|
|
print(f" {stat['month']}: {stat['selected']}/{stat['total_photos']} ({stat['category_summary']})")
|
|
|
print(f"{'='*60}\n")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
except Exception as e:
|
|
|
print(f"[Job {job_id}] EXCEPTION: {str(e)}")
|
|
|
processing_jobs[job_id]['status'] = 'error'
|
|
|
processing_jobs[job_id]['message'] = str(e)
|
|
|
import traceback
|
|
|
traceback.print_exc()
|
|
|
|
|
|
|
|
|
def process_photos_automatic(job_id, upload_dir, quality_mode, similarity_threshold, session_id=None):
    """
    Full automatic processing (no review step) - used when no reference photos loaded.
    Processes all photos with quality-based selection.

    Pipeline: SigLIP embeddings -> monthly temporal buckets -> per-bucket
    clustering -> per-cluster quality scoring -> smart selection -> thumbnails
    and JSON results. Progress/status is reported by mutating the shared
    ``processing_jobs[job_id]`` dict, which the frontend polls.

    Args:
        job_id: Key into the shared ``processing_jobs`` dict.
        upload_dir: Directory containing the uploaded photos.
        quality_mode: Selection strictness passed to SmartPhotoSelector.
        similarity_threshold: Near-duplicate threshold for SmartPhotoSelector.
        session_id: Unused here; kept for signature parity with the other
            processing entry points.

    On any exception the job status is set to 'error' with the message; the
    function never raises to its caller (it runs in a background thread).
    """
    try:
        processing_jobs[job_id]['status'] = 'processing'
        processing_jobs[job_id]['progress'] = 5
        processing_jobs[job_id]['message'] = 'Loading AI models...'

        # Heavy ML imports are deferred so the web server starts fast.
        from photo_selector.siglip_embeddings import SigLIPEmbedder
        from photo_selector.temporal import TemporalSegmenter
        from photo_selector.clustering import PhotoClusterer, BucketClusterManager
        from photo_selector.scoring import PhotoScorer, ClusterScorer
        from photo_selector.auto_selector import SmartPhotoSelector, SelectionReason

        processing_jobs[job_id]['progress'] = 20
        processing_jobs[job_id]['message'] = 'Analyzing photos with SigLIP AI...'

        # Compute one embedding per photo; keys are filenames.
        embedder = SigLIPEmbedder()
        embeddings = embedder.process_folder(upload_dir)

        processing_jobs[job_id]['progress'] = 40
        processing_jobs[job_id]['message'] = 'Organizing by date...'

        # Bucket photos by month from their timestamps.
        segmenter = TemporalSegmenter(bucket_type="monthly")
        buckets = segmenter.segment_folder(upload_dir)

        # Rough target: keep about a third of the photos, at least 10 overall,
        # distributed across buckets.
        estimated_target = max(10, len(embeddings) // 3)
        targets = segmenter.calculate_target_per_bucket(buckets, estimated_target)

        processing_jobs[job_id]['progress'] = 50
        processing_jobs[job_id]['message'] = 'Grouping similar photos (adaptive clustering)...'

        # Cluster visually/temporally similar photos within each bucket.
        clusterer = BucketClusterManager(PhotoClusterer(min_cluster_size=5, temporal_gap_hours=24.0, timestamp_weight=0.3))
        cluster_results = clusterer.cluster_all_buckets(buckets, embeddings, targets)

        processing_jobs[job_id]['progress'] = 60
        processing_jobs[job_id]['message'] = 'Scoring photo quality...'

        # Score every photo, one cluster at a time; results collected into
        # all_scores keyed by filename with bucket/cluster bookkeeping attached.
        scorer = ClusterScorer(PhotoScorer())
        all_scores = {}

        for bucket_key, bucket_data in cluster_results.items():
            filenames = bucket_data['filenames']
            labels = np.array(bucket_data['labels'])
            bucket_embeddings = np.array([embeddings[fn] for fn in filenames])

            for cluster_id in np.unique(labels):
                cluster_mask = labels == cluster_id
                cluster_indices = np.where(cluster_mask)[0]
                cluster_filenames = [filenames[i] for i in cluster_indices]
                cluster_embs = bucket_embeddings[cluster_mask]
                cluster_paths = [os.path.join(upload_dir, fn) for fn in cluster_filenames]

                scores = scorer.score_cluster(cluster_paths, cluster_embs)

                for score in scores:
                    score['bucket'] = bucket_key
                    score['cluster'] = int(cluster_id)
                    score['cluster_key'] = f"{bucket_key}_cluster_{cluster_id}"
                    all_scores[score['filename']] = score

        processing_jobs[job_id]['progress'] = 75
        processing_jobs[job_id]['message'] = 'AI deciding which photos to keep...'

        # Final keep/reject decision based on scores, embeddings and clusters.
        auto_selector = SmartPhotoSelector(
            quality_mode=quality_mode,
            similarity_threshold=similarity_threshold
        )

        selection_results = auto_selector.process_all_photos(
            all_scores, embeddings, cluster_results
        )

        processing_jobs[job_id]['progress'] = 90
        processing_jobs[job_id]['message'] = 'Preparing results...'

        # Thumbnails are stored alongside the originals for the results UI.
        thumbs_dir = os.path.join(upload_dir, 'thumbnails')
        os.makedirs(thumbs_dir, exist_ok=True)

        results = {
            'selected': [],
            'rejected': [],
            'summary': selection_results['summary'],
            'rejection_breakdown': selection_results['rejection_breakdown'],
            'bucket_stats': selection_results['bucket_stats']
        }

        # Flatten the selected photos into JSON-safe dicts (+ thumbnail).
        for photo in selection_results['selected']:
            filename = photo['filename']
            thumb_name = get_thumbnail_name(filename)
            thumb_path = os.path.join(thumbs_dir, thumb_name)

            create_thumbnail(os.path.join(upload_dir, filename), thumb_path)

            # selection_reason may be a SelectionReason enum or a plain string.
            reason = photo.get('selection_reason', None)
            if isinstance(reason, SelectionReason):
                reason_text = reason.value
            else:
                reason_text = str(reason) if reason else 'High quality photo'

            results['selected'].append({
                'filename': filename,
                'thumbnail': thumb_name,
                'score': float(photo.get('total', 0)),
                'face_quality': float(photo.get('face_quality', 0)),
                'aesthetic_quality': float(photo.get('aesthetic_quality', 0)),
                'emotional_signal': float(photo.get('emotional_signal', 0)),
                'uniqueness': float(photo.get('uniqueness', 0)),
                'bucket': photo.get('bucket', 'unknown'),
                'num_faces': int(photo.get('num_faces', 0)),
                'selection_reason': reason_text,
                'selection_detail': photo.get('selection_detail', reason_text)
            })

        # Same flattening for rejected photos (fewer score fields are exposed).
        for photo in selection_results['rejected']:
            filename = photo['filename']
            thumb_name = get_thumbnail_name(filename)
            thumb_path = os.path.join(thumbs_dir, thumb_name)

            create_thumbnail(os.path.join(upload_dir, filename), thumb_path)

            reason = photo.get('rejection_reason', None)
            if isinstance(reason, SelectionReason):
                reason_text = reason.value
            else:
                reason_text = str(reason) if reason else 'Did not meet quality threshold'

            results['rejected'].append({
                'filename': filename,
                'thumbnail': thumb_name,
                'score': float(photo.get('total', 0)),
                'face_quality': float(photo.get('face_quality', 0)),
                'aesthetic_quality': float(photo.get('aesthetic_quality', 0)),
                'bucket': photo.get('bucket', 'unknown'),
                'reason': reason_text,
                'reason_detail': photo.get('rejection_detail', '')
            })

        # Display order: best score first in both lists.
        results['selected'].sort(key=lambda x: x['score'], reverse=True)
        results['rejected'].sort(key=lambda x: x['score'], reverse=True)

        # Persist results so they survive a restart; default=str handles any
        # non-JSON-native values that slipped through.
        results_file = os.path.join(RESULTS_FOLDER, f"{job_id}.json")
        with open(results_file, 'w') as f:
            json.dump(results, f, indent=2, default=str)

        processing_jobs[job_id]['status'] = 'complete'
        processing_jobs[job_id]['progress'] = 100
        processing_jobs[job_id]['message'] = 'Selection complete!'
        processing_jobs[job_id]['results'] = results

    except Exception as e:
        # Surface the failure to the polling frontend instead of raising.
        processing_jobs[job_id]['status'] = 'error'
        processing_jobs[job_id]['message'] = str(e)
        import traceback
        traceback.print_exc()
|
|
|
|
|
|
|
|
|
@app.route('/')
def index():
    """Serve the landing page; the workflow proper begins at step 1."""
    landing_page = 'index.html'
    return render_template(landing_page)
|
|
|
|
|
|
|
|
|
@app.route('/preload_model')
def preload_model():
    """Warm up the InsightFace model so later requests skip the load cost."""
    from photo_selector.face_matcher import FaceMatcher
    try:
        # Constructing a FaceMatcher triggers the (slow) model initialization.
        matcher = FaceMatcher(similarity_threshold=0.5)
        if not matcher.is_initialized:
            return jsonify({'success': False, 'message': 'Model failed to initialize'})
        return jsonify({'success': True, 'message': 'Model loaded'})
    except Exception as e:
        return jsonify({'success': False, 'message': str(e)})
|
|
|
|
|
|
|
|
|
@app.route('/step1')
def step1_reference():
    """Step 1: upload reference photos of the target person."""
    # Lazily assign a short per-browser session id on first visit.
    if 'session_id' not in session:
        session['session_id'] = str(uuid.uuid4())[:8]
    sid = session['session_id']
    return render_template('step1_reference.html', session_id=sid)
|
|
|
|
|
|
|
|
|
@app.route('/step2')
def step2_upload():
    """Step 2: upload the full set of event photos."""
    sid = session.get('session_id')
    if not sid:
        # No active session - fall back to the landing page.
        return render_template('index.html')

    # How many reference faces were registered in step 1 (0 if none).
    matcher = face_matchers.get(sid)
    ref_count = matcher.get_reference_count() if matcher is not None else 0

    return render_template('step2_upload.html',
                           session_id=sid,
                           reference_count=ref_count)
|
|
|
|
|
|
|
|
|
@app.route('/upload_reference', methods=['POST'])
def upload_reference():
    """Accept 2-3 photos of the target person and register their faces."""
    from photo_selector.face_matcher import FaceMatcher

    if 'files' not in request.files:
        return jsonify({'error': 'No files provided'}), 400

    files = request.files.getlist('files')
    if not files or files[0].filename == '':
        return jsonify({'error': 'No files selected'}), 400

    # Make sure this browser has a session id to key its references under.
    session_id = session.get('session_id')
    if not session_id:
        session_id = str(uuid.uuid4())[:8]
        session['session_id'] = session_id

    # Reference photos are kept in a per-session directory on disk.
    ref_dir = os.path.join(REFERENCE_FOLDER, session_id)
    os.makedirs(ref_dir, exist_ok=True)

    # One FaceMatcher per session, created lazily on first upload.
    if session_id not in face_matchers:
        face_matchers[session_id] = FaceMatcher(similarity_threshold=0.5)
    matcher = face_matchers[session_id]

    results = []
    for upload in files:
        if not (upload and allowed_file(upload.filename)):
            continue

        filename = secure_filename(upload.filename)
        filepath = os.path.join(ref_dir, filename)
        upload.save(filepath)

        # Register the face(s) in this photo with the matcher.
        result = matcher.add_reference_photo(filepath)
        result['filename'] = filename

        # Small thumbnail for the UI preview.
        thumb_name = get_thumbnail_name(filename)
        create_thumbnail(filepath, os.path.join(ref_dir, thumb_name), size=(150, 150))
        result['thumbnail'] = thumb_name

        results.append(result)

    total_refs = matcher.get_reference_count()
    return jsonify({
        'session_id': session_id,
        'results': results,
        'total_references': total_refs,
        'message': f'Loaded {total_refs} reference face(s)'
    })
|
|
|
|
|
|
|
|
|
@app.route('/reference_status')
def reference_status():
    """Report how many reference faces are loaded for the current session."""
    session_id = session.get('session_id')

    matcher = face_matchers.get(session_id) if session_id else None
    if matcher is None:
        # No session or no matcher yet: not ready.
        return jsonify({
            'session_id': session_id,
            'reference_count': 0,
            'ready': False
        })

    count = matcher.get_reference_count()
    return jsonify({
        'session_id': session_id,
        'reference_count': count,
        'ready': count >= 1
    })
|
|
|
|
|
|
|
|
|
@app.route('/clear_references', methods=['POST'])
def clear_references():
    """Drop every reference photo (in memory and on disk) for this session."""
    session_id = session.get('session_id')

    if session_id and session_id in face_matchers:
        face_matchers[session_id].clear_references()

        # Also wipe the on-disk reference directory, if present.
        ref_dir = os.path.join(REFERENCE_FOLDER, session_id)
        if os.path.exists(ref_dir):
            shutil.rmtree(ref_dir)

    return jsonify({'message': 'References cleared', 'reference_count': 0})
|
|
|
|
|
|
|
|
|
@app.route('/reference_thumbnail/<filename>')
def get_reference_thumbnail(filename):
    """Serve a reference-photo thumbnail for the current session."""
    session_id = session.get('session_id')
    if not session_id:
        return jsonify({'error': 'No session'}), 404
    # send_from_directory rejects path-traversal attempts in `filename`.
    return send_from_directory(os.path.join(REFERENCE_FOLDER, session_id), filename)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@app.route('/upload_init', methods=['POST'])
def upload_init():
    """Create a new chunked-upload session and its staging directory."""
    payload = request.json
    total_files = payload.get('total_files', 0)
    quality_mode = payload.get('quality_mode', 'balanced')
    similarity_threshold = payload.get('similarity_threshold', 0.92)

    # The short random id doubles as the staging-directory name.
    upload_session_id = str(uuid.uuid4())[:8]
    upload_dir = os.path.join(UPLOAD_FOLDER, upload_session_id)
    os.makedirs(upload_dir, exist_ok=True)

    # Remember which face-matching session (if any) these photos belong to.
    face_session_id = session.get('session_id')

    upload_sessions[upload_session_id] = {
        'upload_dir': upload_dir,
        'total_files': total_files,
        'uploaded_files': [],
        'quality_mode': quality_mode,
        'similarity_threshold': similarity_threshold,
        'face_session_id': face_session_id,
        'created_at': time.time()
    }

    print(f"\n[Upload Session {upload_session_id}] Initialized for {total_files} files")

    return jsonify({
        'session_id': upload_session_id,
        'message': 'Upload session initialized'
    })
|
|
|
|
|
|
|
|
|
@app.route('/upload_chunk', methods=['POST'])
def upload_chunk():
    """Receive one batch of files belonging to an active chunked upload."""
    if 'files' not in request.files:
        return jsonify({'error': 'No files provided'}), 400

    session_id = request.form.get('session_id')
    upload_info = upload_sessions.get(session_id) if session_id else None
    if upload_info is None:
        return jsonify({'error': 'Invalid upload session'}), 400

    upload_dir = upload_info['upload_dir']
    saved_count = 0

    for upload in request.files.getlist('files'):
        if not (upload and allowed_file(upload.filename)):
            continue

        filename = secure_filename(upload.filename)

        # Resolve name collisions by appending _1, _2, ... until free.
        base, ext = os.path.splitext(filename)
        counter = 1
        while os.path.exists(os.path.join(upload_dir, filename)):
            filename = f"{base}_{counter}{ext}"
            counter += 1

        upload.save(os.path.join(upload_dir, filename))
        upload_info['uploaded_files'].append(filename)
        saved_count += 1

    chunk_index = request.form.get('chunk_index', '?')
    print(f"[Upload Session {session_id}] Chunk {chunk_index}: saved {saved_count} files (total: {len(upload_info['uploaded_files'])})")

    return jsonify({
        'success': True,
        'saved': saved_count,
        'total_uploaded': len(upload_info['uploaded_files'])
    })
|
|
|
|
|
|
|
|
|
@app.route('/upload_complete', methods=['POST'])
def upload_complete():
    """Complete a chunked upload and start processing.

    Converts the finished upload session into a processing job (same id),
    then spawns a daemon worker thread: face-filtering mode when the user
    loaded reference photos, plain quality selection otherwise. Returns the
    job id the frontend should poll.
    """
    data = request.json
    session_id = data.get('session_id')

    if not session_id or session_id not in upload_sessions:
        return jsonify({'error': 'Invalid upload session'}), 400

    upload_info = upload_sessions[session_id]
    upload_dir = upload_info['upload_dir']
    saved_files = upload_info['uploaded_files']
    quality_mode = upload_info['quality_mode']
    similarity_threshold = upload_info['similarity_threshold']
    face_session_id = upload_info['face_session_id']

    # Nothing usable was uploaded: clean up the staging dir and session.
    if not saved_files:
        shutil.rmtree(upload_dir)
        del upload_sessions[session_id]
        return jsonify({'error': 'No valid image files uploaded'}), 400

    # Face-filtering is enabled only if this browser's face-matcher session
    # has at least one registered reference face.
    has_references = False
    ref_count = 0
    if face_session_id and face_session_id in face_matchers:
        ref_count = face_matchers[face_session_id].get_reference_count()
        has_references = ref_count > 0

    # The upload-session id is reused as the processing-job id.
    job_id = session_id

    processing_jobs[job_id] = {
        'status': 'queued',
        'progress': 30,
        'message': 'Starting AI processing...',
        'total_files': len(saved_files),
        'total_uploaded': len(saved_files),
        'upload_dir': upload_dir,
        'session_id': face_session_id,
        'has_reference_photos': has_references,
        'reference_count': ref_count,
        'quality_mode': quality_mode,
        'similarity_threshold': similarity_threshold,
        'results': None
    }

    # The upload session is consumed; only the processing job remains.
    del upload_sessions[session_id]

    if has_references:
        print(f"\n[Job {job_id}] NEW JOB (Chunked Upload) - Face Filtering Mode")
        print(f" - Files uploaded: {len(saved_files)}")
        print(f" - Reference photos: {ref_count}")
        thread = threading.Thread(
            target=process_photos_face_filter_only,
            args=(job_id, upload_dir, face_session_id)
        )
        message = f'Scanning {len(saved_files)} photos to find your child using {ref_count} reference(s)...'
    else:
        print(f"\n[Job {job_id}] NEW JOB (Chunked Upload) - No Face Filtering")
        print(f" - Files uploaded: {len(saved_files)}")
        thread = threading.Thread(
            target=process_photos_quality_selection,
            args=(job_id, upload_dir, quality_mode, similarity_threshold)
        )
        message = f'Selecting best photos from {len(saved_files)} images...'

    # Daemon thread: doesn't block interpreter shutdown.
    thread.daemon = True
    thread.start()

    processing_jobs[job_id]['message'] = message

    return jsonify({
        'job_id': job_id,
        'message': message,
        'total_files': len(saved_files)
    })
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Optional Google Drive integration: if the local google_drive helper module
# is importable, probe for a usable service account; otherwise the feature
# flag stays False and the Drive routes refuse requests.
try:
    from google_drive import (
        is_drive_available, extract_folder_id, list_images_in_folder,
        download_folder, get_folder_info, get_drive_service
    )
    GDRIVE_SERVICE_ACCOUNT_AVAILABLE = is_drive_available()
except ImportError:
    GDRIVE_SERVICE_ACCOUNT_AVAILABLE = False
|
|
|
|
|
|
|
|
|
@app.route('/check_drive_status')
def check_drive_status():
    """Tell the frontend whether the Drive service account is usable."""
    if GDRIVE_SERVICE_ACCOUNT_AVAILABLE:
        status_msg = 'Service Account configured'
    else:
        status_msg = 'Service Account not configured'
    return jsonify({
        'available': GDRIVE_SERVICE_ACCOUNT_AVAILABLE,
        'message': status_msg
    })
|
|
|
|
|
|
|
|
|
@app.route('/preview_drive_folder', methods=['POST'])
def preview_drive_folder():
    """Look inside a Google Drive folder (name, image count, sample images)
    so the user can confirm it before importing."""
    if not GDRIVE_SERVICE_ACCOUNT_AVAILABLE:
        return jsonify({'error': 'Google Drive Service Account not configured'}), 400

    payload = request.get_json()
    folder_url = payload.get('folder_url', '').strip()
    if not folder_url:
        return jsonify({'error': 'Please provide a folder URL'}), 400

    try:
        folder_id = extract_folder_id(folder_url)
        info = get_folder_info(folder_id)

        if not info.get('success'):
            return jsonify({'error': info.get('error', 'Could not access folder')}), 400

        # Cap the preview at the first five images.
        return jsonify({
            'success': True,
            'folder_id': folder_id,
            'folder_name': info.get('folder_name', 'Unknown'),
            'image_count': info.get('image_count', 0),
            'preview_images': info.get('images', [])[:5]
        })
    except ValueError as e:
        # Unparseable folder URL.
        return jsonify({'error': str(e)}), 400
    except Exception as e:
        print(f"[Drive] Error previewing folder: {e}")
        return jsonify({'error': f'Could not access folder: {str(e)}'}), 400
|
|
|
|
|
|
|
|
|
@app.route('/import_from_drive', methods=['POST'])
def import_from_drive():
    """Import photos from Google Drive folder (Step 2 - initial upload).

    Validates the folder URL, registers a processing job, then downloads and
    processes in a background daemon thread so the request returns
    immediately with the job id for polling. If reference photos exist for
    this session, a hybrid path runs download and face detection in parallel.
    """
    if not GDRIVE_SERVICE_ACCOUNT_AVAILABLE:
        return jsonify({'error': 'Google Drive Service Account not configured'}), 400

    data = request.get_json()
    folder_url = data.get('folder_url', '').strip()
    quality_mode = data.get('quality_mode', 'balanced')
    similarity_threshold = float(data.get('similarity_threshold', 0.4))

    if not folder_url:
        return jsonify({'error': 'Please provide a folder URL'}), 400

    # Check whether this browser session has reference faces loaded;
    # that decides which processing path the worker takes below.
    face_session_id = session.get('session_id')
    has_references = False
    ref_count = 0
    if face_session_id and face_session_id in face_matchers:
        ref_count = face_matchers[face_session_id].get_reference_count()
        has_references = ref_count > 0

    # Reject malformed folder URLs before spawning any work.
    try:
        folder_id = extract_folder_id(folder_url)
    except ValueError as e:
        return jsonify({'error': str(e)}), 400

    # Fresh job id + staging directory (with thumbnails subdir) for downloads.
    job_id = str(uuid.uuid4())[:8]
    upload_dir = os.path.join(UPLOAD_FOLDER, job_id)
    os.makedirs(upload_dir, exist_ok=True)
    os.makedirs(os.path.join(upload_dir, 'thumbnails'), exist_ok=True)

    processing_jobs[job_id] = {
        'status': 'downloading',
        'progress': 5,
        'message': 'Connecting to Google Drive...',
        'total_files': 0,
        'total_uploaded': 0,
        'upload_dir': upload_dir,
        'session_id': face_session_id,
        'has_reference_photos': has_references,
        'reference_count': ref_count,
        'quality_mode': quality_mode,
        'similarity_threshold': similarity_threshold,
        'results': None
    }

    def download_and_process():
        # Runs in a background thread; reports status via processing_jobs.
        try:
            # Hybrid path: overlap Drive download with face detection when
            # reference faces are available.
            if has_references:
                face_matcher = face_matchers.get(face_session_id)
                if face_matcher and face_matcher.get_reference_count() > 0:
                    print(f"[Job {job_id}] Using HYBRID MODE: Parallel download + face detection")
                    process_drive_with_parallel_face_detection(job_id, folder_id, upload_dir, face_matcher)
                    return

            def progress_callback(current, total, _filename):
                # Download phase maps onto the 5%-30% progress window.
                pct = int(5 + (current / total) * 25)
                processing_jobs[job_id]['progress'] = pct
                processing_jobs[job_id]['message'] = f'Downloading from Drive: {current}/{total}'
                processing_jobs[job_id]['total_files'] = total
                processing_jobs[job_id]['total_uploaded'] = current

            print(f"[Job {job_id}] Starting Google Drive download from folder {folder_id}")

            result = download_folder(folder_id, upload_dir, progress_callback)

            # Only treat it as fatal if absolutely nothing was downloaded.
            if not result.get('success') and result.get('downloaded', 0) == 0:
                processing_jobs[job_id]['status'] = 'error'
                processing_jobs[job_id]['message'] = result.get('message', 'Download failed')
                return

            # Skipped files (already present) still count toward the total.
            downloaded_count = result.get('downloaded', 0) + result.get('skipped', 0)
            downloaded_files = result.get('files', [])
            processing_jobs[job_id]['total_uploaded'] = downloaded_count
            processing_jobs[job_id]['total_files'] = downloaded_count

            print(f"[Job {job_id}] Downloaded {downloaded_count} photos from Google Drive")

            # Hand off to the standard quality-selection pipeline.
            processing_jobs[job_id]['message'] = f'Selecting best from {downloaded_count} photos...'
            process_photos_quality_selection(job_id, upload_dir, quality_mode, similarity_threshold, downloaded_files)

        except Exception as e:
            print(f"[Job {job_id}] Drive import error: {e}")
            import traceback
            traceback.print_exc()
            processing_jobs[job_id]['status'] = 'error'
            processing_jobs[job_id]['message'] = f'Import failed: {str(e)}'

    # Daemon thread: doesn't block interpreter shutdown.
    thread = threading.Thread(target=download_and_process)
    thread.daemon = True
    thread.start()

    return jsonify({
        'job_id': job_id,
        'message': 'Starting Google Drive import...'
    })
|
|
|
|
|
|
|
|
|
@app.route('/import_from_drive_reupload/<dataset_name>', methods=['POST'])
def import_from_drive_reupload(dataset_name):
    """Import photos from Google Drive folder for reupload (after server restart).

    Re-downloads the photos for a previously saved dataset, restores the saved
    face-matching state (reference embeddings) from Supabase, re-matches the
    saved filtered-photo list against the freshly downloaded filenames, and
    leaves the job in 'review_pending' state pointing at /step3_review/<job_id>.

    Request JSON: {'folder_url': <Google Drive folder URL>}.
    Returns JSON with the new job_id; progress is polled via /status/<job_id>.
    """
    if not GDRIVE_SERVICE_ACCOUNT_AVAILABLE:
        return jsonify({'error': 'Google Drive Service Account not configured'}), 400

    data = request.get_json()
    folder_url = data.get('folder_url', '').strip()

    if not folder_url:
        return jsonify({'error': 'Please provide a folder URL'}), 400

    try:
        folder_id = extract_folder_id(folder_url)
    except ValueError as e:
        return jsonify({'error': str(e)}), 400

    # Fresh job workspace: uploads/<job_id>/ plus a thumbnails/ subfolder.
    job_id = str(uuid.uuid4())[:8]
    upload_dir = os.path.join(UPLOAD_FOLDER, job_id)
    os.makedirs(upload_dir, exist_ok=True)
    os.makedirs(os.path.join(upload_dir, 'thumbnails'), exist_ok=True)

    # Minimal initial job record; the worker thread fills in the rest via
    # .update() once the download and dataset restore complete.
    processing_jobs[job_id] = {
        'status': 'downloading',
        'progress': 5,
        'message': 'Connecting to Google Drive...'
    }

    def download_and_process_reupload():
        # Background worker: all failures are reported through the job dict,
        # never raised, so the polling endpoint can surface them.
        try:
            def progress_callback(current, total, filename):
                # Download phase maps onto the 5..50% progress band.
                pct = int(5 + (current / total) * 45)
                processing_jobs[job_id]['progress'] = pct
                processing_jobs[job_id]['message'] = f'Downloading from Drive: {current}/{total}'

            print(f"[Job {job_id}] Starting Google Drive reupload for dataset '{dataset_name}'")

            result = download_folder(folder_id, upload_dir, progress_callback)

            # Treat a failed run as fatal only if nothing was downloaded at all.
            if not result.get('success') and result.get('downloaded', 0) == 0:
                processing_jobs[job_id]['status'] = 'error'
                processing_jobs[job_id]['message'] = result.get('message', 'Download failed')
                return

            uploaded_filenames = result.get('files', [])
            print(f"[Job {job_id}] Downloaded {len(uploaded_filenames)} photos")

            processing_jobs[job_id]['message'] = 'Loading saved dataset...'
            processing_jobs[job_id]['progress'] = 55

            supabase_data = load_dataset_from_supabase(dataset_name)
            if not supabase_data:
                processing_jobs[job_id]['status'] = 'error'
                processing_jobs[job_id]['message'] = 'Dataset not found in Supabase'
                return

            metadata = supabase_data.get('metadata', {})
            face_results = supabase_data.get('face_results', {})
            embeddings_data = supabase_data.get('embeddings_data')

            # Rebuild the FaceMatcher from the serialized .npz payload and
            # register it under a brand-new session id.
            new_session_id = str(uuid.uuid4())[:8]
            if embeddings_data:
                import io
                from photo_selector.face_matcher import FaceMatcher
                data_np = np.load(io.BytesIO(embeddings_data), allow_pickle=True)
                matcher = FaceMatcher(similarity_threshold=float(data_np['threshold']))
                matcher.reference_embeddings = list(data_np['embeddings'])
                matcher.average_embedding = data_np['average']
                face_matchers[new_session_id] = matcher

                print(f"[Job {job_id}] Loaded {len(matcher.reference_embeddings)} reference embeddings")

            import re

            def normalize_filename(filename):
                """Normalize Google Drive filename to match browser upload format."""
                # Drive duplicates look like "name(1).jpg"; browser uploads were
                # stored via secure_filename, which strips the parentheses.
                match = re.match(r'^(.+)\((\d+)\)(\.[^.]+)$', filename)
                if match:
                    base, num, ext = match.groups()
                    filename = f"{base}{num}{ext}"
                return secure_filename(filename)

            filtered_photos = face_results.get('filtered_photos', [])
            uploaded_set = set(uploaded_filenames)
            # NOTE(review): saved_filenames_set is currently unused below.
            saved_filenames_set = {p.get('filename') for p in filtered_photos}

            # Map normalized form -> actual downloaded name for fuzzy matching.
            normalized_to_uploaded = {normalize_filename(f): f for f in uploaded_filenames}

            matched_photos = []
            for p in filtered_photos:
                saved_filename = p.get('filename')
                actual_filename = None

                # Exact match first, then the normalized-name fallback.
                if saved_filename in uploaded_set:
                    actual_filename = saved_filename
                elif saved_filename in normalized_to_uploaded:
                    actual_filename = normalized_to_uploaded[saved_filename]

                if actual_filename:
                    # Copy the saved entry but point it at the file actually on disk.
                    photo_entry = p.copy()
                    photo_entry['filename'] = actual_filename
                    photo_entry['thumbnail'] = get_thumbnail_name(actual_filename)
                    matched_photos.append(photo_entry)

            # Diagnostics: report both directions of the mismatch.
            matched_saved = {p.get('filename') for p in filtered_photos if p.get('filename') in uploaded_set or p.get('filename') in normalized_to_uploaded}
            unmatched_from_saved = [p.get('filename') for p in filtered_photos if p.get('filename') not in matched_saved]
            matched_uploaded = {m['filename'] for m in matched_photos}
            unmatched_from_uploaded = [f for f in uploaded_filenames if f not in matched_uploaded]

            print(f"[Job {job_id}] Matched {len(matched_photos)} of {len(filtered_photos)} photos")
            print(f"[Job {job_id}] DEBUG: {len(unmatched_from_saved)} saved photos NOT found in uploaded files:")
            for fname in unmatched_from_saved[:20]:
                print(f"  [SAVED NOT IN UPLOAD] '{fname}'")
            if len(unmatched_from_saved) > 20:
                print(f"  ... and {len(unmatched_from_saved) - 20} more")

            print(f"[Job {job_id}] DEBUG: {len(unmatched_from_uploaded)} uploaded files NOT found in saved data:")
            for fname in unmatched_from_uploaded[:20]:
                print(f"  [UPLOAD NOT IN SAVED] '{fname}'")
            if len(unmatched_from_uploaded) > 20:
                print(f"  ... and {len(unmatched_from_uploaded) - 20} more")

            review_data = {
                'filtered_photos': matched_photos,
                'total_processed': len(uploaded_filenames),
                'match_count': len(matched_photos)
            }

            # Persist review data so it survives an in-memory job loss.
            with open(os.path.join(RESULTS_FOLDER, f"{job_id}_review.json"), 'w') as f:
                json.dump(review_data, f)

            # Promote the job to the review step with the restored settings.
            processing_jobs[job_id].update({
                'status': 'review_pending',
                'progress': 100,
                'message': 'Photos downloaded from Google Drive',
                'upload_dir': upload_dir,
                'session_id': new_session_id,
                'has_reference_photos': True,
                'reference_count': metadata.get('reference_count', 0),
                'quality_mode': metadata.get('quality_mode', 'balanced'),
                'similarity_threshold': metadata.get('similarity_threshold', 0.4),
                'confirmed_photos': [p['filename'] for p in matched_photos],
                'review_data': review_data,
                'total_photos': len(matched_photos),
                'from_dataset': dataset_name,
                'from_supabase': True,
                'redirect_url': f'/step3_review/{job_id}'
            })

            print(f"[Job {job_id}] Reupload complete - ready for review")

        except Exception as e:
            print(f"[Job {job_id}] Drive reupload error: {e}")
            import traceback
            traceback.print_exc()
            processing_jobs[job_id]['status'] = 'error'
            processing_jobs[job_id]['message'] = f'Import failed: {str(e)}'

    # Daemon thread so a long download never blocks interpreter shutdown.
    thread = threading.Thread(target=download_and_process_reupload)
    thread.daemon = True
    thread.start()

    return jsonify({
        'job_id': job_id,
        'message': 'Starting Google Drive import...'
    })
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@app.route('/upload', methods=['POST'])
def upload_files():
    """Handle browser file uploads and start background processing.

    Saves the uploaded images under uploads/<job_id>/, registers the job in
    `processing_jobs`, then spawns a worker thread:

    - face-filter mode when the current session has reference photos
      (process_photos_face_filter_only), or
    - full automatic selection otherwise (process_photos_automatic).

    Returns JSON with the new job_id; callers poll /status/<job_id>.
    """
    if 'files' not in request.files:
        return jsonify({'error': 'No files provided'}), 400

    files = request.files.getlist('files')
    if not files or files[0].filename == '':
        return jsonify({'error': 'No files selected'}), 400

    quality_mode = request.form.get('quality_mode', 'balanced')
    similarity_threshold = float(request.form.get('similarity', 0.92))

    session_id = session.get('session_id')

    job_id = str(uuid.uuid4())[:8]
    upload_dir = os.path.join(UPLOAD_FOLDER, job_id)
    os.makedirs(upload_dir, exist_ok=True)

    saved_files = []
    for file in files:
        if file and allowed_file(file.filename):
            filename = secure_filename(file.filename)

            # De-duplicate names within this job's folder (file.jpg -> file_1.jpg).
            base, ext = os.path.splitext(filename)
            counter = 1
            while os.path.exists(os.path.join(upload_dir, filename)):
                filename = f"{base}_{counter}{ext}"
                counter += 1

            file.save(os.path.join(upload_dir, filename))
            saved_files.append(filename)

    if not saved_files:
        shutil.rmtree(upload_dir)
        return jsonify({'error': 'No valid image files'}), 400

    # Face filtering is only possible if this session registered references.
    has_references = False
    ref_count = 0
    if session_id and session_id in face_matchers:
        ref_count = face_matchers[session_id].get_reference_count()
        has_references = ref_count > 0

    processing_jobs[job_id] = {
        'status': 'queued',
        'progress': 0,
        'message': 'Uploading files...',
        'total_files': len(saved_files),
        'total_uploaded': len(saved_files),
        'upload_dir': upload_dir,
        'session_id': session_id,
        'has_reference_photos': has_references,
        'reference_count': ref_count,
        'quality_mode': quality_mode,
        'similarity_threshold': similarity_threshold,
        'results': None
    }

    if has_references:
        print(f"\n[Job {job_id}] NEW JOB - Face Filtering Mode")
        print(f"  - Files uploaded: {len(saved_files)}")
        print(f"  - Reference photos: {ref_count}")
        print(f"  - Session ID: {session_id}")
        thread = threading.Thread(
            target=process_photos_face_filter_only,
            args=(job_id, upload_dir, session_id)
        )
        message = f'Scanning {len(saved_files)} photos to find your child using {ref_count} reference(s)...'
    else:
        print(f"\n[Job {job_id}] NEW JOB - Full Automatic Mode")
        print(f"  - Files uploaded: {len(saved_files)}")
        print(f"  - Quality mode: {quality_mode}")
        print(f"  - Similarity threshold: {similarity_threshold}")
        thread = threading.Thread(
            target=process_photos_automatic,
            args=(job_id, upload_dir, quality_mode, similarity_threshold, session_id)
        )
        message = 'Processing started - AI will automatically select the best photos!'

    # FIX: daemonize the worker like the Drive-import routes do, so a
    # long-running processing thread never blocks interpreter shutdown.
    thread.daemon = True
    thread.start()

    return jsonify({
        'job_id': job_id,
        'files_uploaded': len(saved_files),
        'has_reference_photos': has_references,
        'reference_count': ref_count,
        'message': message,
        'needs_review': has_references
    })
|
|
|
|
|
|
|
|
|
@app.route('/upload_folder', methods=['POST'])
def upload_folder():
    """Process photos from a local folder path (for large batches).

    Unlike /upload, nothing is copied: the job's 'upload_dir' points directly
    at the user's folder (flagged with 'is_local_folder'); only thumbnails get
    a dedicated directory under uploads/<job_id>/.

    Request JSON: {'folder_path', 'quality_mode', 'similarity_threshold'}.
    Returns JSON with the new job_id; callers poll /status/<job_id>.

    NOTE(review): folder_path comes straight from the request and is read
    without restriction — acceptable for a local tool, but verify before any
    multi-user deployment.
    """
    data = request.get_json()
    folder_path = data.get('folder_path', '').strip()
    quality_mode = data.get('quality_mode', 'balanced')
    similarity_threshold = float(data.get('similarity_threshold', 0.92))

    if not folder_path:
        return jsonify({'error': 'No folder path provided'}), 400

    if not os.path.isdir(folder_path):
        return jsonify({'error': f'Folder not found: {folder_path}'}), 400

    session_id = session.get('session_id')

    job_id = str(uuid.uuid4())[:8]

    # Only top-level files are scanned (no recursion).
    image_extensions = {'.jpg', '.jpeg', '.png', '.heic', '.heif', '.webp'}
    image_files = [f for f in os.listdir(folder_path)
                   if os.path.splitext(f.lower())[1] in image_extensions]

    if not image_files:
        return jsonify({'error': 'No valid image files found in folder'}), 400

    print(f"\n[Job {job_id}] LOCAL FOLDER MODE")
    print(f"  - Folder: {folder_path}")
    print(f"  - Images found: {len(image_files)}")

    # Face filtering is only possible if this session registered references.
    has_references = False
    ref_count = 0
    if session_id and session_id in face_matchers:
        ref_count = face_matchers[session_id].get_reference_count()
        has_references = ref_count > 0

    # Thumbnails still live under the app's upload tree, not the user's folder.
    thumb_dir = os.path.join(UPLOAD_FOLDER, job_id, 'thumbnails')
    os.makedirs(thumb_dir, exist_ok=True)

    processing_jobs[job_id] = {
        'status': 'queued',
        'progress': 0,
        'message': 'Preparing to process photos...',
        'total_files': len(image_files),
        'total_uploaded': len(image_files),
        'upload_dir': folder_path,
        'thumb_dir': thumb_dir,
        'session_id': session_id,
        'has_reference_photos': has_references,
        'reference_count': ref_count,
        'quality_mode': quality_mode,
        'similarity_threshold': similarity_threshold,
        'is_local_folder': True,
        'results': None
    }

    if has_references:
        print(f"  - Reference photos: {ref_count}")
        print(f"  - Mode: Face Filtering")
        thread = threading.Thread(
            target=process_photos_face_filter_only,
            args=(job_id, folder_path, session_id)
        )
        message = f'Scanning {len(image_files)} photos to find your child...'
    else:
        print(f"  - Mode: Full Automatic")
        thread = threading.Thread(
            target=process_photos_automatic,
            args=(job_id, folder_path, quality_mode, similarity_threshold, session_id)
        )
        message = 'Processing started - AI will automatically select the best photos!'

    # FIX: daemonize the worker like the Drive-import routes do, so a
    # long-running processing thread never blocks interpreter shutdown.
    thread.daemon = True
    thread.start()

    return jsonify({
        'job_id': job_id,
        'files_found': len(image_files),
        'has_reference_photos': has_references,
        'reference_count': ref_count,
        'message': message,
        'needs_review': has_references
    })
|
|
|
|
|
|
|
|
|
@app.route('/status/<job_id>')
def get_status(job_id):
    """Get processing status.

    Returns status/progress/message plus optional counters. Optional keys use
    .get() because jobs created by different routes carry different key sets
    (the Drive-reupload job dict initially has only status/progress/message).
    """
    if job_id not in processing_jobs:
        return jsonify({'error': 'Job not found'}), 404

    job = processing_jobs[job_id]
    response = {
        'status': job['status'],
        'progress': job['progress'],
        'message': job['message'],
        'total_photos': job.get('total_photos', 0),
        'photos_checked': job.get('photos_checked', 0)
    }

    # FIX: job['results'] raised KeyError for job dicts that never had a
    # 'results' key; .get() preserves the original truthiness check.
    if job['status'] == 'complete' and job.get('results'):
        response['summary'] = job['results']['summary']

    return jsonify(response)
|
|
|
|
|
|
|
|
|
@app.route('/results/<job_id>')
def get_results(job_id):
    """Get processing results.

    Looks up the in-memory job first; when the job is unknown (e.g. after a
    server restart) or its results were not kept in memory, falls back to the
    JSON file the worker persisted under RESULTS_FOLDER.
    """
    try:
        def _load_persisted():
            # Results written to disk survive loss of the in-memory job table.
            path = os.path.join(RESULTS_FOLDER, f"{job_id}.json")
            if not os.path.exists(path):
                return None
            with open(path, 'r') as fh:
                return json.load(fh)

        job = processing_jobs.get(job_id)

        if job is None:
            persisted = _load_persisted()
            if persisted is not None:
                return jsonify(persisted)
            return jsonify({'error': 'Job not found'}), 404

        if job['status'] != 'complete':
            return jsonify({'error': 'Processing not complete', 'status': job['status'], 'message': job.get('message', '')}), 400

        if job.get('results'):
            return jsonify(job['results'])

        persisted = _load_persisted()
        if persisted is not None:
            return jsonify(persisted)

        return jsonify({'error': 'Results not found'}), 404
    except Exception as e:
        import traceback
        traceback.print_exc()
        return jsonify({'error': str(e)}), 500
|
|
|
|
|
|
|
|
|
@app.route('/thumbnail/<job_id>/<filename>')
def get_thumbnail(job_id, filename):
    """Serve thumbnail images, generating on-demand if needed."""
    job_dir = os.path.join(UPLOAD_FOLDER, job_id)
    thumb_dir = os.path.join(job_dir, 'thumbnails')
    thumb_name = get_thumbnail_name(filename)
    thumb_path = os.path.join(thumb_dir, thumb_name)

    # Fast path: the thumbnail was already generated.
    if os.path.exists(thumb_path):
        return send_from_directory(thumb_dir, thumb_name)

    # Lazily build the thumbnail from the original upload, when present.
    source_path = os.path.join(job_dir, filename)
    if os.path.exists(source_path):
        os.makedirs(thumb_dir, exist_ok=True)
        create_thumbnail(source_path, thumb_path)
        if os.path.exists(thumb_path):
            return send_from_directory(thumb_dir, thumb_name)

    # Last resort: a file stored under its raw name in the thumbnail folder.
    if os.path.exists(os.path.join(thumb_dir, filename)):
        return send_from_directory(thumb_dir, filename)

    return jsonify({'error': 'Thumbnail not found'}), 404
|
|
|
|
|
|
|
|
|
@app.route('/photo/<job_id>/<filename>')
def get_photo(job_id, filename):
    """Serve full-size photos with proper EXIF rotation handling.

    HEIC/HEIF files are transcoded to JPEG (browsers can't render them);
    JPEGs are re-encoded with their EXIF orientation applied. Other formats
    are served as-is.

    FIX: the previous version used the private Image._getexif() API and only
    handled orientations 3/6/8 (rotations), silently ignoring the mirrored
    orientations 2/4/5/7, and applied no orientation at all to HEIC files.
    ImageOps.exif_transpose is the public API and handles all 8 cases.
    """
    from io import BytesIO
    from PIL import ImageOps

    photo_dir = os.path.join(UPLOAD_FOLDER, job_id)
    filepath = os.path.join(photo_dir, filename)

    if not os.path.exists(filepath):
        return jsonify({'error': 'File not found'}), 404

    ext = os.path.splitext(filename)[1].lower()

    # HEIC/HEIF: transcode to JPEG for browser compatibility.
    if ext in ['.heic', '.heif']:
        try:
            img = Image.open(filepath)
            # Apply any EXIF orientation before converting.
            img = ImageOps.exif_transpose(img)
            img = img.convert('RGB')
            buffer = BytesIO()
            img.save(buffer, format='JPEG', quality=90)
            buffer.seek(0)
            return send_file(buffer, mimetype='image/jpeg')
        except Exception as e:
            # Best effort: fall back to the raw file rather than failing.
            print(f"Error converting HEIC: {e}")
            return send_from_directory(photo_dir, filename)

    # JPEG: re-encode with the EXIF orientation baked in.
    if ext in ['.jpg', '.jpeg']:
        try:
            img = Image.open(filepath)
            img = ImageOps.exif_transpose(img)
            if img.mode != 'RGB':
                img = img.convert('RGB')
            buffer = BytesIO()
            img.save(buffer, format='JPEG', quality=90)
            buffer.seek(0)
            return send_file(buffer, mimetype='image/jpeg')
        except Exception as e:
            print(f"Error processing JPEG: {e}")
            return send_from_directory(photo_dir, filename)

    # PNG/WebP/etc: serve unmodified.
    return send_from_directory(photo_dir, filename)
|
|
|
|
|
|
|
|
|
@app.route('/download/<job_id>')
def download_selected(job_id):
    """Download selected photos as zip with timestamp-sorted naming.

    Uses DISK-BASED ZIP creation (not memory) to handle large photo sets (1000+).
    The ZIP is created on disk, then streamed to the browser in chunks.
    This prevents memory issues and timeouts on large downloads.
    """
    import zipfile
    import tempfile
    from datetime import datetime
    from collections import defaultdict

    if job_id not in processing_jobs:
        return jsonify({'error': 'Job not found'}), 404

    job = processing_jobs[job_id]
    if job['status'] != 'complete':
        return jsonify({'error': 'Processing not complete'}), 400

    results = job.get('results', {})
    selected = results.get('selected', [])
    upload_dir = job.get('upload_dir', '')

    if not selected:
        return jsonify({'error': 'No selected photos found'}), 404

    if not upload_dir:
        return jsonify({'error': 'Upload directory not found'}), 404

    print(f"[Download] Starting disk-based ZIP for {len(selected)} photos...")

    # Month abbreviations for the archive-entry prefix ("Jan_3_IMG001.jpg").
    MONTH_ABBREV = {
        1: "Jan", 2: "Feb", 3: "Mar", 4: "Apr",
        5: "May", 6: "Jun", 7: "Jul", 8: "Aug",
        9: "Sep", 10: "Oct", 11: "Nov", 12: "Dec"
    }

    from photo_selector.utils import get_photo_timestamp

    # Group photos by (year, month); photos without a timestamp go to a
    # separate "NoDate" bucket.
    photos_by_month = defaultdict(list)
    photos_no_timestamp = []

    for photo in selected:
        filename = photo.get('filename', '')
        ts = photo.get('timestamp')

        # Fall back to reading the timestamp from the file itself (EXIF/mtime
        # per get_photo_timestamp) when the result entry lacks one.
        if not ts:
            photo_path = os.path.join(upload_dir, filename)
            if os.path.exists(photo_path):
                dt = get_photo_timestamp(photo_path)
                if dt:
                    ts = dt.timestamp()

        if ts:
            dt = datetime.fromtimestamp(ts)
            month_key = (dt.year, dt.month)
            photos_by_month[month_key].append({
                'filename': filename,
                'timestamp': ts,
                'datetime': dt
            })
        else:
            photos_no_timestamp.append({'filename': filename, 'timestamp': 0})

    # Chronological order within each month.
    for month_key in photos_by_month:
        photos_by_month[month_key].sort(key=lambda x: x['timestamp'])

    temp_zip_path = os.path.join(tempfile.gettempdir(), f'selected_photos_{job_id}.zip')
    files_added = 0

    try:
        # ZIP_STORED (no compression): JPEGs don't compress, storing is faster.
        with zipfile.ZipFile(temp_zip_path, 'w', zipfile.ZIP_STORED) as zf:
            for month_key in sorted(photos_by_month.keys()):
                year, month = month_key
                month_abbrev = MONTH_ABBREV[month]
                photos = photos_by_month[month_key]

                for idx, photo in enumerate(photos, start=1):
                    original_filename = photo['filename']
                    photo_path = os.path.join(upload_dir, original_filename)

                    if os.path.exists(photo_path):
                        # Archive entry: <Mon>_<index>_<originalname><ext>.
                        ext = os.path.splitext(original_filename)[1]
                        base_name = os.path.splitext(original_filename)[0]
                        new_filename = f"{month_abbrev}_{idx}_{base_name}{ext}"

                        zf.write(photo_path, new_filename)
                        files_added += 1

                        if files_added % 100 == 0:
                            print(f"[Download] Added {files_added} files to ZIP...")
                    else:
                        print(f"[Download] File not found: {photo_path}")

            # Undated photos are appended last with a "NoDate" prefix.
            for idx, photo in enumerate(photos_no_timestamp, start=1):
                original_filename = photo['filename']
                photo_path = os.path.join(upload_dir, original_filename)

                if os.path.exists(photo_path):
                    ext = os.path.splitext(original_filename)[1]
                    base_name = os.path.splitext(original_filename)[0]
                    new_filename = f"NoDate_{idx}_{base_name}{ext}"

                    zf.write(photo_path, new_filename)
                    files_added += 1
                else:
                    print(f"[Download] File not found: {photo_path}")

        if files_added == 0:
            # Nothing to serve; remove the empty archive before erroring out.
            if os.path.exists(temp_zip_path):
                os.remove(temp_zip_path)
            return jsonify({'error': f'No files found in {upload_dir}. Files may have been cleaned up.'}), 404

        zip_size_mb = os.path.getsize(temp_zip_path) / (1024 * 1024)
        print(f"[Download] ZIP created: {files_added} files, {zip_size_mb:.1f} MB")

        def generate_and_cleanup():
            """Generator that streams ZIP file and deletes it after completion."""
            try:
                with open(temp_zip_path, 'rb') as f:
                    while True:
                        chunk = f.read(8192 * 16)
                        if not chunk:
                            break
                        yield chunk
            finally:
                # Runs even when the client aborts mid-download.
                try:
                    if os.path.exists(temp_zip_path):
                        os.remove(temp_zip_path)
                        print(f"[Download] Cleaned up temp ZIP: {temp_zip_path}")
                except Exception as e:
                    print(f"[Download] Error cleaning up temp ZIP: {e}")

        # Content-Length is known because the ZIP is fully built on disk;
        # this lets browsers show an accurate download progress bar.
        response = Response(
            generate_and_cleanup(),
            mimetype='application/zip',
            headers={
                'Content-Disposition': f'attachment; filename=selected_photos_{job_id}.zip',
                'Content-Length': str(os.path.getsize(temp_zip_path))
            }
        )
        return response

    except Exception as e:
        if os.path.exists(temp_zip_path):
            os.remove(temp_zip_path)
        print(f"[Download] Error creating ZIP: {e}")
        return jsonify({'error': f'Error creating ZIP: {str(e)}'}), 500
|
|
|
|
|
|
|
|
|
@app.route('/download_filtered/<job_id>')
def download_filtered(job_id):
    """Download all filtered photos (after face matching, before quality selection).

    Uses DISK-BASED ZIP creation (not memory) to handle large photo sets (1000+).
    """
    import zipfile
    import tempfile

    if job_id not in processing_jobs:
        return jsonify({'error': 'Job not found'}), 404

    job = processing_jobs[job_id]

    # FIX: mirror download_unmatched — validate upload_dir up front instead of
    # indexing job['upload_dir'] in the loop, which raised KeyError for job
    # dicts created without that key.
    upload_dir = job.get('upload_dir', '')
    if not upload_dir:
        return jsonify({'error': 'Upload directory not found'}), 404

    # Prefer in-memory review data; fall back to the JSON persisted on disk.
    filtered_photos = []
    if 'review_data' in job:
        filtered_photos = [p['filename'] for p in job['review_data'].get('filtered_photos', [])]
    else:
        review_file = os.path.join(RESULTS_FOLDER, f"{job_id}_review.json")
        if os.path.exists(review_file):
            with open(review_file, 'r') as f:
                review_data = json.load(f)
                filtered_photos = [p['filename'] for p in review_data.get('filtered_photos', [])]

    if not filtered_photos:
        return jsonify({'error': 'No filtered photos found'}), 404

    print(f"[Download] Starting disk-based ZIP for {len(filtered_photos)} filtered photos...")

    temp_zip_path = os.path.join(tempfile.gettempdir(), f'filtered_photos_{job_id}.zip')
    files_added = 0

    try:
        # ZIP_STORED (no compression): JPEGs don't compress, storing is faster.
        with zipfile.ZipFile(temp_zip_path, 'w', zipfile.ZIP_STORED) as zf:
            for filename in filtered_photos:
                photo_path = os.path.join(upload_dir, filename)
                if os.path.exists(photo_path):
                    zf.write(photo_path, filename)
                    files_added += 1
                    if files_added % 100 == 0:
                        print(f"[Download] Added {files_added} files to ZIP...")

        if files_added == 0:
            if os.path.exists(temp_zip_path):
                os.remove(temp_zip_path)
            return jsonify({'error': 'No files found. Files may have been cleaned up.'}), 404

        zip_size_mb = os.path.getsize(temp_zip_path) / (1024 * 1024)
        print(f"[Download] ZIP created: {files_added} files, {zip_size_mb:.1f} MB")

        def generate_and_cleanup():
            """Stream the ZIP in chunks, deleting it even on aborted downloads."""
            try:
                with open(temp_zip_path, 'rb') as f:
                    while True:
                        chunk = f.read(8192 * 16)
                        if not chunk:
                            break
                        yield chunk
            finally:
                try:
                    if os.path.exists(temp_zip_path):
                        os.remove(temp_zip_path)
                        print(f"[Download] Cleaned up temp ZIP: {temp_zip_path}")
                except Exception as e:
                    print(f"[Download] Error cleaning up temp ZIP: {e}")

        return Response(
            generate_and_cleanup(),
            mimetype='application/zip',
            headers={
                'Content-Disposition': f'attachment; filename=filtered_photos_{job_id}.zip',
                'Content-Length': str(os.path.getsize(temp_zip_path))
            }
        )

    except Exception as e:
        if os.path.exists(temp_zip_path):
            os.remove(temp_zip_path)
        print(f"[Download] Error creating ZIP: {e}")
        return jsonify({'error': f'Error creating ZIP: {str(e)}'}), 500
|
|
|
|
|
|
|
|
|
@app.route('/download_unmatched/<job_id>')
def download_unmatched(job_id):
    """Download photos where target person was NOT detected, with timestamp-sorted naming."""
    import zipfile
    import tempfile
    from datetime import datetime
    from collections import defaultdict

    if job_id not in processing_jobs:
        return jsonify({'error': 'Job not found'}), 404

    job = processing_jobs[job_id]
    upload_dir = job.get('upload_dir', '')

    if not upload_dir:
        return jsonify({'error': 'Upload directory not found'}), 404

    # Prefer in-memory review data; fall back to the JSON persisted on disk.
    unmatched_photos = []
    if 'review_data' in job:
        unmatched_photos = job['review_data'].get('unmatched_photos', [])
    else:
        review_file = os.path.join(RESULTS_FOLDER, f"{job_id}_review.json")
        if os.path.exists(review_file):
            with open(review_file, 'r') as f:
                review_data = json.load(f)
                unmatched_photos = review_data.get('unmatched_photos', [])

    if not unmatched_photos:
        return jsonify({'error': 'No unmatched photos found'}), 404

    print(f"[Download] Starting disk-based ZIP for {len(unmatched_photos)} unmatched photos...")

    # Month abbreviations for the archive-entry prefix ("Jan_3_IMG001.jpg").
    MONTH_ABBREV = {
        1: "Jan", 2: "Feb", 3: "Mar", 4: "Apr",
        5: "May", 6: "Jun", 7: "Jul", 8: "Aug",
        9: "Sep", 10: "Oct", 11: "Nov", 12: "Dec"
    }

    from photo_selector.utils import get_photo_timestamp

    # Group photos by (year, month); photos without a timestamp go to a
    # separate "NoDate" bucket.
    photos_by_month = defaultdict(list)
    photos_no_timestamp = []

    for photo in unmatched_photos:
        filename = photo.get('filename', '')
        ts = photo.get('timestamp')

        # Fall back to reading the timestamp from the file itself when the
        # review entry lacks one.
        if not ts:
            photo_path = os.path.join(upload_dir, filename)
            if os.path.exists(photo_path):
                dt = get_photo_timestamp(photo_path)
                if dt:
                    ts = dt.timestamp()

        if ts:
            dt = datetime.fromtimestamp(ts)
            month_key = (dt.year, dt.month)
            photos_by_month[month_key].append({
                'filename': filename,
                'timestamp': ts
            })
        else:
            photos_no_timestamp.append({'filename': filename})

    # Chronological order within each month.
    for month_key in photos_by_month:
        photos_by_month[month_key].sort(key=lambda x: x['timestamp'])

    temp_zip_path = os.path.join(tempfile.gettempdir(), f'unmatched_photos_{job_id}.zip')
    files_added = 0

    try:
        # ZIP_STORED (no compression): JPEGs don't compress, storing is faster.
        with zipfile.ZipFile(temp_zip_path, 'w', zipfile.ZIP_STORED) as zf:
            for month_key in sorted(photos_by_month.keys()):
                year, month = month_key
                month_abbrev = MONTH_ABBREV[month]
                photos = photos_by_month[month_key]

                for idx, photo in enumerate(photos, start=1):
                    original_filename = photo['filename']
                    photo_path = os.path.join(upload_dir, original_filename)

                    if os.path.exists(photo_path):
                        # Archive entry: <Mon>_<index>_<originalname><ext>.
                        ext = os.path.splitext(original_filename)[1]
                        base_name = os.path.splitext(original_filename)[0]
                        new_filename = f"{month_abbrev}_{idx}_{base_name}{ext}"
                        zf.write(photo_path, new_filename)
                        files_added += 1
                        if files_added % 100 == 0:
                            print(f"[Download] Added {files_added} files to ZIP...")

            # Undated photos are appended last with a "NoDate" prefix.
            for idx, photo in enumerate(photos_no_timestamp, start=1):
                original_filename = photo['filename']
                photo_path = os.path.join(upload_dir, original_filename)

                if os.path.exists(photo_path):
                    ext = os.path.splitext(original_filename)[1]
                    base_name = os.path.splitext(original_filename)[0]
                    new_filename = f"NoDate_{idx}_{base_name}{ext}"
                    zf.write(photo_path, new_filename)
                    files_added += 1

        if files_added == 0:
            # Nothing to serve; remove the empty archive before erroring out.
            if os.path.exists(temp_zip_path):
                os.remove(temp_zip_path)
            return jsonify({'error': 'No files found in upload directory'}), 404

        zip_size_mb = os.path.getsize(temp_zip_path) / (1024 * 1024)
        print(f"[Download] ZIP created: {files_added} files, {zip_size_mb:.1f} MB")

        def generate_and_cleanup():
            # Stream the ZIP in chunks; the finally clause deletes the temp
            # file even if the client aborts mid-download.
            try:
                with open(temp_zip_path, 'rb') as f:
                    while True:
                        chunk = f.read(8192 * 16)
                        if not chunk:
                            break
                        yield chunk
            finally:
                try:
                    if os.path.exists(temp_zip_path):
                        os.remove(temp_zip_path)
                        print(f"[Download] Cleaned up temp ZIP: {temp_zip_path}")
                except Exception as e:
                    print(f"[Download] Error cleaning up temp ZIP: {e}")

        return Response(
            generate_and_cleanup(),
            mimetype='application/zip',
            headers={
                'Content-Disposition': f'attachment; filename=unmatched_photos_{job_id}.zip',
                'Content-Length': str(os.path.getsize(temp_zip_path))
            }
        )

    except Exception as e:
        if os.path.exists(temp_zip_path):
            os.remove(temp_zip_path)
        print(f"[Download] Error creating ZIP: {e}")
        return jsonify({'error': f'Error creating ZIP: {str(e)}'}), 500
|
|
|
|
|
|
|
|
|
@app.route('/cleanup/<job_id>', methods=['POST'])
def cleanup_job(job_id):
    """Remove all in-memory and on-disk artifacts belonging to a job."""
    # Drop the in-memory record (if present) and its upload directory.
    job = processing_jobs.pop(job_id, None)
    if job is not None:
        upload_dir = job.get('upload_dir')
        if upload_dir and os.path.exists(upload_dir):
            shutil.rmtree(upload_dir)

    # Delete the per-job results and review JSON files when they exist.
    for suffix in (".json", "_review.json"):
        artifact = os.path.join(RESULTS_FOLDER, f"{job_id}{suffix}")
        if os.path.exists(artifact):
            os.remove(artifact)

    return jsonify({'message': 'Cleaned up'})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@app.route('/step3_review/<job_id>')
def step3_review(job_id):
    """Step 3: Review filtered photos before quality selection."""
    job = processing_jobs.get(job_id)
    if job is None:
        # Unknown job id - send the user back to the landing page.
        return render_template('index.html')

    if job['status'] in ('review_pending', 'complete'):
        return render_template('step3_review.html', job_id=job_id)

    # Filtering has not produced review data yet - show the upload page.
    return render_template('step2_upload.html',
                           session_id=session.get('session_id'),
                           reference_count=job.get('reference_count', 0))
|
|
|
|
|
|
|
|
|
@app.route('/review_data/<job_id>')
def get_review_data(job_id):
    """Return the filtered-photos payload that the review page renders."""
    try:
        job = processing_jobs[job_id]
    except KeyError:
        return jsonify({'error': 'Job not found'}), 404

    # Prefer the copy still held in memory on the job record.
    if 'review_data' in job:
        return jsonify(job['review_data'])

    # Fall back to the review JSON persisted on disk.
    review_path = os.path.join(RESULTS_FOLDER, f"{job_id}_review.json")
    if os.path.exists(review_path):
        with open(review_path, 'r') as fh:
            payload = json.load(fh)
        return jsonify(payload)

    return jsonify({'error': 'Review data not found'}), 404
|
|
|
|
|
|
|
|
|
@app.route('/review_thumbnail/<job_id>/<filename>')
def get_review_thumbnail(job_id, filename):
    """Serve a review-page thumbnail, checking the standard and fallback dirs."""
    primary_dir = os.path.join(UPLOAD_FOLDER, job_id, 'thumbnails')

    candidate_dirs = [primary_dir]
    job = processing_jobs.get(job_id)
    if job is not None:
        # Some jobs keep thumbnails under their recorded upload_dir instead.
        candidate_dirs.append(os.path.join(job.get('upload_dir', ''), 'thumbnails'))

    for directory in candidate_dirs:
        if os.path.exists(os.path.join(directory, filename)):
            return send_from_directory(directory, filename)

    # Not found anywhere: let send_from_directory raise the 404.
    return send_from_directory(primary_dir, filename)
|
|
|
|
|
|
|
|
|
@app.route('/review_photo/<job_id>/<filename>')
def get_review_photo(job_id, filename):
    """Serve full-size photo for review modal with EXIF rotation handling.

    HEIC/HEIF files are transcoded to JPEG in memory (browsers cannot
    render them), JPEGs are re-encoded with their EXIF orientation baked
    in, and any other format is streamed from disk unchanged.
    """
    from io import BytesIO
    from PIL import ExifTags

    photo_dir = os.path.join(UPLOAD_FOLDER, job_id)
    filepath = os.path.join(photo_dir, filename)

    if not os.path.exists(filepath):
        return jsonify({'error': 'File not found'}), 404

    ext = os.path.splitext(filename)[1].lower()

    # HEIC/HEIF: convert to JPEG in memory (relies on the pillow-heif opener
    # registered at import time; on failure fall back to the raw file).
    if ext in ['.heic', '.heif']:
        try:
            img = Image.open(filepath)
            img = img.convert('RGB')
            buffer = BytesIO()
            img.save(buffer, format='JPEG', quality=90)
            buffer.seek(0)
            return send_file(buffer, mimetype='image/jpeg')
        except Exception as e:
            print(f"Error converting HEIC: {e}")
            return send_from_directory(photo_dir, filename)

    # JPEG: apply the EXIF Orientation so the browser shows it upright.
    if ext in ['.jpg', '.jpeg']:
        try:
            img = Image.open(filepath)

            try:
                # Use the public Pillow API (Image.getexif) instead of the
                # deprecated private _getexif() plus a linear scan of
                # ExifTags.TAGS to find the Orientation tag id.
                orientation_value = img.getexif().get(274)  # 274 == Orientation
                if orientation_value == 3:
                    img = img.rotate(180, expand=True)
                elif orientation_value == 6:
                    img = img.rotate(270, expand=True)
                elif orientation_value == 8:
                    img = img.rotate(90, expand=True)
            except (AttributeError, KeyError, IndexError):
                # Missing or corrupt EXIF: serve the pixels as stored.
                pass

            if img.mode != 'RGB':
                img = img.convert('RGB')

            buffer = BytesIO()
            img.save(buffer, format='JPEG', quality=90)
            buffer.seek(0)
            return send_file(buffer, mimetype='image/jpeg')
        except Exception as e:
            print(f"Error processing JPEG: {e}")
            return send_from_directory(photo_dir, filename)

    # Other formats (png, webp, ...): serve as-is.
    return send_from_directory(photo_dir, filename)
|
|
|
|
|
|
|
|
|
@app.route('/confirm_selection/<job_id>', methods=['POST'])
def confirm_selection(job_id):
    """User confirms their selection - proceed to quality-based selection.

    Expects a JSON body with:
      - selected_photos: non-empty list of confirmed filenames (required)
      - embedding_model: 'siglip' or 'clip' (optional, defaults to 'siglip')

    Rebuilds the per-photo face-data cache from the Step 3 review results
    and launches process_photos_quality_selection() in a background thread.
    Returns immediately; progress is polled via the job record.
    """
    if job_id not in processing_jobs:
        return jsonify({'error': 'Job not found'}), 404

    job = processing_jobs[job_id]

    data = request.get_json()
    if not data or 'selected_photos' not in data:
        return jsonify({'error': 'No photos selected'}), 400

    confirmed_photos = data['selected_photos']
    if len(confirmed_photos) == 0:
        return jsonify({'error': 'At least one photo must be selected'}), 400

    # Only two embedding backends are supported; anything else silently
    # falls back to the default rather than erroring.
    embedding_model = data.get('embedding_model', 'siglip')
    if embedding_model not in ['siglip', 'clip']:
        embedding_model = 'siglip'

    # Selection parameters captured earlier in the workflow (with defaults
    # in case the job record predates those settings).
    quality_mode = job.get('quality_mode', 'balanced')
    similarity_threshold = job.get('similarity_threshold', 0.92)
    upload_dir = job.get('upload_dir')

    # Face counts / bounding boxes were computed during the filtering stage;
    # cache them keyed by filename so quality selection can skip re-detection.
    face_data_cache = {}
    if 'review_data' in job:
        # Preferred source: review data still held in memory on the job.
        for photo in job['review_data'].get('filtered_photos', []):
            filename = photo.get('filename')
            if filename:
                face_data_cache[filename] = {
                    'num_faces': photo.get('num_faces', 0),
                    'face_bboxes': photo.get('face_bboxes', [])
                }
    else:
        # Fallback: reload the review JSON persisted on disk.
        review_file = os.path.join(RESULTS_FOLDER, f"{job_id}_review.json")
        if os.path.exists(review_file):
            with open(review_file, 'r') as f:
                review_data = json.load(f)
            for photo in review_data.get('filtered_photos', []):
                filename = photo.get('filename')
                if filename:
                    face_data_cache[filename] = {
                        'num_faces': photo.get('num_faces', 0),
                        'face_bboxes': photo.get('face_bboxes', [])
                    }

    print(f"[Job {job_id}] Loaded face data cache for {len(face_data_cache)} photos")

    # Reset job progress before handing off to the worker thread.
    job['status'] = 'processing'
    job['progress'] = 0
    job['message'] = 'Starting quality-based selection...'
    job['confirmed_photos'] = confirmed_photos

    # Heavy processing runs off the request thread.
    thread = threading.Thread(
        target=process_photos_quality_selection,
        args=(job_id, upload_dir, quality_mode, similarity_threshold, confirmed_photos, face_data_cache, embedding_model)
    )
    thread.start()

    return jsonify({
        'message': f'Processing {len(confirmed_photos)} confirmed photos...',
        'confirmed_count': len(confirmed_photos)
    })
|
|
|
|
|
|
|
|
|
@app.route('/step4_results/<job_id>')
def step4_results(job_id):
    """Step 4: Final results page.

    Renders the results template; reference_count lets the page show how
    many reference photos the session's face matcher holds.
    """
    if job_id not in processing_jobs:
        # Unknown job id - fall back to the landing page.
        return render_template('index.html')

    # NOTE: the job record itself is not needed here (the template fetches
    # results via the API using job_id) - removed an unused local lookup.
    session_id = session.get('session_id')
    ref_count = 0
    if session_id and session_id in face_matchers:
        ref_count = face_matchers[session_id].get_reference_count()

    return render_template('step4_results.html',
                           job_id=job_id,
                           reference_count=ref_count)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@app.route('/test-month')
def test_month_page():
    """Render the standalone single-month selection test page."""
    return render_template('test_month.html')
|
|
|
|
|
|
|
|
|
@app.route('/test-month/start', methods=['POST'])
def test_month_start():
    """Start processing a single month folder.

    Expects JSON: folder_path (str, required), target (int, default 40),
    organize_by_month (bool, default False). Spawns process_test_month in
    a background thread and returns the new job id.
    """
    # silent=True: a missing or non-JSON body becomes {} and yields a clean
    # 400 below instead of an AttributeError on None (500).
    data = request.get_json(silent=True) or {}
    folder_path = str(data.get('folder_path', '')).strip()

    # Validate target explicitly: int('abc') would otherwise surface as an
    # unhandled ValueError (500) instead of a 400.
    try:
        target = int(data.get('target', 40))
    except (TypeError, ValueError):
        return jsonify({'error': 'Invalid target value'}), 400

    organize_by_month = data.get('organize_by_month', False)

    if not folder_path:
        return jsonify({'error': 'No folder path provided'}), 400

    if not os.path.isdir(folder_path):
        return jsonify({'error': f'Folder not found: {folder_path}'}), 400

    # Only files with a supported image extension count.
    extensions = {'.jpg', '.jpeg', '.png', '.heic', '.heif', '.webp'}
    image_files = [f for f in os.listdir(folder_path)
                   if os.path.splitext(f.lower())[1] in extensions]

    if not image_files:
        return jsonify({'error': 'No valid image files found in folder'}), 400

    # Short random id keeps URLs readable; collision risk is negligible here.
    job_id = str(uuid.uuid4())[:8]

    thumb_dir = os.path.join(UPLOAD_FOLDER, job_id, 'thumbnails')
    os.makedirs(thumb_dir, exist_ok=True)

    processing_jobs[job_id] = {
        'status': 'processing',
        'progress': 0,
        'message': 'Starting test...',
        'folder_path': folder_path,
        'thumb_dir': thumb_dir,
        'target': target,
        'total_files': len(image_files),
        'results': None,
        'organize_by_month': organize_by_month
    }

    # Heavy work (embeddings, scoring) runs off the request thread.
    thread = threading.Thread(
        target=process_test_month,
        args=(job_id, folder_path, target, thumb_dir, organize_by_month)
    )
    thread.start()

    return jsonify({
        'job_id': job_id,
        'total_photos': len(image_files),
        'target': target,
        'organize_by_month': organize_by_month,
        'message': f'Processing {len(image_files)} photos...'
    })
|
|
|
|
|
|
|
|
|
@app.route('/test-month/upload', methods=['POST'])
def test_month_upload():
    """Handle uploaded photos for test-month (for HuggingFace deployment).

    Accepts a multipart form with 'photos' files plus 'target' and
    'organize_by_month' fields; saves valid images under a fresh job
    directory and starts a background processing thread.
    """
    if 'photos' not in request.files:
        return jsonify({'error': 'No photos uploaded'}), 400

    files = request.files.getlist('photos')

    # Parse target defensively: a non-numeric value should yield a clean
    # 400, not an unhandled ValueError (500).
    try:
        target = int(request.form.get('target', 40))
    except (TypeError, ValueError):
        return jsonify({'error': 'Invalid target value'}), 400

    organize_by_month = request.form.get('organize_by_month', 'false').lower() == 'true'

    if not files or len(files) == 0:
        return jsonify({'error': 'No photos uploaded'}), 400

    # Keep only files with a supported image extension.
    extensions = {'.jpg', '.jpeg', '.png', '.heic', '.heif', '.webp'}
    valid_files = [f for f in files if f.filename and
                   os.path.splitext(f.filename.lower())[1] in extensions]

    if not valid_files:
        return jsonify({'error': 'No valid image files uploaded'}), 400

    job_id = str(uuid.uuid4())[:8]
    upload_dir = os.path.join(UPLOAD_FOLDER, job_id, 'photos')
    thumb_dir = os.path.join(UPLOAD_FOLDER, job_id, 'thumbnails')
    os.makedirs(upload_dir, exist_ok=True)
    os.makedirs(thumb_dir, exist_ok=True)

    saved_files = []
    for f in valid_files:
        filename = secure_filename(f.filename)
        base, ext = os.path.splitext(filename)

        # secure_filename() can return '' for fully non-ASCII names, which
        # would previously collapse to an extensionless "_1" file that the
        # processing stage then ignores. Fall back to a generated name,
        # preserving the original (already validated) extension.
        if not base:
            ext = os.path.splitext(f.filename.lower())[1]
            base = f"photo_{uuid.uuid4().hex[:8]}"
            filename = f"{base}{ext}"

        # Avoid overwriting a previously saved file with the same name.
        counter = 1
        while os.path.exists(os.path.join(upload_dir, filename)):
            filename = f"{base}_{counter}{ext}"
            counter += 1

        filepath = os.path.join(upload_dir, filename)
        f.save(filepath)
        saved_files.append(filename)

    processing_jobs[job_id] = {
        'status': 'processing',
        'progress': 0,
        'message': 'Starting test...',
        'folder_path': upload_dir,
        'thumb_dir': thumb_dir,
        'target': target,
        'total_files': len(saved_files),
        'results': None,
        'is_upload': True,
        'organize_by_month': organize_by_month
    }

    # Heavy processing runs off the request thread.
    thread = threading.Thread(
        target=process_test_month,
        args=(job_id, upload_dir, target, thumb_dir, organize_by_month)
    )
    thread.start()

    return jsonify({
        'job_id': job_id,
        'total_photos': len(saved_files),
        'target': target,
        'organize_by_month': organize_by_month,
        'message': f'Processing {len(saved_files)} uploaded photos...'
    })
|
|
|
|
|
|
|
|
|
@app.route('/test-month/upload-init', methods=['POST'])
def test_month_upload_init():
    """Create a chunked-upload session for test-month and return its ids."""
    payload = request.json
    total_files = payload.get('total_files', 0)
    target = payload.get('target', 40)
    organize_by_month = payload.get('organize_by_month', False)

    job_id = str(uuid.uuid4())[:8]
    upload_dir = os.path.join(UPLOAD_FOLDER, job_id, 'photos')
    thumb_dir = os.path.join(UPLOAD_FOLDER, job_id, 'thumbnails')
    for directory in (upload_dir, thumb_dir):
        os.makedirs(directory, exist_ok=True)

    # Session state stays in memory until /upload-complete consumes it.
    session_id = f"test_{job_id}"
    upload_sessions[session_id] = {
        'job_id': job_id,
        'upload_dir': upload_dir,
        'thumb_dir': thumb_dir,
        'target': target,
        'organize_by_month': organize_by_month,
        'total_files': total_files,
        'uploaded_files': []
    }

    print(f"[Test-Month Upload {job_id}] Initialized for {total_files} files")

    return jsonify({'session_id': session_id, 'job_id': job_id})
|
|
|
|
|
|
|
|
|
@app.route('/test-month/upload-chunk', methods=['POST'])
def test_month_upload_chunk():
    """Handle a chunk of files for test-month."""
    session_id = request.form.get('session_id')
    if not session_id or session_id not in upload_sessions:
        return jsonify({'error': 'Invalid session'}), 400

    session_data = upload_sessions[session_id]
    upload_dir = session_data['upload_dir']

    allowed_exts = {'.jpg', '.jpeg', '.png', '.heic', '.heif', '.webp'}
    saved_count = 0

    for upload in request.files.getlist('files'):
        # Skip empty parts and unsupported extensions.
        if not (upload and upload.filename):
            continue
        if os.path.splitext(upload.filename.lower())[1] not in allowed_exts:
            continue

        filename = secure_filename(upload.filename)
        base, ext = os.path.splitext(filename)

        # De-duplicate against files already written for this job.
        counter = 1
        while os.path.exists(os.path.join(upload_dir, filename)):
            filename = f"{base}_{counter}{ext}"
            counter += 1

        upload.save(os.path.join(upload_dir, filename))
        session_data['uploaded_files'].append(filename)
        saved_count += 1

    chunk_index = request.form.get('chunk_index', '?')
    print(f"[Test-Month Upload {session_data['job_id']}] Chunk {chunk_index}: saved {saved_count} files (total: {len(session_data['uploaded_files'])})")

    return jsonify({
        'uploaded': len(session_data['uploaded_files']),
        'total': session_data['total_files']
    })
|
|
|
|
|
|
|
|
|
@app.route('/test-month/upload-complete', methods=['POST'])
def test_month_upload_complete():
    """Complete chunked upload and start processing for test-month.

    Consumes the upload session created by /test-month/upload-init,
    registers a processing job, and launches process_test_month in a
    background thread. Returns the new job id for status polling.
    """
    data = request.json
    session_id = data.get('session_id')

    if not session_id or session_id not in upload_sessions:
        return jsonify({'error': 'Invalid session'}), 400

    session_data = upload_sessions[session_id]
    job_id = session_data['job_id']
    upload_dir = session_data['upload_dir']
    thumb_dir = session_data['thumb_dir']
    target = session_data['target']
    organize_by_month = session_data['organize_by_month']
    saved_files = session_data['uploaded_files']

    # The session is one-shot: remove it so it cannot be completed twice.
    del upload_sessions[session_id]

    if not saved_files:
        return jsonify({'error': 'No valid image files uploaded'}), 400

    print(f"[Test-Month Upload {job_id}] Complete: {len(saved_files)} files, starting processing...")

    # Register the job record that /test-month/status/<job_id> polls.
    processing_jobs[job_id] = {
        'status': 'processing',
        'progress': 0,
        'message': 'Starting test...',
        'folder_path': upload_dir,
        'thumb_dir': thumb_dir,
        'target': target,
        'total_files': len(saved_files),
        'results': None,
        'is_upload': True,  # photos came from an upload, not a local folder
        'organize_by_month': organize_by_month
    }

    # Heavy processing runs off the request thread.
    thread = threading.Thread(
        target=process_test_month,
        args=(job_id, upload_dir, target, thumb_dir, organize_by_month)
    )
    thread.start()

    return jsonify({
        'job_id': job_id,
        'total_photos': len(saved_files),
        'target': target,
        'organize_by_month': organize_by_month,
        'message': f'Processing {len(saved_files)} uploaded photos...'
    })
|
|
|
|
|
|
|
|
|
def process_test_month(job_id: str, folder_path: str, target: int, thumb_dir: str, organize_by_month: bool = False):
    """Process photos for testing with category-aware selection.

    If organize_by_month is True, groups photos by EXIF date and runs
    selection per month (same as main app Step 4).

    Runs in a background thread; reports progress/results by mutating
    processing_jobs[job_id] in place. On any failure the job status is
    set to 'error' with the exception message.
    """
    try:
        # Project-local imports are deferred so the heavy ML stack only
        # loads when a test job actually runs.
        from photo_selector.monthly_selector import MonthlyPhotoSelector, CategoryDetector
        from photo_selector.siglip_embeddings import SigLIPEmbedder
        from photo_selector.scoring import PhotoScorer
        from datetime import datetime

        job = processing_jobs[job_id]

        # Re-scan the folder for supported image files (the caller's list
        # is not passed in).
        extensions = {'.jpg', '.jpeg', '.png', '.heic', '.heif', '.webp'}
        photo_files = [f for f in os.listdir(folder_path)
                       if os.path.splitext(f.lower())[1] in extensions]
        photo_paths = [os.path.join(folder_path, f) for f in photo_files]

        job['message'] = 'Loading SigLIP model...'
        job['progress'] = 5

        embedder = SigLIPEmbedder()
        selector = MonthlyPhotoSelector()

        # Stage 1: embeddings for the whole folder (progress 10 -> 30).
        job['message'] = f'Generating SigLIP embeddings for {len(photo_paths)} photos...'
        job['progress'] = 10
        embeddings = embedder.process_folder(folder_path)
        job['progress'] = 30

        # Stage 2: per-photo category detection (progress 35 -> 45).
        job['message'] = 'Detecting photo categories...'
        job['progress'] = 35
        selector._ensure_category_detector()
        categories = selector.category_detector.detect_categories_batch(photo_paths)
        job['progress'] = 45

        # Stage 3: quality scoring (progress 45 -> ~65).
        job['message'] = 'Scoring photos...'
        scorer = PhotoScorer()
        scored_photos = []

        for i, photo_path in enumerate(photo_paths):
            filename = os.path.basename(photo_path)
            scores = scorer.score_photo(photo_path)

            # Categories are keyed by filename; unknown photos get a
            # ('unknown', 0.0) placeholder.
            cat, conf = categories.get(filename, ('unknown', 0.0))

            # EXIF-derived capture date (may be None).
            dt = selector.get_photo_date(photo_path)

            scored_photos.append({
                'filename': filename,
                'filepath': photo_path,
                'total': scores.get('total', 0),
                'face_quality': scores.get('face_quality', 0),
                'aesthetic_quality': scores.get('aesthetic_quality', 0),
                'emotional_signal': scores.get('emotional_signal', 0),
                'uniqueness': scores.get('uniqueness', 0.5),
                'num_faces': scores.get('num_faces', 0),
                'category': cat,
                'category_confidence': conf,
                'timestamp': dt.timestamp() if dt else None
            })

            # Update progress every 10 photos to avoid hammering the dict.
            if (i + 1) % 10 == 0:
                job['progress'] = 45 + int((i / len(photo_paths)) * 20)
                job['message'] = f'Scoring photos... {i + 1}/{len(photo_paths)}'

        job['progress'] = 70

        # Stage 4: selection - either per-month or across the whole set.
        if organize_by_month:

            job['message'] = 'Grouping photos by month...'

            MONTH_NAMES = ['January', 'February', 'March', 'April', 'May', 'June',
                           'July', 'August', 'September', 'October', 'November', 'December']

            # Bucket scored photos by capture month; undated -> 'Unknown'.
            photos_by_month = {}
            for photo in scored_photos:
                ts = photo.get('timestamp')
                if ts:
                    dt = datetime.fromtimestamp(ts)
                    month_name = MONTH_NAMES[dt.month - 1]
                else:
                    month_name = 'Unknown'

                photo['month'] = month_name
                if month_name not in photos_by_month:
                    photos_by_month[month_name] = []
                photos_by_month[month_name].append(photo)

            total_photos = len(scored_photos)
            selected = []
            month_stats = []

            for month_name, month_photos in photos_by_month.items():
                # Each month gets a share of the target proportional to its
                # photo count, but never less than 1.
                month_proportion = len(month_photos) / total_photos
                month_target = max(1, int(target * month_proportion))

                job['message'] = f'Processing {month_name} ({len(month_photos)} photos)...'

                # Restrict the embeddings dict to this month's photos.
                month_embeddings = {p['filename']: embeddings.get(p['filename']) for p in month_photos}

                month_selected = selector.select_hybrid_hdbscan(month_photos, month_embeddings, target=month_target)

                for photo in month_selected:
                    photo['month'] = month_name

                selected.extend(month_selected)

                month_stats.append({
                    'month': month_name,
                    'total_photos': len(month_photos),
                    'selected': len(month_selected),
                    'target': month_target
                })

            print(f"[Test Month {job_id}] Organized by month: {len(photos_by_month)} months, {len(selected)} total selected")
        else:
            # Single global selection pass over all scored photos.
            job['message'] = 'Running category-aware clustering and selection...'
            selected = selector.select_hybrid_hdbscan(scored_photos, embeddings, target=target)

            for photo in selected:
                photo['month'] = 'Unknown'
            for photo in scored_photos:
                photo['month'] = 'Unknown'
            month_stats = []

        job['progress'] = 85
        job['message'] = 'Creating thumbnails...'

        # Build the serializable result records (thumbnails + metadata)
        # for every selected photo.
        selected_results = []
        for photo in selected:
            filename = photo['filename']
            filepath = photo['filepath']
            thumb_name = get_thumbnail_name(filename)
            thumb_path = os.path.join(thumb_dir, thumb_name)

            create_thumbnail(filepath, thumb_path)

            # Embeddings are numpy arrays; convert to a plain list for JSON.
            photo_emb = embeddings.get(filename)
            embedding_list = photo_emb.tolist() if photo_emb is not None else None

            ts = photo.get('timestamp')
            datetime_str = ''
            if ts:
                dt = datetime.fromtimestamp(ts)
                datetime_str = dt.strftime('%Y-%m-%d %H:%M:%S')

            selected_results.append({
                'filename': filename,
                'thumbnail': thumb_name,
                'score': float(photo.get('total', 0)),
                'face_quality': float(photo.get('face_quality', 0)),
                'aesthetic_quality': float(photo.get('aesthetic_quality', 0)),
                'emotional_signal': float(photo.get('emotional_signal', 0)),
                'uniqueness': float(photo.get('uniqueness', 0)),
                'num_faces': int(photo.get('num_faces', 0)),
                'multi_face_bonus': float(photo.get('multi_face_bonus', 0)),
                'cluster_id': photo.get('cluster_id', -1),
                'max_similarity': float(photo.get('max_similarity', 0)),
                'category': photo.get('category', 'unknown'),
                'category_confidence': float(photo.get('category_confidence', 0)),
                'event_id': photo.get('event_id', -1),
                'selection_reason': photo.get('selection_reason', ''),
                'datetime': datetime_str,
                'embedding': embedding_list,
                'month': photo.get('month', 'Unknown')
            })

        # Everything not selected becomes a "rejected" record (with a
        # slightly smaller field set).
        selected_filenames = {p['filename'] for p in selected}
        rejected_results = []

        for photo in scored_photos:
            if photo['filename'] not in selected_filenames:
                filename = photo['filename']
                filepath = photo['filepath']
                thumb_name = get_thumbnail_name(filename)
                thumb_path = os.path.join(thumb_dir, thumb_name)

                create_thumbnail(filepath, thumb_path)

                photo_emb = embeddings.get(filename)
                embedding_list = photo_emb.tolist() if photo_emb is not None else None

                ts = photo.get('timestamp')
                datetime_str = ''
                if ts:
                    # Redundant re-import: datetime is already imported at
                    # the top of this function.
                    from datetime import datetime
                    dt = datetime.fromtimestamp(ts)
                    datetime_str = dt.strftime('%Y-%m-%d %H:%M:%S')

                rejected_results.append({
                    'filename': filename,
                    'thumbnail': thumb_name,
                    'score': float(photo.get('total', 0)),
                    'face_quality': float(photo.get('face_quality', 0)),
                    'aesthetic_quality': float(photo.get('aesthetic_quality', 0)),
                    'num_faces': int(photo.get('num_faces', 0)),
                    'cluster_id': photo.get('cluster_id', -1),
                    'category': photo.get('category', 'unknown'),
                    'event_id': photo.get('event_id', -1),
                    'embedding': embedding_list,
                    'max_similarity': float(photo.get('max_similarity', 0)),
                    'selection_reason': photo.get('rejection_reason', 'Not selected'),
                    'datetime': datetime_str,
                    'month': photo.get('month', 'Unknown')
                })

        # Best-scoring photos first in both lists.
        selected_results.sort(key=lambda x: x['score'], reverse=True)
        rejected_results.sort(key=lambda x: x['score'], reverse=True)

        # Distribution summaries for the results UI.
        cluster_counts = {}
        for photo in selected_results:
            cid = photo.get('cluster_id', -1)
            cluster_counts[cid] = cluster_counts.get(cid, 0) + 1

        category_counts = {}
        for photo in selected_results:
            cat = photo.get('category', 'unknown')
            category_counts[cat] = category_counts.get(cat, 0) + 1

        job['results'] = {
            'selected': selected_results,
            'rejected': rejected_results,
            'summary': {
                'total_photos': len(photo_paths),
                'selected_count': len(selected_results),
                'rejected_count': len(rejected_results),
                'target': target
            },
            'cluster_distribution': cluster_counts,
            'category_distribution': category_counts,
            'organized_by_month': organize_by_month,
            'month_stats': month_stats
        }

        job['status'] = 'complete'
        job['progress'] = 100
        job['message'] = f'Done! Selected {len(selected_results)} of {len(photo_paths)} photos'

        print(f"\n[Test Month {job_id}] Complete!")
        print(f"  - Total: {len(photo_paths)}")
        print(f"  - Selected: {len(selected_results)}")
        print(f"  - Organized by month: {organize_by_month}")
        if month_stats:
            print(f"  - Month stats: {month_stats}")
        print(f"  - Clusters: {cluster_counts}")
        print(f"  - Categories: {category_counts}")

    except Exception as e:
        # Surface the failure to the polling UI instead of dying silently.
        processing_jobs[job_id]['status'] = 'error'
        processing_jobs[job_id]['message'] = str(e)
        import traceback
        traceback.print_exc()
|
|
|
|
|
|
|
|
|
@app.route('/test-month/status/<job_id>')
def test_month_status(job_id):
    """Report progress for a test-month job."""
    job = processing_jobs.get(job_id)
    if job is None:
        return jsonify({'error': 'Job not found'}), 404

    return jsonify({
        'status': job['status'],
        'progress': job['progress'],
        'message': job['message']
    })
|
|
|
|
|
|
|
|
|
@app.route('/test-month/results/<job_id>')
def test_month_results(job_id):
    """Return the final selection results for a finished test-month job."""
    job = processing_jobs.get(job_id)
    if job is None:
        return jsonify({'error': 'Job not found'}), 404

    # Results only exist once the background thread has finished.
    if job['status'] != 'complete':
        return jsonify({'error': 'Not complete', 'status': job['status']}), 400

    return jsonify(job['results'])
|
|
|
|
|
|
|
|
|
@app.route('/test-month/thumbnail/<job_id>/<filename>')
def test_month_thumbnail(job_id, filename):
    """Serve test month thumbnails."""
    directory = os.path.join(UPLOAD_FOLDER, job_id, 'thumbnails')
    return send_from_directory(directory, filename)
|
|
|
|
|
|
|
|
|
@app.route('/test-month/download/<job_id>')
def test_month_download(job_id):
    """Download selected photos from test-month as ZIP."""
    import zipfile
    from io import BytesIO

    job = processing_jobs.get(job_id)
    if job is None:
        return jsonify({'error': 'Job not found'}), 404
    if job['status'] != 'complete':
        return jsonify({'error': 'Processing not complete'}), 400

    results = job.get('results', {})
    selected = results.get('selected', [])
    folder_path = job.get('folder_path', '')

    if not selected:
        return jsonify({'error': 'No selected photos'}), 404
    if not folder_path:
        return jsonify({'error': 'Folder path not found'}), 404

    # Build the archive fully in memory (test jobs are small enough).
    archive = BytesIO()
    files_added = 0
    with zipfile.ZipFile(archive, 'w', zipfile.ZIP_DEFLATED) as zf:
        for entry in selected:
            name = entry.get('filename', '')
            source = os.path.join(folder_path, name)
            # Skip entries whose source file has since disappeared.
            if os.path.exists(source):
                zf.write(source, name)
                files_added += 1

    if not files_added:
        return jsonify({'error': 'No files could be added to ZIP'}), 404

    archive.seek(0)
    return send_file(
        archive,
        mimetype='application/zip',
        as_attachment=True,
        download_name=f'test_selected_{job_id}.zip'
    )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@app.route('/datasets')
def datasets_page():
    """Render the saved-datasets overview page."""
    return render_template('datasets.html')
|
|
|
|
|
|
|
|
|
@app.route('/api/datasets')
def list_datasets():
    """List all saved datasets (local + Supabase).

    Local datasets are folders under DATASETS_FOLDER containing a
    metadata.json; Supabase entries are merged in afterwards, skipping any
    folder_name already found locally. Result is sorted newest-first by
    created_at.
    """
    datasets = []
    seen_names = set()

    # Local datasets: one folder per dataset with metadata.json inside.
    if os.path.exists(DATASETS_FOLDER):
        for name in os.listdir(DATASETS_FOLDER):
            meta_path = os.path.join(DATASETS_FOLDER, name, 'metadata.json')
            if os.path.exists(meta_path):
                try:
                    with open(meta_path, 'r') as f:
                        meta = json.load(f)
                    meta['folder_name'] = name
                    meta['source'] = 'local'
                    datasets.append(meta)
                    seen_names.add(name)
                except Exception as e:
                    # Was a bare `except: pass` (which also swallowed
                    # SystemExit/KeyboardInterrupt). Still best-effort -
                    # skip unreadable/corrupt metadata - but log it so
                    # broken datasets are noticed.
                    print(f"[Datasets] Skipping dataset '{name}': {e}")

    # Supabase datasets, de-duplicated against the local ones.
    if is_supabase_available():
        try:
            supabase_datasets = list_datasets_from_supabase()
            for meta in supabase_datasets:
                folder_name = meta.get('folder_name', '')
                if folder_name and folder_name not in seen_names:
                    meta['source'] = 'supabase'
                    datasets.append(meta)
        except Exception as e:
            print(f"[Datasets] Error fetching from Supabase: {e}")

    # Newest first; entries missing created_at sort last.
    datasets.sort(key=lambda x: x.get('created_at', '') or '', reverse=True)
    return jsonify({'datasets': datasets, 'supabase_available': is_supabase_available()})
|
|
|
|
|
|
|
|
|
@app.route('/save_dataset/<job_id>', methods=['POST'])
def save_dataset(job_id):
    """Save dataset after Step 3 review.

    Persists everything needed to re-run selection later without redoing
    face filtering: reference embeddings, face-filter results, the confirmed
    photo list, thumbnails, the photos themselves, and a metadata.json.
    Optionally mirrors embeddings + face results + metadata to Supabase.

    JSON body: {'name': <display name>} (optional; defaults to dataset_<job_id>).
    Returns JSON with the sanitized name, folder name, photo count, and
    whether the Supabase mirror succeeded.
    """
    try:
        data = request.get_json()
        dataset_name = data.get('name', f"dataset_{job_id}")

        # Sanitize the user-supplied name down to a filesystem-safe slug.
        import re
        safe_name = re.sub(r'[^a-zA-Z0-9_\- ]', '', dataset_name).strip()
        if not safe_name:
            safe_name = f"dataset_{job_id}"

        folder_name = safe_name.replace(' ', '_')
        dataset_path = os.path.join(DATASETS_FOLDER, folder_name)

        # Refuse to clobber an existing dataset of the same name.
        if os.path.exists(dataset_path):
            return jsonify({'error': f'Dataset "{safe_name}" already exists'}), 400

        os.makedirs(dataset_path, exist_ok=True)

        # NOTE(review): the folder is created before this check, so a missing
        # job leaves an empty dataset directory behind — confirm intended.
        if job_id not in processing_jobs:
            return jsonify({'error': 'Job not found'}), 404

        job = processing_jobs[job_id]
        session_id = job.get('session_id')

        # Persist the reference face embeddings so the matcher can be
        # reconstructed on load (see load_dataset).
        if session_id and session_id in face_matchers:
            matcher = face_matchers[session_id]
            embeddings_path = os.path.join(dataset_path, 'reference_embeddings.npz')
            np.savez_compressed(
                embeddings_path,
                embeddings=np.array(matcher.reference_embeddings),
                average=matcher.average_embedding,
                threshold=matcher.similarity_threshold
            )

        # Copy the Step-3 review output (face filter results), if present.
        review_file = os.path.join(RESULTS_FOLDER, f"{job_id}_review.json")
        if os.path.exists(review_file):
            shutil.copy(review_file, os.path.join(dataset_path, 'face_results.json'))

        # Determine the confirmed photo list, trying three sources in order:
        # 1) the in-memory job state,
        confirmed_photos = job.get('confirmed_photos', [])
        if not confirmed_photos:
            # 2) the review JSON's filtered_photos,
            review_file = os.path.join(RESULTS_FOLDER, f"{job_id}_review.json")
            if os.path.exists(review_file):
                with open(review_file, 'r') as f:
                    review_data = json.load(f)
                filtered = review_data.get('filtered_photos', [])
                confirmed_photos = [p['filename'] for p in filtered]

        if not confirmed_photos:
            # 3) the final results JSON (selected + rejected together).
            results_file = os.path.join(RESULTS_FOLDER, f"{job_id}.json")
            if os.path.exists(results_file):
                with open(results_file, 'r') as f:
                    results_data = json.load(f)
                selected = results_data.get('selected_photos', [])
                rejected = results_data.get('rejected_photos', [])
                confirmed_photos = [p['filename'] for p in selected + rejected]

        with open(os.path.join(dataset_path, 'confirmed_photos.json'), 'w') as f:
            json.dump({'photos': confirmed_photos}, f)

        # Copy thumbnails from the job's upload directory, if they exist.
        upload_dir = job.get('upload_dir', os.path.join(UPLOAD_FOLDER, job_id))
        thumb_dir = os.path.join(upload_dir, 'thumbnails')
        dataset_thumb_dir = os.path.join(dataset_path, 'thumbnails')
        if os.path.exists(thumb_dir):
            shutil.copytree(thumb_dir, dataset_thumb_dir)

        # Copy the confirmed full-size photos; silently skip any that have
        # disappeared from the upload directory.
        photos_dir = os.path.join(dataset_path, 'photos')
        os.makedirs(photos_dir, exist_ok=True)
        for filename in confirmed_photos:
            src = os.path.join(upload_dir, filename)
            if os.path.exists(src):
                shutil.copy(src, os.path.join(photos_dir, filename))

        # Metadata read back by list_datasets / load_dataset.
        metadata = {
            'name': safe_name,
            'created_at': datetime.now().isoformat(),
            'original_job_id': job_id,
            'session_id': session_id,
            'total_photos': len(confirmed_photos),
            'quality_mode': job.get('quality_mode', 'balanced'),
            'similarity_threshold': job.get('similarity_threshold', 0.4),
            'reference_count': len(face_matchers.get(session_id, {}).reference_embeddings) if session_id in face_matchers else 0
        }

        with open(os.path.join(dataset_path, 'metadata.json'), 'w') as f:
            json.dump(metadata, f, indent=2)

        print(f"[Dataset] Saved '{safe_name}' with {len(confirmed_photos)} photos locally")

        # Best-effort mirror to Supabase (embeddings + face results +
        # metadata only — photos stay local). Failure is logged, not fatal.
        supabase_saved = False
        if is_supabase_available():
            try:
                embeddings_path = os.path.join(dataset_path, 'reference_embeddings.npz')
                embeddings_data = None
                if os.path.exists(embeddings_path):
                    with open(embeddings_path, 'rb') as f:
                        embeddings_data = f.read()

                face_results_path = os.path.join(dataset_path, 'face_results.json')
                face_results = {}
                if os.path.exists(face_results_path):
                    with open(face_results_path, 'r') as f:
                        face_results = json.load(f)

                # Only upload when we actually have embeddings to store.
                if embeddings_data:
                    supabase_saved = save_dataset_to_supabase(
                        folder_name,
                        embeddings_data,
                        face_results,
                        metadata
                    )
            except Exception as e:
                print(f"[Dataset] Supabase save error: {e}")

        return jsonify({
            'success': True,
            'name': safe_name,
            'folder_name': folder_name,
            'total_photos': len(confirmed_photos),
            'supabase_saved': supabase_saved
        })

    except Exception as e:
        import traceback
        traceback.print_exc()
        return jsonify({'error': str(e)}), 500
|
|
|
|
|
|
|
|
|
@app.route('/load_dataset/<dataset_name>')
def load_dataset(dataset_name):
    """Load a saved dataset and redirect to review or selection.

    Rebuilds a fresh job (new job_id + face session) from a dataset saved by
    save_dataset: copies photos/thumbnails into a new upload dir, restores the
    FaceMatcher from the saved embeddings, and seeds processing_jobs.
    If the dataset only exists in Supabase (no local photos), redirects to the
    re-upload flow instead.

    Query param: goto=select redirects straight to Step 4 results;
    anything else (default 'review') goes to Step 3 review.
    """
    try:
        dataset_path = os.path.join(DATASETS_FOLDER, dataset_name)
        from_supabase = False

        # Not on disk: fall back to Supabase. Supabase stores no photos, so
        # the user must re-upload them — hence the redirect.
        if not os.path.exists(dataset_path):
            if is_supabase_available():
                print(f"[Dataset] Not found locally, trying Supabase...")
                supabase_data = load_dataset_from_supabase(dataset_name)
                if supabase_data:
                    from_supabase = True
                    return redirect(f'/reupload_photos/{dataset_name}')
                else:
                    return jsonify({'error': 'Dataset not found in local or Supabase'}), 404
            else:
                return jsonify({'error': 'Dataset not found'}), 404

        with open(os.path.join(dataset_path, 'metadata.json'), 'r') as f:
            metadata = json.load(f)

        # Fresh identifiers so the loaded dataset behaves like a new job.
        job_id = str(uuid.uuid4())[:8]
        new_session_id = str(uuid.uuid4())[:8]

        upload_dir = os.path.join(UPLOAD_FOLDER, job_id)
        os.makedirs(upload_dir, exist_ok=True)

        # Copy the saved full-size photos into the new job's upload dir.
        dataset_photos_dir = os.path.join(dataset_path, 'photos')
        if os.path.exists(dataset_photos_dir):
            for filename in os.listdir(dataset_photos_dir):
                src = os.path.join(dataset_photos_dir, filename)
                shutil.copy(src, os.path.join(upload_dir, filename))

        # Copy thumbnails wholesale (copytree requires the target not exist).
        dataset_thumb_dir = os.path.join(dataset_path, 'thumbnails')
        if os.path.exists(dataset_thumb_dir):
            shutil.copytree(dataset_thumb_dir, os.path.join(upload_dir, 'thumbnails'))

        # Restore the face matcher from the saved .npz (embeddings, average,
        # threshold — the same keys save_dataset writes).
        embeddings_path = os.path.join(dataset_path, 'reference_embeddings.npz')
        if os.path.exists(embeddings_path):
            from photo_selector.face_matcher import FaceMatcher
            data = np.load(embeddings_path, allow_pickle=True)
            matcher = FaceMatcher(similarity_threshold=float(data['threshold']))
            matcher.reference_embeddings = list(data['embeddings'])
            matcher.average_embedding = data['average']
            face_matchers[new_session_id] = matcher
            session['face_session_id'] = new_session_id

        # Restore the confirmed photo list, if saved.
        confirmed_file = os.path.join(dataset_path, 'confirmed_photos.json')
        confirmed_photos = []
        if os.path.exists(confirmed_file):
            with open(confirmed_file, 'r') as f:
                confirmed_photos = json.load(f).get('photos', [])

        # Restore the Step-3 review payload (face filter results), if saved.
        face_results_path = os.path.join(dataset_path, 'face_results.json')
        review_data = None
        if os.path.exists(face_results_path):
            with open(face_results_path, 'r') as f:
                review_data = json.load(f)

        # Seed in-memory job state as if filtering had just finished.
        processing_jobs[job_id] = {
            'status': 'review_pending',
            'progress': 100,
            'message': 'Dataset loaded - ready for review',
            'upload_dir': upload_dir,
            'session_id': new_session_id,
            'has_reference_photos': True,
            'reference_count': metadata.get('reference_count', 0),
            'quality_mode': metadata.get('quality_mode', 'balanced'),
            'similarity_threshold': metadata.get('similarity_threshold', 0.4),
            'confirmed_photos': confirmed_photos,
            'review_data': review_data,
            'total_photos': len(confirmed_photos),
            'from_dataset': dataset_name
        }

        # The review page reads <job_id>_review.json from RESULTS_FOLDER.
        if review_data:
            with open(os.path.join(RESULTS_FOLDER, f"{job_id}_review.json"), 'w') as f:
                json.dump(review_data, f)

        print(f"[Dataset] Loaded '{dataset_name}' as job {job_id}")

        # Jump either straight to results or back into the review step.
        goto = request.args.get('goto', 'review')
        if goto == 'select':
            return redirect(f'/step4_results/{job_id}?from_dataset=1')
        else:
            return redirect(f'/step3_review/{job_id}')

    except Exception as e:
        import traceback
        traceback.print_exc()
        return jsonify({'error': str(e)}), 500
|
|
|
|
|
|
|
|
|
@app.route('/delete_dataset/<dataset_name>', methods=['DELETE'])
def delete_dataset(dataset_name):
    """Delete a saved dataset (local and Supabase)."""
    try:
        local_removed = False
        remote_removed = False

        # Local copy: the dataset folder under DATASETS_FOLDER.
        local_path = os.path.join(DATASETS_FOLDER, dataset_name)
        if os.path.exists(local_path):
            shutil.rmtree(local_path)
            local_removed = True
            print(f"[Dataset] Deleted '{dataset_name}' locally")

        # Remote copy, when Supabase is configured.
        if is_supabase_available():
            remote_removed = delete_dataset_from_supabase(dataset_name)

        if not (local_removed or remote_removed):
            return jsonify({'error': 'Dataset not found'}), 404

        return jsonify({'success': True, 'deleted_local': local_removed, 'deleted_supabase': remote_removed})

    except Exception as e:
        return jsonify({'error': str(e)}), 500
|
|
|
|
|
|
|
|
|
@app.route('/dataset_thumbnail/<dataset_name>/<filename>')
def dataset_thumbnail(dataset_name, filename):
    """Serve dataset thumbnail."""
    return send_from_directory(
        os.path.join(DATASETS_FOLDER, dataset_name, 'thumbnails'),
        filename,
    )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@app.route('/reupload_photos/<dataset_name>')
def reupload_photos_page(dataset_name):
    """Show page to re-upload photos for a Supabase dataset."""
    # Guard: this page only makes sense with a configured Supabase backend.
    if not is_supabase_available():
        return jsonify({'error': 'Supabase not available'}), 500

    record = load_dataset_from_supabase(dataset_name)
    if not record:
        return jsonify({'error': 'Dataset not found in Supabase'}), 404

    return render_template(
        'reupload_photos.html',
        dataset_name=dataset_name,
        metadata=record.get('metadata', {}),
    )
|
|
|
|
|
|
|
|
|
@app.route('/download_from_gdrive/<dataset_name>', methods=['POST'])
def download_from_gdrive(dataset_name):
    """Download zip from Google Drive and process photos.

    Pulls a user-shared zip via gdown, extracts its images into a new job's
    upload dir, then matches the extracted filenames against the face-filter
    results stored for `dataset_name` in Supabase, seeding a review-ready job.

    JSON body: {'gdrive_link': <share URL>}.
    Returns JSON with job_id, match counts, and a redirect_url to Step 3.
    """
    try:
        import re
        import zipfile
        import gdown

        data = request.get_json()
        gdrive_link = data.get('gdrive_link', '')

        print(f"[GDrive] Starting download for dataset '{dataset_name}'")
        print(f"[GDrive] Link: {gdrive_link}")

        # Extract the Drive file ID from any of the common share-URL shapes.
        file_id = None
        patterns = [
            r'/file/d/([a-zA-Z0-9_-]+)',
            r'id=([a-zA-Z0-9_-]+)',
            r'/d/([a-zA-Z0-9_-]+)'
        ]
        for pattern in patterns:
            match = re.search(pattern, gdrive_link)
            if match:
                file_id = match.group(1)
                break

        if not file_id:
            return jsonify({'error': 'Could not extract file ID from Google Drive link'}), 400

        print(f"[GDrive] File ID: {file_id}")

        # Fresh job + working directories.
        job_id = str(uuid.uuid4())[:8]
        upload_dir = os.path.join(UPLOAD_FOLDER, job_id)
        os.makedirs(upload_dir, exist_ok=True)
        os.makedirs(os.path.join(upload_dir, 'thumbnails'), exist_ok=True)

        zip_path = os.path.join(upload_dir, 'photos.zip')
        gdrive_url = f"https://drive.google.com/uc?id={file_id}"

        # Download with gdown; retry once without cookies (helps with the
        # large-file "virus scan" confirmation page).
        print(f"[GDrive] Downloading using gdown...")
        try:
            gdown.download(gdrive_url, zip_path, quiet=False, fuzzy=True)
        except Exception as e:
            print(f"[GDrive] gdown failed: {e}")
            try:
                gdown.download(gdrive_url, zip_path, quiet=False, fuzzy=True, use_cookies=False)
            except Exception as e2:
                print(f"[GDrive] gdown retry failed: {e2}")
                return jsonify({'error': f'Download failed: {str(e2)}'}), 400

        # A tiny file usually means Drive returned an HTML error page.
        if not os.path.exists(zip_path) or os.path.getsize(zip_path) < 1000:
            print(f"[GDrive] ERROR: Download failed or file too small")
            return jsonify({'error': 'Download failed. Make sure the file is shared with "Anyone with link".'}), 400

        print(f"[GDrive] Download complete: {os.path.getsize(zip_path) / 1024 / 1024:.1f} MB")

        # Extract only image entries; skip directories, macOS metadata, and
        # hidden entries. Names are flattened to their basename.
        # NOTE(review): two archive entries with the same basename silently
        # overwrite each other — confirm acceptable for this workflow.
        print(f"[GDrive] Extracting zip file...")
        uploaded_filenames = []
        image_extensions = {'.jpg', '.jpeg', '.png', '.heic', '.heif', '.webp', '.bmp', '.gif'}

        try:
            with zipfile.ZipFile(zip_path, 'r') as zf:
                for member in zf.namelist():
                    if member.endswith('/') or '/__MACOSX' in member or member.startswith('.'):
                        continue
                    ext = os.path.splitext(member.lower())[1]
                    if ext in image_extensions:
                        # secure_filename guards against path tricks; it can
                        # return '' for unusable names, hence the check.
                        filename = secure_filename(os.path.basename(member))
                        if filename:
                            with zf.open(member) as src:
                                filepath = os.path.join(upload_dir, filename)
                                with open(filepath, 'wb') as dst:
                                    dst.write(src.read())
                            uploaded_filenames.append(filename)

                            # Progress log every 200 files.
                            if len(uploaded_filenames) % 200 == 0:
                                print(f"[GDrive] Extracted {len(uploaded_filenames)} files...")

            print(f"[GDrive] Extracted {len(uploaded_filenames)} photos")
        finally:
            # The zip is no longer needed once extracted (or on failure).
            if os.path.exists(zip_path):
                os.remove(zip_path)

        # Fetch the saved dataset (metadata, face results, embeddings).
        print(f"[GDrive] Loading dataset from Supabase...")
        supabase_data = load_dataset_from_supabase(dataset_name)
        if not supabase_data:
            return jsonify({'error': 'Dataset not found in Supabase'}), 404

        metadata = supabase_data.get('metadata', {})
        face_results = supabase_data.get('face_results', {})
        embeddings_data = supabase_data.get('embeddings_data')

        # Rebuild the FaceMatcher from the stored .npz bytes.
        new_session_id = str(uuid.uuid4())[:8]
        if embeddings_data:
            import io
            from photo_selector.face_matcher import FaceMatcher
            data_np = np.load(io.BytesIO(embeddings_data), allow_pickle=True)
            matcher = FaceMatcher(similarity_threshold=float(data_np['threshold']))
            matcher.reference_embeddings = list(data_np['embeddings'])
            matcher.average_embedding = data_np['average']
            face_matchers[new_session_id] = matcher
            session['face_session_id'] = new_session_id
            print(f"[GDrive] Loaded {len(matcher.reference_embeddings)} reference embeddings")

        # Keep only face-filter results whose photo was actually extracted.
        filtered_photos = face_results.get('filtered_photos', [])
        uploaded_set = set(uploaded_filenames)
        matched_photos = [p for p in filtered_photos if p.get('filename') in uploaded_set]

        print(f"[GDrive] Matched {len(matched_photos)} of {len(filtered_photos)} photos")

        # Persist the review payload the Step-3 page reads.
        review_data = {
            'filtered_photos': matched_photos,
            'total_processed': len(uploaded_filenames),
            'match_count': len(matched_photos)
        }

        with open(os.path.join(RESULTS_FOLDER, f"{job_id}_review.json"), 'w') as f:
            json.dump(review_data, f)

        # Seed in-memory job state as if filtering had just finished.
        processing_jobs[job_id] = {
            'status': 'review_pending',
            'progress': 100,
            'message': 'Photos downloaded from Google Drive',
            'upload_dir': upload_dir,
            'session_id': new_session_id,
            'has_reference_photos': True,
            'reference_count': metadata.get('reference_count', 0),
            'quality_mode': metadata.get('quality_mode', 'balanced'),
            'similarity_threshold': metadata.get('similarity_threshold', 0.4),
            'confirmed_photos': [p['filename'] for p in matched_photos],
            'review_data': review_data,
            'total_photos': len(matched_photos),
            'from_dataset': dataset_name,
            'from_supabase': True
        }

        print(f"[GDrive] SUCCESS! Redirecting to step3_review/{job_id}")
        return jsonify({
            'success': True,
            'job_id': job_id,
            'matched_photos': len(matched_photos),
            'total_uploaded': len(uploaded_filenames),
            'redirect_url': f'/step3_review/{job_id}'
        })

    except Exception as e:
        print(f"[GDrive] Error: {e}")
        import traceback
        traceback.print_exc()
        return jsonify({'error': str(e)}), 500
|
|
|
|
|
|
|
|
|
|
|
|
chunked_uploads = {}
|
|
|
|
|
|
@app.route('/start_chunked_upload/<dataset_name>', methods=['POST'])
def start_chunked_upload(dataset_name):
    """Start a chunked upload session.

    Allocates an upload_id + job_id, creates the job's upload directories,
    and registers the session in chunked_uploads for later chunk requests.
    """
    try:
        payload = request.get_json()
        n_files = payload.get('total_files', 0)
        n_chunks = payload.get('total_chunks', 0)

        upload_id = str(uuid.uuid4())[:8]
        job_id = str(uuid.uuid4())[:8]
        target_dir = os.path.join(UPLOAD_FOLDER, job_id)
        os.makedirs(target_dir, exist_ok=True)
        os.makedirs(os.path.join(target_dir, 'thumbnails'), exist_ok=True)

        # Session state consumed by upload_reupload_chunk / finish_chunked_upload.
        chunked_uploads[upload_id] = {
            'dataset_name': dataset_name,
            'job_id': job_id,
            'upload_dir': target_dir,
            'total_files': n_files,
            'total_chunks': n_chunks,
            'received_chunks': set(),
            'uploaded_filenames': [],
        }

        print(f"[Chunked] Started upload session {upload_id} for dataset '{dataset_name}' ({n_files} files, {n_chunks} chunks)")
        return jsonify({'success': True, 'upload_id': upload_id})

    except Exception as e:
        print(f"[Chunked] Error starting session: {e}")
        return jsonify({'error': str(e)}), 500
|
|
|
|
|
|
|
|
|
@app.route('/upload_reupload_chunk/<dataset_name>', methods=['POST'])
def upload_reupload_chunk(dataset_name):
    """Receive a chunk of photos for reupload.

    Form fields: upload_id (from /start_chunked_upload) and chunk_index.
    Files arrive under the 'photos' multipart key. Returns JSON reporting
    the number of files actually written to disk.
    """
    from werkzeug.exceptions import ClientDisconnected
    try:
        upload_id = request.form.get('upload_id')
        chunk_index = int(request.form.get('chunk_index', 0))

        if upload_id not in chunked_uploads:
            return jsonify({'error': 'Invalid upload session'}), 400

        session_data = chunked_uploads[upload_id]
        upload_dir = session_data['upload_dir']

        files = request.files.getlist('photos')
        if not files:
            return jsonify({'error': 'No files in chunk'}), 400

        # Save each allowed file; count what was actually written so the
        # response doesn't over-report when a file is rejected by
        # allowed_file() or its name is reduced to '' by secure_filename().
        # (Previously this reported len(files) regardless.)
        saved_count = 0
        for file in files:
            if file and allowed_file(file.filename):
                filename = secure_filename(file.filename)
                if not filename:
                    continue
                filepath = os.path.join(upload_dir, filename)
                file.save(filepath)
                session_data['uploaded_filenames'].append(filename)
                saved_count += 1

        session_data['received_chunks'].add(chunk_index)
        print(f"[Chunked] Upload {upload_id}: Received chunk {chunk_index + 1}/{session_data['total_chunks']} ({saved_count}/{len(files)} files saved)")

        return jsonify({'success': True, 'chunk': chunk_index, 'files_saved': saved_count})
    except ClientDisconnected:
        # The client gave up mid-request (slow link / proxy timeout).
        print(f"[Chunked] Client disconnected during chunk upload (timeout)")
        return jsonify({'error': 'Connection timeout - please retry'}), 408
    except Exception as e:
        print(f"[Chunked] Error receiving chunk: {e}")
        import traceback
        traceback.print_exc()
        return jsonify({'error': str(e)}), 500
|
|
|
|
|
|
|
|
|
@app.route('/finish_chunked_upload/<dataset_name>', methods=['POST'])
def finish_chunked_upload(dataset_name):
    """Finalize chunked upload and process photos.

    Takes the filenames accumulated by upload_reupload_chunk, matches them
    against the face-filter results stored for `dataset_name` in Supabase,
    restores the FaceMatcher from the stored embeddings, and seeds a
    review-ready job. The chunked-upload session is discarded on success.

    JSON body: {'upload_id': <id from /start_chunked_upload>}.
    """
    try:
        data = request.get_json()
        upload_id = data.get('upload_id')

        if upload_id not in chunked_uploads:
            return jsonify({'error': 'Invalid upload session'}), 400

        session_data = chunked_uploads[upload_id]
        job_id = session_data['job_id']
        upload_dir = session_data['upload_dir']
        uploaded_filenames = session_data['uploaded_filenames']

        print(f"[Chunked] Finalizing upload {upload_id}: {len(uploaded_filenames)} files received")

        # Fetch the saved dataset (metadata, face results, embeddings).
        print(f"[Chunked] Loading dataset from Supabase...")
        supabase_data = load_dataset_from_supabase(dataset_name)
        if not supabase_data:
            return jsonify({'error': 'Dataset not found in Supabase'}), 404

        metadata = supabase_data.get('metadata', {})
        face_results = supabase_data.get('face_results', {})
        embeddings_data = supabase_data.get('embeddings_data')

        # Rebuild the FaceMatcher from the stored .npz bytes.
        new_session_id = str(uuid.uuid4())[:8]
        if embeddings_data:
            import io
            from photo_selector.face_matcher import FaceMatcher
            data_np = np.load(io.BytesIO(embeddings_data), allow_pickle=True)
            matcher = FaceMatcher(similarity_threshold=float(data_np['threshold']))
            matcher.reference_embeddings = list(data_np['embeddings'])
            matcher.average_embedding = data_np['average']
            face_matchers[new_session_id] = matcher
            session['face_session_id'] = new_session_id
            print(f"[Chunked] Loaded {len(matcher.reference_embeddings)} reference embeddings")

        # Keep only face-filter results whose photo was actually uploaded.
        filtered_photos = face_results.get('filtered_photos', [])
        uploaded_set = set(uploaded_filenames)
        matched_photos = [p for p in filtered_photos if p.get('filename') in uploaded_set]

        print(f"[Chunked] Matched {len(matched_photos)} of {len(filtered_photos)} photos")

        # Persist the review payload the Step-3 page reads.
        review_data = {
            'filtered_photos': matched_photos,
            'total_processed': len(uploaded_filenames),
            'match_count': len(matched_photos)
        }

        with open(os.path.join(RESULTS_FOLDER, f"{job_id}_review.json"), 'w') as f:
            json.dump(review_data, f)

        # Seed in-memory job state as if filtering had just finished.
        processing_jobs[job_id] = {
            'status': 'review_pending',
            'progress': 100,
            'message': 'Photos matched with saved face results',
            'upload_dir': upload_dir,
            'session_id': new_session_id,
            'has_reference_photos': True,
            'reference_count': metadata.get('reference_count', 0),
            'quality_mode': metadata.get('quality_mode', 'balanced'),
            'similarity_threshold': metadata.get('similarity_threshold', 0.4),
            'confirmed_photos': [p['filename'] for p in matched_photos],
            'review_data': review_data,
            'total_photos': len(matched_photos),
            'from_dataset': dataset_name,
            'from_supabase': True
        }

        # Session is finished; drop its in-memory state.
        del chunked_uploads[upload_id]

        print(f"[Chunked] SUCCESS! Redirecting to step3_review/{job_id}")
        return jsonify({
            'success': True,
            'job_id': job_id,
            'matched_photos': len(matched_photos),
            'total_uploaded': len(uploaded_filenames),
            'redirect_url': f'/step3_review/{job_id}'
        })

    except Exception as e:
        print(f"[Chunked] Error finalizing: {e}")
        import traceback
        traceback.print_exc()
        return jsonify({'error': str(e)}), 500
|
|
|
|
|
|
|
|
|
@app.route('/process_reupload/<dataset_name>', methods=['POST'])
def process_reupload(dataset_name):
    """Process re-uploaded photos using saved face results from Supabase.

    Single-request variant of the chunked flow: accepts either a 'zipfile'
    multipart upload (extracted server-side) or individual 'photos' files,
    matches the uploaded filenames against the dataset's stored face-filter
    results, restores the FaceMatcher, and seeds a review-ready job.
    """
    from werkzeug.exceptions import ClientDisconnected
    try:
        print(f"[Reupload] Starting reupload for dataset '{dataset_name}'")

        # Fetch the saved dataset (metadata, face results, embeddings).
        print(f"[Reupload] Loading dataset from Supabase...")
        supabase_data = load_dataset_from_supabase(dataset_name)
        if not supabase_data:
            print(f"[Reupload] ERROR: Dataset not found in Supabase")
            return jsonify({'error': 'Dataset not found in Supabase'}), 404

        metadata = supabase_data.get('metadata', {})
        face_results = supabase_data.get('face_results', {})
        embeddings_data = supabase_data.get('embeddings_data')
        print(f"[Reupload] Dataset loaded: {len(face_results.get('filtered_photos', []))} photos in face results")

        # Fresh job + working directories.
        job_id = str(uuid.uuid4())[:8]
        new_session_id = str(uuid.uuid4())[:8]
        upload_dir = os.path.join(UPLOAD_FOLDER, job_id)
        os.makedirs(upload_dir, exist_ok=True)
        os.makedirs(os.path.join(upload_dir, 'thumbnails'), exist_ok=True)

        # Two input modes: a single zip, or a batch of individual files.
        zipfile_upload = request.files.get('zipfile')
        uploaded_filenames = []

        if zipfile_upload and zipfile_upload.filename.lower().endswith('.zip'):
            import zipfile
            print(f"[Reupload] Received zip file: {zipfile_upload.filename}")

            zip_path = os.path.join(upload_dir, 'upload.zip')
            zipfile_upload.save(zip_path)
            print(f"[Reupload] Zip saved, extracting...")

            # Extract only image entries; skip directories, macOS metadata,
            # and hidden entries. Names are flattened to their basename.
            try:
                with zipfile.ZipFile(zip_path, 'r') as zf:
                    image_extensions = {'.jpg', '.jpeg', '.png', '.heic', '.heif', '.webp', '.bmp', '.gif'}
                    for member in zf.namelist():
                        if member.endswith('/') or '/__MACOSX' in member or member.startswith('.'):
                            continue
                        ext = os.path.splitext(member.lower())[1]
                        if ext in image_extensions:
                            # secure_filename can return '' for unusable names.
                            filename = secure_filename(os.path.basename(member))
                            if filename:
                                with zf.open(member) as src:
                                    filepath = os.path.join(upload_dir, filename)
                                    with open(filepath, 'wb') as dst:
                                        dst.write(src.read())
                                uploaded_filenames.append(filename)

                                # Progress log every 200 files.
                                if len(uploaded_filenames) % 200 == 0:
                                    print(f"[Reupload] Extracted {len(uploaded_filenames)} files...")

                print(f"[Reupload] Extracted {len(uploaded_filenames)} photos from zip")
            finally:
                # The zip is no longer needed once extracted (or on failure).
                if os.path.exists(zip_path):
                    os.remove(zip_path)
        else:
            # Individual file uploads under the 'photos' key.
            files = request.files.getlist('photos')
            if not files or (len(files) == 1 and files[0].filename == ''):
                print(f"[Reupload] ERROR: No photos uploaded")
                return jsonify({'error': 'No photos uploaded'}), 400

            print(f"[Reupload] Saving {len(files)} uploaded files (thumbnails skipped for speed)...")
            for i, file in enumerate(files):
                if file and allowed_file(file.filename):
                    filename = secure_filename(file.filename)
                    filepath = os.path.join(upload_dir, filename)
                    file.save(filepath)
                    uploaded_filenames.append(filename)

                    # Progress log every 200 files.
                    if (i + 1) % 200 == 0:
                        print(f"[Reupload] Saved {i + 1}/{len(files)} files...")

            print(f"[Reupload] Saved {len(uploaded_filenames)} photos for dataset '{dataset_name}'")

        # Rebuild the FaceMatcher from the stored .npz bytes.
        print(f"[Reupload] Loading reference embeddings...")
        if embeddings_data:
            import io
            from photo_selector.face_matcher import FaceMatcher

            data = np.load(io.BytesIO(embeddings_data), allow_pickle=True)
            matcher = FaceMatcher(similarity_threshold=float(data['threshold']))
            matcher.reference_embeddings = list(data['embeddings'])
            matcher.average_embedding = data['average']
            face_matchers[new_session_id] = matcher
            session['face_session_id'] = new_session_id
            print(f"[Reupload] Loaded {len(matcher.reference_embeddings)} reference embeddings")

        # Keep only face-filter results whose photo was actually uploaded.
        print(f"[Reupload] Matching uploaded files with saved face results...")
        filtered_photos = face_results.get('filtered_photos', [])

        uploaded_set = set(uploaded_filenames)

        matched_photos = []
        for photo in filtered_photos:
            if photo.get('filename') in uploaded_set:
                matched_photos.append(photo)

        print(f"[Reupload] Matched {len(matched_photos)} of {len(filtered_photos)} photos from face results")

        # Persist the review payload the Step-3 page reads.
        review_data = {
            'filtered_photos': matched_photos,
            'total_processed': len(uploaded_filenames),
            'match_count': len(matched_photos)
        }

        with open(os.path.join(RESULTS_FOLDER, f"{job_id}_review.json"), 'w') as f:
            json.dump(review_data, f)
        print(f"[Reupload] Saved review data")

        # Seed in-memory job state as if filtering had just finished.
        processing_jobs[job_id] = {
            'status': 'review_pending',
            'progress': 100,
            'message': 'Photos matched with saved face results',
            'upload_dir': upload_dir,
            'session_id': new_session_id,
            'has_reference_photos': True,
            'reference_count': metadata.get('reference_count', 0),
            'quality_mode': metadata.get('quality_mode', 'balanced'),
            'similarity_threshold': metadata.get('similarity_threshold', 0.4),
            'confirmed_photos': [p['filename'] for p in matched_photos],
            'review_data': review_data,
            'total_photos': len(matched_photos),
            'from_dataset': dataset_name,
            'from_supabase': True
        }

        print(f"[Reupload] SUCCESS! Redirecting to step3_review/{job_id}")
        return jsonify({
            'success': True,
            'job_id': job_id,
            'matched_photos': len(matched_photos),
            'total_uploaded': len(uploaded_filenames),
            'redirect_url': f'/step3_review/{job_id}'
        })

    except ClientDisconnected:
        # The client gave up mid-upload (slow link / proxy timeout).
        print(f"[Reupload] Client disconnected during upload (timeout)")
        return jsonify({'error': 'Connection timeout - please retry with smaller batch or better connection'}), 408
    except Exception as e:
        import traceback
        traceback.print_exc()
        return jsonify({'error': str(e)}), 500
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    # Startup banner. NOTE(review): it advertises localhost:5000 but the
    # default port below is 7860 — confirm which is intended.
    print("""
============================================
PHOTO SELECTION WEB APP
Open http://localhost:5000 in your browser

NEW: Automatic selection mode!
The AI decides which photos to keep.

TEST: /test-month for single folder testing
============================================
""")
    # Port is configurable for hosted environments; `os` is already imported
    # at the top of the file, so the redundant local import was removed.
    port = int(os.environ.get('PORT', 7860))
    app.run(debug=False, host='0.0.0.0', port=port)