Hamzah committed
Commit: 139a373
Parent(s): f297659
first commit

Files changed:
- main.py +600 -0
- outliers_removal_algorithm.py +206 -0
- reorder_frames_algorithm.py +380 -0
main.py (ADDED)
@@ -0,0 +1,600 @@
#!/usr/bin/env python3
"""
Main script for video processing: outlier detection and/or frame reordering.

Place your videos in the './inference' folder and run this script to process them.
Processed videos will be saved with the '_fixed' suffix.

This script can perform:
1. Outlier detection only (--task outliers)
2. Frame reordering only (--task reorder)
3. Both operations (--task both): outlier detection first, then reordering

Uses DBSCAN for outlier detection.

Usage:
    # Process all videos in the ./inference folder
    python main.py --input-dir ./inference --task both

    # Process a single video from the inference folder
    python main.py --video ./inference/my_video.avi --task both

    # Custom output directory (save to outlier_artifacts)
    python main.py --input-dir ./inference --task outliers --output-dir ./outlier_artifacts/cleaned_videos

    # Custom DBSCAN parameters
    python main.py --input-dir ./inference --task both --eps 0.5 --min-samples 40

    # Process videos from UCF101_videos with a custom model (DINOv2)
    python main.py --input-dir ./UCF101_videos --task outliers --model-type dinov2

    # Process videos with the ResNet18 model
    python main.py --input-dir ./inference --task outliers --model-type resnet18

Output:
    - Default: videos saved in the same directory as the input with the '_fixed' suffix
    - With --output-dir: videos saved in the specified directory with the '_fixed' suffix
    - Outlier detection: video_fixed.avi (outliers removed)
    - Frame reordering: video_fixed.avi (frames reordered)
    - Both: video_fixed.avi (outliers removed AND frames reordered, no intermediate files)
"""

import os
import argparse
import glob
from pathlib import Path

import cv2
import numpy as np
import torch
from PIL import Image
from tqdm import tqdm

from outliers_removal_algorithm import dbscan_outliers, USE_GPU
from reorder_frames_algorithm import load_video_gray, compute_mse_matrix, build_best_path

# Device configuration
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Supported video extensions
VIDEO_EXTS = ('.avi', '.mp4', '.mov', '.mkv')

# ==========================================
# EMBEDDING EXTRACTION (Outlier Detection)
# ==========================================

def load_embedding_model(model_type='clip', model_path=None, device='cuda'):
    """Load a CLIP, DINOv2, or ResNet18 model for embedding extraction."""
    print(f"Loading {model_type.upper()} model...")

    if model_type == 'clip':
        import clip
        model, preprocess = clip.load("ViT-B/32", device=device, jit=False)
        model.eval()
        torch.set_grad_enabled(False)
        embedding_dim = 512

        def extract_fn(image_batch):
            with torch.no_grad():
                feats = model.encode_image(image_batch)
                feats = torch.nn.functional.normalize(feats, dim=-1)
                return feats

        print(f"CLIP model loaded: ViT-B/32 ({embedding_dim}-dim)")
        return extract_fn, preprocess, embedding_dim

    elif model_type == 'dinov2':
        from transformers import pipeline
        from torchvision import transforms

        if model_path is None:
            model_path = "facebook/dinov2-base"

        feature_extractor = pipeline(
            model=model_path,
            task="image-feature-extraction",
            device=0 if (device == 'cuda' and torch.cuda.is_available()) else -1
        )

        test_img = Image.new('RGB', (224, 224))
        test_emb = feature_extractor(test_img)
        embedding_dim = len(test_emb[0])

        preprocess = transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ])

        def extract_fn(image_batch):
            images = []
            for i in range(image_batch.shape[0]):
                img_tensor = image_batch[i]
                img_np = img_tensor.cpu().permute(1, 2, 0).numpy()
                img_np = img_np * np.array([0.229, 0.224, 0.225]) + np.array([0.485, 0.456, 0.406])
                img_np = (img_np * 255).clip(0, 255).astype(np.uint8)
                images.append(Image.fromarray(img_np))

            features = feature_extractor(images)
            feats = torch.tensor(features, device=device).squeeze(1)
            feats = torch.nn.functional.normalize(feats, dim=-1)
            return feats

        print(f"DINOv2 model loaded: {model_path} ({embedding_dim}-dim)")
        return extract_fn, preprocess, embedding_dim

    elif model_type == 'resnet18':
        from torchvision import models, transforms

        # Load ResNet18 pretrained model
        model = models.resnet18(pretrained=True)
        # Remove the final classification layer to get embeddings
        model = torch.nn.Sequential(*list(model.children())[:-1])
        model = model.to(device)
        model.eval()
        torch.set_grad_enabled(False)

        embedding_dim = 512  # ResNet18 final layer dimension

        preprocess = transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ])

        def extract_fn(image_batch):
            with torch.no_grad():
                feats = model(image_batch)
                feats = feats.squeeze(-1).squeeze(-1)  # Remove spatial dimensions
                feats = torch.nn.functional.normalize(feats, dim=-1)
                return feats

        print(f"ResNet18 model loaded ({embedding_dim}-dim)")
        return extract_fn, preprocess, embedding_dim

    else:
        raise ValueError(f"Unknown model type: {model_type}")


def extract_video_embeddings(video_path, extract_fn, preprocess, device='cuda', batch_size=128):
    """Extract embeddings for all frames in a video."""
    cap = cv2.VideoCapture(str(video_path))
    if not cap.isOpened():
        raise ValueError(f"Cannot open video: {video_path}")

    fps = cap.get(cv2.CAP_PROP_FPS)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    print(f"Video: {Path(video_path).name}")
    print(f"Properties: {width}x{height}, {fps:.2f} fps, {total_frames} frames")
    print(f"Extracting embeddings with batch_size={batch_size}...")

    frame_batch = []
    all_embeddings = []

    with tqdm(total=total_frames, desc="Extracting", unit="frame") as pbar:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            pil_image = Image.fromarray(frame_rgb)
            frame_tensor = preprocess(pil_image)
            frame_batch.append(frame_tensor)

            if len(frame_batch) >= batch_size:
                batch = torch.stack(frame_batch, dim=0)
                if device == 'cuda':
                    batch = batch.pin_memory().to(device, non_blocking=True)
                else:
                    batch = batch.to(device)

                feats = extract_fn(batch)
                all_embeddings.append(feats.cpu())
                frame_batch.clear()
                pbar.update(batch_size)

        if frame_batch:
            batch = torch.stack(frame_batch, dim=0)
            if device == 'cuda':
                batch = batch.pin_memory().to(device, non_blocking=True)
            else:
                batch = batch.to(device)

            feats = extract_fn(batch)
            all_embeddings.append(feats.cpu())
            pbar.update(len(frame_batch))

    cap.release()

    embeddings = torch.cat(all_embeddings, dim=0)
    print(f"Extracted {len(embeddings)} embeddings")

    return embeddings, fps, width, height


# ==========================================
# VIDEO SAVING
# ==========================================

def save_cleaned_video(video_path, predictions, output_path, fps, width, height):
    """Create a cleaned video with outliers removed."""
    num_outliers = predictions.sum()
    num_inliers = len(predictions) - num_outliers

    print(f"\nOutlier Detection Results:")
    print(f"  Total frames: {len(predictions)}")
    print(f"  Inliers: {num_inliers} ({100*num_inliers/len(predictions):.1f}%)")
    print(f"  Outliers: {num_outliers} ({100*num_outliers/len(predictions):.1f}%)")

    cap = cv2.VideoCapture(str(video_path))
    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    out = cv2.VideoWriter(str(output_path), fourcc, fps, (width, height))

    frame_id = 0
    kept = 0

    print(f"\nGenerating cleaned video: {Path(output_path).name}")
    with tqdm(total=len(predictions), desc="Writing", unit="frame") as pbar:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            if frame_id < len(predictions) and not predictions[frame_id]:
                out.write(frame)
                kept += 1

            frame_id += 1
            pbar.update(1)

    cap.release()
    out.release()

    print(f"Cleaned video saved: {output_path}")
    return output_path


def save_reordered_video(video_path, frame_order, output_path):
    """Create a reordered video using the predicted frame order."""
    # Load all frames
    cap = cv2.VideoCapture(str(video_path))
    frames = []
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        frames.append(frame)

    fps = cap.get(cv2.CAP_PROP_FPS)
    height, width = frames[0].shape[:2]
    cap.release()

    print(f"\nFrame Reordering Results:")
    print(f"  Total frames: {len(frames)}")
    print(f"  Reconstructed order: {len(frame_order)} frames")

    # Write reordered video
    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    out = cv2.VideoWriter(str(output_path), fourcc, fps, (width, height))

    print(f"\nGenerating reordered video: {Path(output_path).name}")
    for idx in tqdm(frame_order, desc="Writing", unit="frame"):
        if 0 <= idx < len(frames):
            out.write(frames[idx])

    out.release()

    print(f"Reordered video saved: {output_path}")
    return output_path


def save_cleaned_and_reordered_video(video_path, outlier_predictions, frame_order, output_path):
    """Create a video with outliers removed and frames reordered in one pass."""
    # Load all frames
    cap = cv2.VideoCapture(str(video_path))
    all_frames = []
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        all_frames.append(frame)

    fps = cap.get(cv2.CAP_PROP_FPS)
    height, width = all_frames[0].shape[:2]
    cap.release()

    # Filter out outliers
    inlier_frames = [all_frames[i] for i in range(len(all_frames))
                     if i < len(outlier_predictions) and not outlier_predictions[i]]

    num_outliers = outlier_predictions.sum()
    print(f"\nCombined Processing Results:")
    print(f"  Original frames: {len(all_frames)}")
    print(f"  Outliers removed: {num_outliers} ({100*num_outliers/len(all_frames):.1f}%)")
    print(f"  Inlier frames: {len(inlier_frames)} ({100*len(inlier_frames)/len(all_frames):.1f}%)")
    print(f"  Reordered frames: {len(frame_order)}")

    # Write reordered video with only inlier frames
    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    out = cv2.VideoWriter(str(output_path), fourcc, fps, (width, height))

    print(f"\nGenerating final video: {Path(output_path).name}")
    for idx in tqdm(frame_order, desc="Writing", unit="frame"):
        if 0 <= idx < len(inlier_frames):
            out.write(inlier_frames[idx])

    out.release()

    print(f"Final video saved: {output_path}")
    return output_path


# ==========================================
# MAIN PIPELINE
# ==========================================

def run_outlier_detection(video_path, output_path, args):
    """Run the outlier detection pipeline using imported functions."""
    print("OUTLIER DETECTION")
    print(f"GPU Acceleration: {'Enabled (cuML)' if USE_GPU else 'Disabled (CPU/sklearn)'}")

    # Load embedding model
    extract_fn, preprocess, embedding_dim = load_embedding_model(
        model_type=args.model_type,
        model_path=args.model_path,
        device=DEVICE
    )

    # Extract embeddings
    embeddings, fps, width, height = extract_video_embeddings(
        video_path, extract_fn, preprocess, DEVICE, args.batch_size
    )

    # Detect outliers using DBSCAN
    print("\nRunning DBSCAN outlier detection...")
    predictions = dbscan_outliers(
        embeddings,
        eps=args.eps,
        min_samples=args.min_samples
    )

    # Save cleaned video
    cleaned_path = save_cleaned_video(video_path, predictions, output_path, fps, width, height)
    return cleaned_path


def run_frame_reordering(video_path, output_path):
    """Run the frame reordering pipeline."""
    print("\n" + "=" * 80)
    print("FRAME REORDERING")
    print("=" * 80)

    print(f"Loading video: {Path(video_path).name}")
    frames = load_video_gray(str(video_path))
    print(f"Loaded {len(frames)} frames")

    print("Computing MSE matrix...")
    mse = compute_mse_matrix(frames)

    print("Building temporal path...")
    path = build_best_path(mse)

    # Save reordered video
    reordered_path = save_reordered_video(video_path, path, output_path)
    return reordered_path


def run_both_tasks(video_path, output_path, args):
    """Run both outlier detection and frame reordering without saving an intermediate video."""
    print("\n" + "=" * 80)
    print("STEP 1: OUTLIER DETECTION")
    print("=" * 80)
    print(f"GPU Acceleration: {'Enabled (cuML)' if USE_GPU else 'Disabled (CPU/sklearn)'}")

    # Load embedding model and extract embeddings
    extract_fn, preprocess, embedding_dim = load_embedding_model(
        model_type=args.model_type,
        model_path=args.model_path,
        device=DEVICE
    )

    embeddings, fps, width, height = extract_video_embeddings(
        video_path, extract_fn, preprocess, DEVICE, args.batch_size
    )

    # Detect outliers using DBSCAN
    print("\nRunning DBSCAN outlier detection...")
    outlier_predictions = dbscan_outliers(
        embeddings,
        eps=args.eps,
        min_samples=args.min_samples
    )

    num_outliers = outlier_predictions.sum()
    num_inliers = len(outlier_predictions) - num_outliers
    print(f"\nOutlier Detection Results:")
    print(f"  Total frames: {len(outlier_predictions)}")
    print(f"  Inliers: {num_inliers} ({100*num_inliers/len(outlier_predictions):.1f}%)")
    print(f"  Outliers: {num_outliers} ({100*num_outliers/len(outlier_predictions):.1f}%)")

    # Step 2: Frame reordering on inlier frames
    print("\n" + "=" * 80)
    print("STEP 2: FRAME REORDERING (on inlier frames)")
    print("=" * 80)

    all_frames = load_video_gray(str(video_path))

    # Filter to only inlier frames
    inlier_frames = []
    for i in range(len(all_frames)):
        if i < len(outlier_predictions) and not outlier_predictions[i]:
            inlier_frames.append(all_frames[i])

    inlier_frames = torch.stack(inlier_frames, dim=0)
    mse = compute_mse_matrix(inlier_frames)
    path = build_best_path(mse)

    # Save final video (cleaned and reordered)
    final_path = save_cleaned_and_reordered_video(video_path, outlier_predictions, path, output_path)
    return final_path


def get_output_path(input_path, output_dir, suffix="_fixed"):
    """Determine the output path based on the input path and output directory."""
    input_path = Path(input_path)

    if output_dir:
        # Use the specified output directory
        output_dir = Path(output_dir)
        output_dir.mkdir(exist_ok=True, parents=True)
        output_name = f"{input_path.stem}{suffix}{input_path.suffix}"
        return output_dir / output_name
    else:
        # Save in the same directory as the input
        output_name = f"{input_path.stem}{suffix}{input_path.suffix}"
        return input_path.parent / output_name


def process_single_video(video_path, args):
    """Process a single video file."""
    video_path = Path(video_path)

    if not video_path.exists():
        print(f"Error: Video not found: {video_path}")
        return

    print("=" * 80)
    print(f"Processing: {video_path.name}")
    print("=" * 80)
    print(f"Task: {args.task.upper()}")
    print("=" * 80)

    # Determine output path
    output_path = get_output_path(video_path, args.output_dir)

    # Execute tasks
    if args.task == "outliers":
        run_outlier_detection(str(video_path), str(output_path), args)

    elif args.task == "reorder":
        run_frame_reordering(str(video_path), str(output_path))

    elif args.task == "both":
        # Run both tasks without saving an intermediate video
        run_both_tasks(str(video_path), str(output_path), args)

    print("\n" + "=" * 80)
    print("PROCESSING COMPLETE")
    print("=" * 80)
    print(f"Output: {output_path}")


def process_directory(input_dir, args):
    """Process all videos in a directory."""
    input_dir = Path(input_dir)

    if not input_dir.exists():
        print(f"Error: Directory not found: {input_dir}")
        return

    # Find all video files
    video_files = []
    for ext in VIDEO_EXTS:
        video_files.extend(input_dir.glob(f"*{ext}"))

    video_files = sorted(video_files)

    if not video_files:
        print(f"No video files found in {input_dir}")
        print(f"Supported extensions: {VIDEO_EXTS}")
        return

    print("=" * 80)
    print(f"Found {len(video_files)} video(s) in {input_dir}")
    print("=" * 80)

    # Process each video
    for i, video_path in enumerate(video_files, 1):
        print(f"\n[{i}/{len(video_files)}] Processing: {video_path.name}")

        # Determine output path
        output_path = get_output_path(video_path, args.output_dir)

        try:
            # Execute tasks
            if args.task == "outliers":
                run_outlier_detection(str(video_path), str(output_path), args)

            elif args.task == "reorder":
                run_frame_reordering(str(video_path), str(output_path))

            elif args.task == "both":
                # Run both tasks without saving an intermediate video
                run_both_tasks(str(video_path), str(output_path), args)

            print(f"  ✓ Saved: {output_path}")

        except Exception as e:
            print(f"  ✗ Error processing {video_path.name}: {e}")
            continue

    print("\n" + "=" * 80)
    print("BATCH PROCESSING COMPLETE")
    print("=" * 80)


def main():
    parser = argparse.ArgumentParser(
        description="Main script for video processing: outlier detection (DBSCAN) and/or frame reordering"
    )

    # Input arguments (mutually exclusive)
    input_group = parser.add_mutually_exclusive_group(required=True)
    input_group.add_argument("--video",
                             help="Process a single video file")
    input_group.add_argument("--input-dir",
                             help="Process all videos in a directory (default: ./inference)")

    # Task selection
    parser.add_argument("--task", required=True, choices=["outliers", "reorder", "both"],
                        help="Task to perform: outliers, reorder, or both")

    # Output directory (optional)
    parser.add_argument("--output-dir",
                        help="Output directory (default: same as input directory)")

    # Outlier detection parameters
    parser.add_argument("--model-type", default="clip", choices=["clip", "dinov2", "resnet18"],
                        help="Embedding model type for outlier detection")
    parser.add_argument("--model-path", help="Path to DINOv2 model (optional)")
    parser.add_argument("--batch-size", type=int, default=128,
                        help="Batch size for embedding extraction")

    # DBSCAN parameters
    parser.add_argument("--eps", type=float, default=0.5,
                        help="DBSCAN: epsilon parameter")
    parser.add_argument("--min-samples", type=int, default=40,
                        help="DBSCAN: minimum samples parameter")

    args = parser.parse_args()

    # Default to ./inference if neither --video nor --input-dir is specified
    # (this cannot happen due to required=True, but the note is kept for clarity)

    if args.task in ["outliers", "both"]:
        print(f"DBSCAN parameters: eps={args.eps}, min_samples={args.min_samples}")

    # Process based on input mode
    if args.video:
        process_single_video(args.video, args)
    elif args.input_dir:
        process_directory(args.input_dir, args)


if __name__ == "__main__":
    main()
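
The CLI above is the intended entry point; for reference, here is a minimal sketch (not part of this commit) of driving the same pipeline programmatically, passing an argparse.Namespace in place of parsed CLI arguments. The video path and parameter values are illustrative assumptions.

# Hypothetical programmatic use of the pipeline in main.py (sketch only).
from argparse import Namespace
from main import run_both_tasks, get_output_path

args = Namespace(model_type="clip", model_path=None, batch_size=128,
                 eps=0.5, min_samples=40, output_dir=None, task="both")
video = "./inference/my_video.avi"              # illustrative input path
out = get_output_path(video, args.output_dir)   # -> ./inference/my_video_fixed.avi
run_both_tasks(video, str(out), args)           # outlier removal, then reordering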
outliers_removal_algorithm.py (ADDED)
@@ -0,0 +1,206 @@
#!/usr/bin/env python3
"""
Outlier removal algorithm for video frame embeddings using DBSCAN.

Reads embeddings, detects outliers, and exports predictions to CSV files.
GPU acceleration is automatically detected and used if available.

Usage:
    # Process CLIP embeddings from outlier_artifacts
    python outliers_removal_algorithm.py --embeddings-dir ./outlier_artifacts/embeddings --output-dir ./outlier_artifacts/cleaned_CSVs --model-type clip

    # Process DINOv2 embeddings
    python outliers_removal_algorithm.py --embeddings-dir ./outlier_artifacts/embeddings --output-dir ./outlier_artifacts/cleaned_CSVs --model-type dinov2

    # Process ResNet18 embeddings
    python outliers_removal_algorithm.py --embeddings-dir ./outlier_artifacts/embeddings --output-dir ./outlier_artifacts/cleaned_CSVs --model-type resnet18

    # Custom DBSCAN parameters with CLIP embeddings
    python outliers_removal_algorithm.py --embeddings-dir ./outlier_artifacts/embeddings --output-dir ./outlier_artifacts/cleaned_CSVs --model-type clip --eps 0.45 --min-samples 50

    # Filter to a specific action category
    python outliers_removal_algorithm.py --embeddings-dir ./outlier_artifacts/embeddings --output-dir ./outlier_artifacts/cleaned_CSVs --model-type clip --action-filter Crawling

    # Limit processing to the first 10 videos
    python outliers_removal_algorithm.py --embeddings-dir ./outlier_artifacts/embeddings --output-dir ./outlier_artifacts/cleaned_CSVs --model-type clip --max-videos 10

Note: To generate cleaned videos from predictions, use generate_cleaned_videos_from_predictions.py
"""

import os
import glob
import csv
import argparse
import numpy as np
import torch
from pathlib import Path

try:
    import cupy as cp
    from cuml.cluster import DBSCAN as cuDBSCAN
    CUML_AVAILABLE = True
except ImportError:
    CUML_AVAILABLE = False

from sklearn.cluster import DBSCAN as skDBSCAN

# Automatically detect GPU availability
USE_GPU = CUML_AVAILABLE and torch.cuda.is_available()


def to_numpy(x):
    """Convert a tensor or array to numpy float32."""
    if isinstance(x, torch.Tensor):
        x = x.detach().cpu().numpy()
    return np.asarray(x, dtype=np.float32)


def dbscan_outliers(X, eps=0.55, min_samples=10):
    """
    Detect outliers using DBSCAN (noise points).

    Args:
        X: Feature matrix (N, D)
        eps: DBSCAN epsilon parameter
        min_samples: DBSCAN minimum samples parameter

    Returns:
        Boolean array of shape (N,) where True = outlier
    """
    X = to_numpy(X)
    if USE_GPU:
        labels = cuDBSCAN(eps=eps, min_samples=min_samples).fit_predict(cp.asarray(X)).get()
    else:
        labels = skDBSCAN(eps=eps, min_samples=min_samples, n_jobs=-1).fit_predict(X)
    return labels == -1


def extract_action_name(filename, model_type):
    """Extract the action category from an embedding filename based on the model type."""
    name = os.path.basename(filename)
    suffix = f'_{model_type}_embeddings'
    name = name.replace(suffix + '.pt', '').replace(suffix + '.pth', '')
    return name


def process_all_embeddings(emb_dir, eps, min_samples, output_dir, model_type='clip',
                           max_videos=None, action_filter=None):
    """
    Process all embeddings and export predictions to CSV files.

    Args:
        emb_dir: Directory containing embedding .pt files
        eps: DBSCAN epsilon parameter
        min_samples: DBSCAN minimum samples parameter
        output_dir: Directory to save CSV predictions
        model_type: Model type to load ('clip', 'dinov2', or 'resnet18')
        max_videos: Limit processing to the first N videos
        action_filter: Filter to a specific action category
    """
    # Filter files by model type (e.g., *_clip_embeddings.pt, *_dinov2_embeddings.pt, or *_resnet18_embeddings.pt)
    pattern = f"*_{model_type}_embeddings.pt"
    pt_files = sorted(glob.glob(os.path.join(emb_dir, pattern)))

    if action_filter:
        pt_files = [f for f in pt_files if action_filter.lower() in os.path.basename(f).lower()]
        print(f"Filtering to action: {action_filter}")
        print(f"Found {len(pt_files)} matching file(s)")

    # Create output directory
    output_path = Path(output_dir)
    output_path.mkdir(exist_ok=True, parents=True)

    print("=" * 80)
    print("OUTLIER REMOVAL ALGORITHM - DBSCAN")
    print("=" * 80)
    print(f"Model type: {model_type.upper()}")
    print(f"GPU Acceleration: {'Enabled (cuML)' if USE_GPU else 'Disabled (CPU/sklearn)'}")
    print(f"Embeddings dir: {emb_dir}")
    print(f"Output dir: {output_dir}")
    print(f"DBSCAN parameters: eps={eps}, min_samples={min_samples}")
    print(f"Total embedding files: {len(pt_files)}")
    print("=" * 80)

    total_videos = 0

    for pt_path in pt_files:
        data = torch.load(pt_path, map_location="cpu")
        action_name = extract_action_name(pt_path, model_type)
        print(f"Processing action: {action_name}")

        # Create CSV for this action
        csv_path = output_path / f"{action_name}.csv"

        with open(csv_path, 'w', newline='') as csvfile:
            writer = csv.writer(csvfile)
            writer.writerow(['video_id', 'predicted_outliers_list'])

            for video_name, video_data in data.items():
                if max_videos and total_videos >= max_videos:
                    break

                total_videos += 1
                embeddings = video_data["embeddings"]

                # Run DBSCAN outlier detection
                predictions = dbscan_outliers(embeddings, eps=eps, min_samples=min_samples)

                # Convert the boolean array to a list of outlier indices
                outlier_indices = np.where(predictions)[0].tolist()
                outliers_str = ",".join(map(str, outlier_indices))

                # Write to CSV
                writer.writerow([video_name, outliers_str])

                num_outliers = predictions.sum()
                num_frames = len(embeddings)

        if max_videos and total_videos >= max_videos:
            break

    print("\n" + "=" * 80)
    print("PROCESSING COMPLETE")
    print("=" * 80)
    print(f"Total videos processed: {total_videos}")
    print(f"CSV files saved to: {output_path.absolute()}")
    print("\nNext step: Use generate_cleaned_videos_from_predictions.py to create cleaned videos")
    print("=" * 80)


def main():
    parser = argparse.ArgumentParser(
        description="Outlier removal algorithm using DBSCAN: detect outliers and export predictions to CSV"
    )

    parser.add_argument("--embeddings-dir", required=True,
                        help="Directory containing embedding .pt files")
    parser.add_argument("--output-dir", default="./outlier_artifacts/cleaned_CSVs",
                        help="Directory to save prediction CSV files")
    parser.add_argument("--model-type", type=str, choices=['clip', 'dinov2', 'resnet18'], default='clip',
                        help="Model type to load: 'clip', 'dinov2', or 'resnet18' (default: clip)")
    parser.add_argument("--max-videos", type=int,
                        help="Limit processing to the first N videos")
    parser.add_argument("--action-filter",
                        help="Filter to a specific action category (e.g., 'Crawling')")

    # DBSCAN parameters
    parser.add_argument("--eps", type=float, default=0.5,
                        help="DBSCAN: epsilon parameter")
    parser.add_argument("--min-samples", type=int, default=40,
                        help="DBSCAN: minimum samples parameter")

    args = parser.parse_args()

    process_all_embeddings(
        emb_dir=args.embeddings_dir,
        eps=args.eps,
        min_samples=args.min_samples,
        output_dir=args.output_dir,
        model_type=args.model_type,
        max_videos=args.max_videos,
        action_filter=args.action_filter
    )


if __name__ == "__main__":
    main()
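
As a quick sanity check of dbscan_outliers, here is a small synthetic sketch (not part of this commit): a dense cluster of embeddings plus a handful of far-away points. With the eps/min_samples defaults used elsewhere in this commit, the distant points come back flagged as DBSCAN noise. The dimensions, random seed, and parameter values are illustrative assumptions.

# Synthetic check of dbscan_outliers (sketch only).
import numpy as np
from outliers_removal_algorithm import dbscan_outliers

rng = np.random.default_rng(0)
cluster = rng.normal(0.0, 0.01, size=(200, 512)).astype(np.float32)  # tight cluster
far = rng.normal(5.0, 0.01, size=(5, 512)).astype(np.float32)        # isolated points
X = np.concatenate([cluster, far], axis=0)

mask = dbscan_outliers(X, eps=0.5, min_samples=40)  # True = DBSCAN noise point
print(mask.shape, int(mask.sum()))                  # the 5 far points are flagged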
reorder_frames_algorithm.py (ADDED)
@@ -0,0 +1,380 @@
#!/usr/bin/env python3
"""
Frame order reconstruction algorithm using MSE and greedy path construction.

Reconstructs temporal frame order from shuffled videos using a grayscale MSE matrix,
MST diameter endpoints, and double-ended greedy path building.

Usage:
    # Process shuffled videos and CSVs from shuffled_artifacts
    python reorder_frames_algorithm.py --csv_dir ./shuffled_artifacts/shuffled_CSVs --videos_dir ./shuffled_artifacts/shuffled_videos --out_dir ./shuffled_artifacts/ordered_CSVs

Note: To generate reordered videos from predictions, use generate_ordered_videos_from_predictions.py
"""

import argparse
import os
import glob

import cv2
import numpy as np
import pandas as pd
import torch


# =========================
# Config
# =========================

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
IMG_SIZE = 64
VIDEO_EXTS = (".avi", ".mp4", ".mov", ".mkv")

# =========================
# Pairwise MSE on GPU
# =========================

def compute_mse_matrix(frames: torch.Tensor) -> torch.Tensor:
    """
    frames: [N, 1, H, W] on DEVICE
    Returns:
        mse[i, j]: mean squared error between frame i and frame j
    """
    N = frames.shape[0]
    flat = frames.view(N, -1).float()  # [N, D]

    sq = (flat ** 2).sum(dim=1, keepdim=True)  # [N, 1]
    dist2 = sq + sq.t() - 2.0 * (flat @ flat.t())
    dist2 = torch.clamp(dist2, min=0.0)

    D = flat.shape[1]
    mse = dist2 / D
    mse.fill_diagonal_(0.0)
    return mse

# =========================
# Utils
# =========================

def _mst_endpoints_via_diameter(mse: torch.Tensor):
    """
    Build an MST on the dense MSE matrix (edge weights = mse).
    Return (u, v) = endpoints of the MST diameter (longest weighted path).
    """
    N = mse.shape[0]
    if N <= 1:
        return (0, 0)

    device = mse.device
    used = torch.zeros(N, dtype=torch.bool, device=device)
    dist = torch.full((N,), float('inf'), device=device)
    parent = torch.full((N,), -1, dtype=torch.long, device=device)

    # start Prim from node 0
    used[0] = True
    dist = mse[0].clone()
    dist[0] = float('inf')
    parent[:] = 0   # every node's initial candidate attachment is the start node
    parent[0] = -1  # the root has no parent

    for _ in range(N - 1):
        masked = dist.clone()
        masked[used] = float('inf')
        j = int(torch.argmin(masked).item())
        used[j] = True

        # relax edges to unused nodes
        w = mse[j]
        update_mask = (~used) & (w < dist)
        dist[update_mask] = w[update_mask]
        parent[update_mask] = j

    # build adjacency list of the MST
    adj = [[] for _ in range(N)]
    for v in range(1, N):
        u = int(parent[v].item())
        if u >= 0:
            w = float(mse[u, v].item())
            adj[u].append((v, w))
            adj[v].append((u, w))

    def _farthest(src: int):
        # single-source longest distances on a tree via DFS
        distv = [-1.0] * N
        distv[src] = 0.0
        stack = [src]
        while stack:
            x = stack.pop()
            for y, w in adj[x]:
                if distv[y] < 0.0:
                    distv[y] = distv[x] + w
                    stack.append(y)
        far = max(range(N), key=lambda k: distv[k])
        return far, distv[far]

    a, _ = _farthest(0)
    b, _ = _farthest(a)
    return a, b


def double_ended_greedy_from_pair(left: int, right: int, mse: torch.Tensor):
    """
    Maintain a path [left ... right]. At each step, attach the unused frame
    with minimal MSE to either end (choosing the cheaper side).
    """
    N = mse.shape[0]
    used = torch.zeros(N, dtype=torch.bool, device=mse.device)
    used[left] = True
    used[right] = True

    path = [left, right]
    inf = float('inf')

    for _ in range(N - 2):
        # best candidate for the left end
        candL = mse[:, left].clone()
        candL[used] = inf
        kL = int(torch.argmin(candL).item())
        dL = float(candL[kL])

        # best candidate for the right end
        candR = mse[:, right].clone()
        candR[used] = inf
        kR = int(torch.argmin(candR).item())
        dR = float(candR[kR])

        if dL <= dR:
            path.insert(0, kL)
            used[kL] = True
            left = kL
        else:
            path.append(kR)
            used[kR] = True
            right = kR

    return path


def parse_shuffled_list(s: str):
    """
    Parse the 'shuffled_frames_list' column.
    Example cell:
        "130,288,254,17,63,..."
    """
    return [int(x) for x in str(s).split(",") if x.strip() != ""]


def find_video_path(video_id: str, videos_dir: str) -> str:
    """
    Resolve the video path for a given video_id.

    Tries:
        - videos_dir / "<video_id>"
        - videos_dir / "<video_id>.avi"
        - videos_dir / "<video_id>.*" where the extension is in VIDEO_EXTS
    """
    # direct exact path (some CSVs store the full filename)
    direct = os.path.join(videos_dir, video_id)
    if os.path.isfile(direct):
        return direct

    # try with the .avi extension
    direct_avi = direct + ".avi"
    if os.path.isfile(direct_avi):
        return direct_avi

    # fallback: any file that starts with video_id
    pattern = os.path.join(videos_dir, f"{video_id}*")
    candidates = [
        p for p in glob.glob(pattern)
        if os.path.splitext(p)[1].lower() in VIDEO_EXTS
    ]

    if not candidates:
        raise FileNotFoundError(
            f"No video file found for video_id={video_id} in {videos_dir}"
        )

    # deterministic choice
    candidates.sort(key=lambda x: (len(os.path.basename(x)), x))
    return candidates[0]


# =========================
# Video loading (grayscale)
# =========================

def load_video_gray(video_path: str, expected_num_frames: int = None) -> torch.Tensor:
    """
    Load frames from a shuffled video as grayscale,
    resize to IMG_SIZE, and send to DEVICE.

    Returns:
        frames: [N, 1, H, W] float32 in [0, 1] on DEVICE
    """
    if not os.path.isfile(video_path):
        raise FileNotFoundError(f"Video not found: {video_path}")

    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        raise IOError(f"Cannot open video: {video_path}")

    frames = []
    while True:
        ok, frame = cap.read()
        if not ok:
            break
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        gray = cv2.resize(gray, (IMG_SIZE, IMG_SIZE), interpolation=cv2.INTER_AREA)
        frames.append(gray)
    cap.release()

    if len(frames) == 0:
        raise ValueError(f"No frames read from {video_path}")

    if expected_num_frames is not None and len(frames) != expected_num_frames:
        print(
            f"[WARN] {os.path.basename(video_path)}: "
            f"expected_num_frames={expected_num_frames}, read={len(frames)}"
        )

    arr = np.stack(frames, axis=0)     # [N, H, W]
    t = torch.from_numpy(arr).float()  # [N, H, W]
    t = t.unsqueeze(1) / 255.0         # [N, 1, H, W] in [0, 1]
    return t.to(DEVICE)


# =========================
# Path construction
# =========================

def build_best_path(mse: torch.Tensor):
    """Build a temporal path using MST diameter endpoints and double-ended greedy growth."""
    N = mse.shape[0]
    if N <= 2:
        return list(range(N))

    # smart seed via MST diameter
    a, b = _mst_endpoints_via_diameter(mse)

    # grow from both ends
    path = double_ended_greedy_from_pair(a, b, mse)

    return path


# =========================
# Per-video prediction
# =========================

def predict_order_for_video(video_id: str,
                            shuffled_order,
                            videos_dir: str):
    """
    Pipeline for a single video_id:
    - load shuffled video frames
    - compute the MSE matrix
    - build the best greedy path
    - map path positions back to original frame indices
    """
    shuffled_order = list(shuffled_order)
    expected_num_frames = len(shuffled_order)

    video_path = find_video_path(video_id, videos_dir)
    frames = load_video_gray(video_path, expected_num_frames=expected_num_frames)
    frames = frames[:, 0:1, :, :]  # use only the single grayscale channel for MSE
    N = frames.shape[0]

    if N != expected_num_frames:
        print(
            f"[WARN] {video_id}: csv_frames={expected_num_frames}, "
            f"video_frames={N}. Using min of both."
        )
        m = min(expected_num_frames, N)
        shuffled_order = shuffled_order[:m]
        frames = frames[:m]
        N = m

    if N <= 1:
        return [int(x) for x in shuffled_order]

    mse = compute_mse_matrix(frames)
    path = build_best_path(mse)

    predicted = [int(shuffled_order[idx]) for idx in path]
    return predicted

# =========================
# Process all CSVs
# =========================

def process_all_csvs(csv_dir: str, videos_dir: str, out_dir: str):
    """
    For each CSV in csv_dir:
    - read video_id, shuffled_frames_list
    - compute the predicted order for each video
    - write a prediction CSV with the same filename into out_dir
    """
    os.makedirs(out_dir, exist_ok=True)

    csv_paths = sorted(glob.glob(os.path.join(csv_dir, "*.csv")))
    if not csv_paths:
        raise FileNotFoundError(f"No CSV files found in {csv_dir}")

    for csv_path in csv_paths:
        df = pd.read_csv(csv_path)
        rows = []

        if "video_id" not in df.columns or "shuffled_frames_list" not in df.columns:
            raise ValueError(
                f"CSV {csv_path} must contain 'video_id' and 'shuffled_frames_list' columns."
            )

        for _, row in df.iterrows():
            video_id = str(row["video_id"]).strip()
            shuffled_order = parse_shuffled_list(row["shuffled_frames_list"])
            pred = predict_order_for_video(video_id, shuffled_order, videos_dir)
            pred_str = ",".join(str(x) for x in pred)
            rows.append({"video_id": video_id, "predicted_frames_list": pred_str})

        out_csv = os.path.join(out_dir, os.path.basename(csv_path))
        pd.DataFrame(rows).to_csv(out_csv, index=False)
        print(f"[OK] {os.path.basename(csv_path)} -> {os.path.basename(out_csv)}")


# =========================
# CLI
# =========================

def parse_args():
    parser = argparse.ArgumentParser(
        description="Reconstruct frame order from shuffled videos "
                    "using grayscale MSE and CSV metadata."
    )
    parser.add_argument(
        "--csv_dir",
        type=str,
        required=True,
        help="Directory with shuffled CSV files (e.g. shuffled_csvs).",
    )
    parser.add_argument(
        "--videos_dir",
        type=str,
        required=True,
        help="Directory with shuffled videos (e.g. UCF101_videos_shuffled).",
    )
    parser.add_argument(
        "--out_dir",
        type=str,
        default="./shuffled_artifacts/ordered_CSVs",
        help="Output directory for prediction CSVs.",
    )
    return parser.parse_args()


def main():
    args = parse_args()
    process_all_csvs(args.csv_dir, args.videos_dir, args.out_dir)


if __name__ == "__main__":
    main()
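
For reference, a tiny synthetic sketch (not part of this commit) exercising the two core helpers above: it builds a smooth "sliding square" sequence, checks that temporally adjacent frames have the smallest pairwise MSE, and checks that build_best_path returns a permutation over all frames. The sequence length and the toy pattern are illustrative assumptions.

# Synthetic check of compute_mse_matrix and build_best_path (sketch only).
import torch
from reorder_frames_algorithm import compute_mse_matrix, build_best_path, DEVICE, IMG_SIZE

N = 16
frames = torch.zeros(N, 1, IMG_SIZE, IMG_SIZE, device=DEVICE)
for t in range(N):
    frames[t, 0, :, 3 * t:3 * t + 10] = 1.0  # a bright square slides 3 px per frame

mse = compute_mse_matrix(frames)             # [N, N] with a zero diagonal
nn = (mse + torch.eye(N, device=DEVICE) * 1e9).argmin(dim=1)
print(all(abs(int(nn[i]) - i) == 1 for i in range(N)))  # nearest neighbour is adjacent in time

path = build_best_path(mse)
print(sorted(path) == list(range(N)))        # the path visits every frame exactly once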