halqadasi committed on
Commit
a82c2e6
·
1 Parent(s): 85a80ea

update the table to a video gallery

Browse files
Files changed (2) hide show
  1. app.py +4 -28
  2. main.py +0 -70
app.py CHANGED
@@ -10,12 +10,10 @@ import gradio as gr
10
  from huggingface_hub import hf_hub_download
11
 
12
 
13
- # Make project root importable so we can reuse main.py
14
  ROOT_DIR = Path(__file__).resolve().parent
15
  if str(ROOT_DIR) not in sys.path:
16
  sys.path.append(str(ROOT_DIR))
17
 
18
- # Import only the existing pipelines from main.py
19
  from main import run_both_tasks, run_frame_reordering, run_outlier_detection # type: ignore
20
 
21
 
@@ -43,7 +41,6 @@ def _get_example_videos_per_task(max_examples: int = 7) -> Dict[str, List[List[o
43
  tasks = ("outliers", "reorder", "both")
44
  examples: Dict[str, List[List[object]]] = {t: [] for t in tasks}
45
 
46
- # List of video files to download (add your actual filenames here)
47
  video_files = [
48
  "v_FieldHockeyPenalty_g23_c04.mp4",
49
  "v_BalanceBeam_g11_c03.mp4",
@@ -54,36 +51,24 @@ def _get_example_videos_per_task(max_examples: int = 7) -> Dict[str, List[List[o
54
  "v_BalanceBeam_g13_c01.mp4",
55
  ]
56
 
57
- # Create a temp directory for examples
58
  temp_dir = Path(tempfile.gettempdir()) / "gradio_examples"
59
  temp_dir.mkdir(exist_ok=True)
60
 
61
  try:
62
  for video_file in video_files[:max_examples]:
63
- # Download video from HF dataset
64
  cached_path = hf_hub_download(
65
  repo_id=dataset_repo,
66
  filename=video_file,
67
  repo_type="dataset"
68
  )
69
 
70
- # Copy to temp directory to avoid Gradio path restrictions
71
  temp_video_path = temp_dir / video_file
72
  if not temp_video_path.exists():
73
  shutil.copy2(cached_path, temp_video_path)
74
 
75
  for t in tasks:
76
  if len(examples[t]) < max_examples:
77
- # Inputs: video, model_type, eps, min_samples, batch_size
78
- examples[t].append(
79
- [
80
- str(temp_video_path),
81
- "clip",
82
- 0.5,
83
- 40,
84
- 64,
85
- ]
86
- )
87
 
88
  if all(len(examples[t]) >= max_examples for t in tasks):
89
  break
@@ -125,7 +110,6 @@ def process_video(
125
  if not input_path.exists():
126
  raise gr.Error(f"Video not found: {input_path}")
127
 
128
- # Outputs are written to a local folder relative to the project root.
129
  outputs_root = ROOT_DIR / "hf_space_outputs"
130
  outputs_root.mkdir(parents=True, exist_ok=True)
131
 
@@ -153,7 +137,7 @@ def _build_task_tab(
153
  task_value: str,
154
  title: str,
155
  description: str,
156
- examples: List[List[object]],
157
  ):
158
  """Create a single tab for a specific task."""
159
  with gr.Tab(title):
@@ -196,14 +180,12 @@ def _build_task_tab(
196
  with gr.Column():
197
  output_video = gr.Video(label="Processed video")
198
 
199
- # Update DBSCAN parameters when model changes
200
  model_input.change(
201
  fn=update_dbscan_params,
202
  inputs=[model_input],
203
  outputs=[eps_input, min_samples_input],
204
  )
205
 
206
- # Fix the task per tab; user can upload any video.
207
  run_button.click(
208
  fn=partial(process_video, task=task_value),
209
  inputs=[
@@ -219,14 +201,8 @@ def _build_task_tab(
219
  if examples:
220
  gr.Examples(
221
  examples=examples,
222
- inputs=[
223
- video_input,
224
- model_input,
225
- eps_input,
226
- min_samples_input,
227
- batch_size_input,
228
- ],
229
- label="Examples from ./inference",
230
  )
231
 
232
 
 
10
  from huggingface_hub import hf_hub_download
11
 
12
 
 
13
  ROOT_DIR = Path(__file__).resolve().parent
14
  if str(ROOT_DIR) not in sys.path:
15
  sys.path.append(str(ROOT_DIR))
16
 
 
17
  from main import run_both_tasks, run_frame_reordering, run_outlier_detection # type: ignore
18
 
19
 
 
41
  tasks = ("outliers", "reorder", "both")
42
  examples: Dict[str, List[List[object]]] = {t: [] for t in tasks}
43
 
 
44
  video_files = [
45
  "v_FieldHockeyPenalty_g23_c04.mp4",
46
  "v_BalanceBeam_g11_c03.mp4",
 
51
  "v_BalanceBeam_g13_c01.mp4",
52
  ]
53
 
 
54
  temp_dir = Path(tempfile.gettempdir()) / "gradio_examples"
55
  temp_dir.mkdir(exist_ok=True)
56
 
57
  try:
58
  for video_file in video_files[:max_examples]:
 
59
  cached_path = hf_hub_download(
60
  repo_id=dataset_repo,
61
  filename=video_file,
62
  repo_type="dataset"
63
  )
64
 
 
65
  temp_video_path = temp_dir / video_file
66
  if not temp_video_path.exists():
67
  shutil.copy2(cached_path, temp_video_path)
68
 
69
  for t in tasks:
70
  if len(examples[t]) < max_examples:
71
+ examples[t].append(str(temp_video_path))
 
 
 
 
 
 
 
 
 
72
 
73
  if all(len(examples[t]) >= max_examples for t in tasks):
74
  break
 
110
  if not input_path.exists():
111
  raise gr.Error(f"Video not found: {input_path}")
112
 
 
113
  outputs_root = ROOT_DIR / "hf_space_outputs"
114
  outputs_root.mkdir(parents=True, exist_ok=True)
115
 
 
137
  task_value: str,
138
  title: str,
139
  description: str,
140
+ examples: List[str],
141
  ):
142
  """Create a single tab for a specific task."""
143
  with gr.Tab(title):
 
180
  with gr.Column():
181
  output_video = gr.Video(label="Processed video")
182
 
 
183
  model_input.change(
184
  fn=update_dbscan_params,
185
  inputs=[model_input],
186
  outputs=[eps_input, min_samples_input],
187
  )
188
 
 
189
  run_button.click(
190
  fn=partial(process_video, task=task_value),
191
  inputs=[
 
201
  if examples:
202
  gr.Examples(
203
  examples=examples,
204
+ inputs=video_input,
205
+ label="Example Videos",
 
 
 
 
 
 
206
  )
207
 
208
 
main.py CHANGED
@@ -52,16 +52,9 @@ from tqdm import tqdm
52
  from outliers_removal_algorithm import dbscan_outliers, USE_GPU
53
  from reorder_frames_algorithm import load_video_gray, compute_mse_matrix, build_best_path
54
 
55
- # Device configuration
56
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
57
-
58
- # Supported video extensions
59
  VIDEO_EXTS = ('.avi', '.mp4', '.mov', '.mkv')
60
 
61
- # ==========================================
62
- # EMBEDDING EXTRACTION (Outlier Detection)
63
- # ==========================================
64
-
65
  def load_embedding_model(model_type='clip', model_path=None, device='cuda'):
66
  """Load CLIP, DINOv2, or ResNet18 model for embedding extraction."""
67
  print(f"Loading {model_type.upper()} model...")
@@ -118,16 +111,10 @@ def load_embedding_model(model_type='clip', model_path=None, device='cuda'):
118
  features = feature_extractor(images)
119
  feats = torch.tensor(features, device=device)
120
 
121
- # Debug: Print shape before processing
122
  if feats.dim() > 2:
123
- print(f"DEBUG: DINOv2 features shape before squeeze: {feats.shape}")
124
  feats = feats.squeeze(1)
125
 
126
  feats = torch.nn.functional.normalize(feats, dim=-1)
127
-
128
- # Debug: Print statistics
129
- print(f"DEBUG: DINOv2 batch - shape: {feats.shape}, mean: {feats.mean():.4f}, std: {feats.std():.4f}, min: {feats.min():.4f}, max: {feats.max():.4f}")
130
-
131
  return feats
132
 
133
  print(f"DINOv2 model loaded: {model_path} ({embedding_dim}-dim)")
@@ -136,9 +123,7 @@ def load_embedding_model(model_type='clip', model_path=None, device='cuda'):
136
  elif model_type == 'resnet18':
137
  from torchvision import models, transforms
138
 
139
- # Load ResNet18 pretrained model
140
  model = models.resnet18(pretrained=True)
141
- # Remove the final classification layer to get embeddings
142
  model = torch.nn.Sequential(*list(model.children())[:-1])
143
  model = model.to(device)
144
  model.eval()
@@ -227,10 +212,6 @@ def extract_video_embeddings(video_path, extract_fn, preprocess, device='cuda',
227
  return embeddings, fps, width, height
228
 
229
 
230
- # ==========================================
231
- # VIDEO SAVING
232
- # ==========================================
233
-
234
  def save_cleaned_video(video_path, predictions, output_path, fps, width, height):
235
  """Create cleaned video with outliers removed."""
236
  num_outliers = predictions.sum()
@@ -271,7 +252,6 @@ def save_cleaned_video(video_path, predictions, output_path, fps, width, height)
271
 
272
  def save_reordered_video(video_path, frame_order, output_path):
273
  """Create reordered video using predicted frame order."""
274
- # Load all frames
275
  cap = cv2.VideoCapture(str(video_path))
276
  frames = []
277
  while True:
@@ -288,7 +268,6 @@ def save_reordered_video(video_path, frame_order, output_path):
288
  print(f" Total frames: {len(frames)}")
289
  print(f" Reconstructed order: {len(frame_order)} frames")
290
 
291
- # Write reordered video
292
  fourcc = cv2.VideoWriter_fourcc(*'XVID')
293
  out = cv2.VideoWriter(str(output_path), fourcc, fps, (width, height))
294
 
@@ -305,7 +284,6 @@ def save_reordered_video(video_path, frame_order, output_path):
305
 
306
  def save_cleaned_and_reordered_video(video_path, outlier_predictions, frame_order, output_path):
307
  """Create video with outliers removed and frames reordered in one pass."""
308
- # Load all frames
309
  cap = cv2.VideoCapture(str(video_path))
310
  all_frames = []
311
  while True:
@@ -318,7 +296,6 @@ def save_cleaned_and_reordered_video(video_path, outlier_predictions, frame_orde
318
  height, width = all_frames[0].shape[:2]
319
  cap.release()
320
 
321
- # Filter out outliers
322
  inlier_frames = [all_frames[i] for i in range(len(all_frames))
323
  if i < len(outlier_predictions) and not outlier_predictions[i]]
324
 
@@ -329,7 +306,6 @@ def save_cleaned_and_reordered_video(video_path, outlier_predictions, frame_orde
329
  print(f" Inlier frames: {len(inlier_frames)} ({100*len(inlier_frames)/len(all_frames):.1f}%)")
330
  print(f" Reordered frames: {len(frame_order)}")
331
 
332
- # Write reordered video with only inlier frames
333
  fourcc = cv2.VideoWriter_fourcc(*'XVID')
334
  out = cv2.VideoWriter(str(output_path), fourcc, fps, (width, height))
335
 
@@ -344,28 +320,21 @@ def save_cleaned_and_reordered_video(video_path, outlier_predictions, frame_orde
344
  return output_path
345
 
346
 
347
- # ==========================================
348
- # MAIN PIPELINE
349
- # ==========================================
350
-
351
  def run_outlier_detection(video_path, output_path, args):
352
  """Run outlier detection pipeline using imported functions."""
353
  print("OUTLIER DETECTION")
354
  print(f"GPU Acceleration: {'Enabled (cuML)' if USE_GPU else 'Disabled (CPU/sklearn)'}")
355
 
356
- # Load embedding model
357
  extract_fn, preprocess, embedding_dim = load_embedding_model(
358
  model_type=args.model_type,
359
  model_path=args.model_path,
360
  device=DEVICE
361
  )
362
 
363
- # Extract embeddings
364
  embeddings, fps, width, height = extract_video_embeddings(
365
  video_path, extract_fn, preprocess, DEVICE, args.batch_size
366
  )
367
 
368
- # Detect outliers using DBSCAN
369
  print(f"\nRunning DBSCAN outlier detection...")
370
  predictions = dbscan_outliers(
371
  embeddings,
@@ -373,7 +342,6 @@ def run_outlier_detection(video_path, output_path, args):
373
  min_samples=args.min_samples
374
  )
375
 
376
- # Save cleaned video
377
  cleaned_path = save_cleaned_video(video_path, predictions, output_path, fps, width, height)
378
  return cleaned_path
379
 
@@ -394,7 +362,6 @@ def run_frame_reordering(video_path, output_path):
394
  print("Building temporal path...")
395
  path = build_best_path(mse)
396
 
397
- # Save reordered video
398
  reordered_path = save_reordered_video(video_path, path, output_path)
399
  return reordered_path
400
 
@@ -406,7 +373,6 @@ def run_both_tasks(video_path, output_path, args):
406
  print("=" * 80)
407
  print(f"GPU Acceleration: {'Enabled (cuML)' if USE_GPU else 'Disabled (CPU/sklearn)'}")
408
 
409
- # Load embedding model and extract embeddings
410
  extract_fn, preprocess, embedding_dim = load_embedding_model(
411
  model_type=args.model_type,
412
  model_path=args.model_path,
@@ -417,19 +383,7 @@ def run_both_tasks(video_path, output_path, args):
417
  video_path, extract_fn, preprocess, DEVICE, args.batch_size
418
  )
419
 
420
- # Detect outliers using DBSCAN
421
  print(f"\nRunning DBSCAN outlier detection...")
422
- print(f"DEBUG: Final embeddings before DBSCAN - shape: {embeddings.shape}, mean: {embeddings.mean():.4f}, std: {embeddings.std():.4f}")
423
- print(f"DEBUG: Embeddings range - min: {embeddings.min():.4f}, max: {embeddings.max():.4f}")
424
-
425
- # Compute pairwise distances to understand embedding space
426
- import numpy as np
427
- from scipy.spatial.distance import pdist
428
- emb_np = embeddings.cpu().numpy() if hasattr(embeddings, 'cpu') else embeddings
429
- distances = pdist(emb_np, metric='euclidean')
430
- print(f"DEBUG: Pairwise distances - mean: {distances.mean():.4f}, std: {distances.std():.4f}, min: {distances.min():.4f}, max: {distances.max():.4f}")
431
- print(f"DEBUG: Current eps={args.eps}, so distances > eps will not form clusters")
432
-
433
  outlier_predictions = dbscan_outliers(
434
  embeddings,
435
  eps=args.eps,
@@ -443,20 +397,17 @@ def run_both_tasks(video_path, output_path, args):
443
  print(f" Inliers: {num_inliers} ({100*num_inliers/len(outlier_predictions):.1f}%)")
444
  print(f" Outliers: {num_outliers} ({100*num_outliers/len(outlier_predictions):.1f}%)")
445
 
446
- # Step 2: Frame reordering on inlier frames
447
  print("\n" + "=" * 80)
448
  print("STEP 2: FRAME REORDERING (on inlier frames)")
449
  print("=" * 80)
450
 
451
  all_frames = load_video_gray(str(video_path))
452
 
453
- # Filter to only inlier frames
454
  inlier_frames = []
455
  for i in range(len(all_frames)):
456
  if i < len(outlier_predictions) and not outlier_predictions[i]:
457
  inlier_frames.append(all_frames[i])
458
 
459
- # Check if we have any inlier frames
460
  if len(inlier_frames) == 0:
461
  print("\n⚠️ WARNING: All frames were detected as outliers!")
462
  print("This typically means the DBSCAN parameters are too strict for this video.")
@@ -467,7 +418,6 @@ def run_both_tasks(video_path, output_path, args):
467
  print(" - Try a different embedding model")
468
  print("\nReturning original video without processing...")
469
 
470
- # Copy original video to output
471
  import shutil
472
  shutil.copy2(video_path, output_path)
473
  return str(output_path)
@@ -476,7 +426,6 @@ def run_both_tasks(video_path, output_path, args):
476
  mse = compute_mse_matrix(inlier_frames)
477
  path = build_best_path(mse)
478
 
479
- # Save final video (cleaned and reordered)
480
  final_path = save_cleaned_and_reordered_video(video_path, outlier_predictions, path, output_path)
481
  return final_path
482
 
@@ -486,13 +435,11 @@ def get_output_path(input_path, output_dir, suffix="_fixed"):
486
  input_path = Path(input_path)
487
 
488
  if output_dir:
489
- # Use specified output directory
490
  output_dir = Path(output_dir)
491
  output_dir.mkdir(exist_ok=True, parents=True)
492
  output_name = f"{input_path.stem}{suffix}{input_path.suffix}"
493
  return output_dir / output_name
494
  else:
495
- # Save in same directory as input
496
  output_name = f"{input_path.stem}{suffix}{input_path.suffix}"
497
  return input_path.parent / output_name
498
 
@@ -511,10 +458,8 @@ def process_single_video(video_path, args):
511
  print(f"Task: {args.task.upper()}")
512
  print("=" * 80)
513
 
514
- # Determine output path
515
  output_path = get_output_path(video_path, args.output_dir)
516
 
517
- # Execute tasks
518
  if args.task == "outliers":
519
  run_outlier_detection(str(video_path), str(output_path), args)
520
 
@@ -522,7 +467,6 @@ def process_single_video(video_path, args):
522
  run_frame_reordering(str(video_path), str(output_path))
523
 
524
  elif args.task == "both":
525
- # Run both tasks without saving intermediate video
526
  run_both_tasks(str(video_path), str(output_path), args)
527
 
528
  print("\n" + "=" * 80)
@@ -539,7 +483,6 @@ def process_directory(input_dir, args):
539
  print(f"Error: Directory not found: {input_dir}")
540
  return
541
 
542
- # Find all video files
543
  video_files = []
544
  for ext in VIDEO_EXTS:
545
  video_files.extend(input_dir.glob(f"*{ext}"))
@@ -555,15 +498,12 @@ def process_directory(input_dir, args):
555
  print(f"Found {len(video_files)} video(s) in {input_dir}")
556
  print("=" * 80)
557
 
558
- # Process each video
559
  for i, video_path in enumerate(video_files, 1):
560
  print(f"\n[{i}/{len(video_files)}] Processing: {video_path.name}")
561
 
562
- # Determine output path
563
  output_path = get_output_path(video_path, args.output_dir)
564
 
565
  try:
566
- # Execute tasks
567
  if args.task == "outliers":
568
  run_outlier_detection(str(video_path), str(output_path), args)
569
 
@@ -571,7 +511,6 @@ def process_directory(input_dir, args):
571
  run_frame_reordering(str(video_path), str(output_path))
572
 
573
  elif args.task == "both":
574
- # Run both tasks without saving intermediate video
575
  run_both_tasks(str(video_path), str(output_path), args)
576
 
577
  print(f" ✓ Saved: {output_path}")
@@ -590,29 +529,24 @@ def main():
590
  description="Main script for video processing: outlier detection (DBSCAN) and/or frame reordering"
591
  )
592
 
593
- # Input arguments (mutually exclusive)
594
  input_group = parser.add_mutually_exclusive_group(required=True)
595
  input_group.add_argument("--video",
596
  help="Process a single video file")
597
  input_group.add_argument("--input-dir",
598
  help="Process all videos in a directory (default: ./inference)")
599
 
600
- # Task selection
601
  parser.add_argument("--task", required=True, choices=["outliers", "reorder", "both"],
602
  help="Task to perform: outliers, reorder, or both")
603
 
604
- # Output directory (optional)
605
  parser.add_argument("--output-dir",
606
  help="Output directory (default: same as input directory)")
607
 
608
- # Outlier detection parameters
609
  parser.add_argument("--model-type", default="clip", choices=["clip", "dinov2", "resnet18"],
610
  help="Embedding model type for outlier detection")
611
  parser.add_argument("--model-path", help="Path to DINOv2 model (optional)")
612
  parser.add_argument("--batch-size", type=int, default=128,
613
  help="Batch size for embedding extraction")
614
 
615
- # DBSCAN parameters
616
  parser.add_argument("--eps", type=float, default=0.5,
617
  help="DBSCAN: Epsilon parameter")
618
  parser.add_argument("--min-samples", type=int, default=40,
@@ -620,13 +554,9 @@ def main():
620
 
621
  args = parser.parse_args()
622
 
623
- # Default to ./inference if neither --video nor --input-dir specified
624
- # (This won't happen due to required=True, but keeping for clarity)
625
-
626
  if args.task in ["outliers", "both"]:
627
  print(f"DBSCAN parameters: eps={args.eps}, min_samples={args.min_samples}")
628
 
629
- # Process based on input mode
630
  if args.video:
631
  process_single_video(args.video, args)
632
  elif args.input_dir:
 
52
  from outliers_removal_algorithm import dbscan_outliers, USE_GPU
53
  from reorder_frames_algorithm import load_video_gray, compute_mse_matrix, build_best_path
54
 
 
55
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 
 
56
  VIDEO_EXTS = ('.avi', '.mp4', '.mov', '.mkv')
57
 
 
 
 
 
58
  def load_embedding_model(model_type='clip', model_path=None, device='cuda'):
59
  """Load CLIP, DINOv2, or ResNet18 model for embedding extraction."""
60
  print(f"Loading {model_type.upper()} model...")
 
111
  features = feature_extractor(images)
112
  feats = torch.tensor(features, device=device)
113
 
 
114
  if feats.dim() > 2:
 
115
  feats = feats.squeeze(1)
116
 
117
  feats = torch.nn.functional.normalize(feats, dim=-1)
 
 
 
 
118
  return feats
119
 
120
  print(f"DINOv2 model loaded: {model_path} ({embedding_dim}-dim)")
 
123
  elif model_type == 'resnet18':
124
  from torchvision import models, transforms
125
 
 
126
  model = models.resnet18(pretrained=True)
 
127
  model = torch.nn.Sequential(*list(model.children())[:-1])
128
  model = model.to(device)
129
  model.eval()
 
212
  return embeddings, fps, width, height
213
 
214
 
 
 
 
 
215
  def save_cleaned_video(video_path, predictions, output_path, fps, width, height):
216
  """Create cleaned video with outliers removed."""
217
  num_outliers = predictions.sum()
 
252
 
253
  def save_reordered_video(video_path, frame_order, output_path):
254
  """Create reordered video using predicted frame order."""
 
255
  cap = cv2.VideoCapture(str(video_path))
256
  frames = []
257
  while True:
 
268
  print(f" Total frames: {len(frames)}")
269
  print(f" Reconstructed order: {len(frame_order)} frames")
270
 
 
271
  fourcc = cv2.VideoWriter_fourcc(*'XVID')
272
  out = cv2.VideoWriter(str(output_path), fourcc, fps, (width, height))
273
 
 
284
 
285
  def save_cleaned_and_reordered_video(video_path, outlier_predictions, frame_order, output_path):
286
  """Create video with outliers removed and frames reordered in one pass."""
 
287
  cap = cv2.VideoCapture(str(video_path))
288
  all_frames = []
289
  while True:
 
296
  height, width = all_frames[0].shape[:2]
297
  cap.release()
298
 
 
299
  inlier_frames = [all_frames[i] for i in range(len(all_frames))
300
  if i < len(outlier_predictions) and not outlier_predictions[i]]
301
 
 
306
  print(f" Inlier frames: {len(inlier_frames)} ({100*len(inlier_frames)/len(all_frames):.1f}%)")
307
  print(f" Reordered frames: {len(frame_order)}")
308
 
 
309
  fourcc = cv2.VideoWriter_fourcc(*'XVID')
310
  out = cv2.VideoWriter(str(output_path), fourcc, fps, (width, height))
311
 
 
320
  return output_path
321
 
322
 
 
 
 
 
323
  def run_outlier_detection(video_path, output_path, args):
324
  """Run outlier detection pipeline using imported functions."""
325
  print("OUTLIER DETECTION")
326
  print(f"GPU Acceleration: {'Enabled (cuML)' if USE_GPU else 'Disabled (CPU/sklearn)'}")
327
 
 
328
  extract_fn, preprocess, embedding_dim = load_embedding_model(
329
  model_type=args.model_type,
330
  model_path=args.model_path,
331
  device=DEVICE
332
  )
333
 
 
334
  embeddings, fps, width, height = extract_video_embeddings(
335
  video_path, extract_fn, preprocess, DEVICE, args.batch_size
336
  )
337
 
 
338
  print(f"\nRunning DBSCAN outlier detection...")
339
  predictions = dbscan_outliers(
340
  embeddings,
 
342
  min_samples=args.min_samples
343
  )
344
 
 
345
  cleaned_path = save_cleaned_video(video_path, predictions, output_path, fps, width, height)
346
  return cleaned_path
347
 
 
362
  print("Building temporal path...")
363
  path = build_best_path(mse)
364
 
 
365
  reordered_path = save_reordered_video(video_path, path, output_path)
366
  return reordered_path
367
 
 
373
  print("=" * 80)
374
  print(f"GPU Acceleration: {'Enabled (cuML)' if USE_GPU else 'Disabled (CPU/sklearn)'}")
375
 
 
376
  extract_fn, preprocess, embedding_dim = load_embedding_model(
377
  model_type=args.model_type,
378
  model_path=args.model_path,
 
383
  video_path, extract_fn, preprocess, DEVICE, args.batch_size
384
  )
385
 
 
386
  print(f"\nRunning DBSCAN outlier detection...")
 
 
 
 
 
 
 
 
 
 
 
387
  outlier_predictions = dbscan_outliers(
388
  embeddings,
389
  eps=args.eps,
 
397
  print(f" Inliers: {num_inliers} ({100*num_inliers/len(outlier_predictions):.1f}%)")
398
  print(f" Outliers: {num_outliers} ({100*num_outliers/len(outlier_predictions):.1f}%)")
399
 
 
400
  print("\n" + "=" * 80)
401
  print("STEP 2: FRAME REORDERING (on inlier frames)")
402
  print("=" * 80)
403
 
404
  all_frames = load_video_gray(str(video_path))
405
 
 
406
  inlier_frames = []
407
  for i in range(len(all_frames)):
408
  if i < len(outlier_predictions) and not outlier_predictions[i]:
409
  inlier_frames.append(all_frames[i])
410
 
 
411
  if len(inlier_frames) == 0:
412
  print("\n⚠️ WARNING: All frames were detected as outliers!")
413
  print("This typically means the DBSCAN parameters are too strict for this video.")
 
418
  print(" - Try a different embedding model")
419
  print("\nReturning original video without processing...")
420
 
 
421
  import shutil
422
  shutil.copy2(video_path, output_path)
423
  return str(output_path)
 
426
  mse = compute_mse_matrix(inlier_frames)
427
  path = build_best_path(mse)
428
 
 
429
  final_path = save_cleaned_and_reordered_video(video_path, outlier_predictions, path, output_path)
430
  return final_path
431
 
 
435
  input_path = Path(input_path)
436
 
437
  if output_dir:
 
438
  output_dir = Path(output_dir)
439
  output_dir.mkdir(exist_ok=True, parents=True)
440
  output_name = f"{input_path.stem}{suffix}{input_path.suffix}"
441
  return output_dir / output_name
442
  else:
 
443
  output_name = f"{input_path.stem}{suffix}{input_path.suffix}"
444
  return input_path.parent / output_name
445
 
 
458
  print(f"Task: {args.task.upper()}")
459
  print("=" * 80)
460
 
 
461
  output_path = get_output_path(video_path, args.output_dir)
462
 
 
463
  if args.task == "outliers":
464
  run_outlier_detection(str(video_path), str(output_path), args)
465
 
 
467
  run_frame_reordering(str(video_path), str(output_path))
468
 
469
  elif args.task == "both":
 
470
  run_both_tasks(str(video_path), str(output_path), args)
471
 
472
  print("\n" + "=" * 80)
 
483
  print(f"Error: Directory not found: {input_dir}")
484
  return
485
 
 
486
  video_files = []
487
  for ext in VIDEO_EXTS:
488
  video_files.extend(input_dir.glob(f"*{ext}"))
 
498
  print(f"Found {len(video_files)} video(s) in {input_dir}")
499
  print("=" * 80)
500
 
 
501
  for i, video_path in enumerate(video_files, 1):
502
  print(f"\n[{i}/{len(video_files)}] Processing: {video_path.name}")
503
 
 
504
  output_path = get_output_path(video_path, args.output_dir)
505
 
506
  try:
 
507
  if args.task == "outliers":
508
  run_outlier_detection(str(video_path), str(output_path), args)
509
 
 
511
  run_frame_reordering(str(video_path), str(output_path))
512
 
513
  elif args.task == "both":
 
514
  run_both_tasks(str(video_path), str(output_path), args)
515
 
516
  print(f" ✓ Saved: {output_path}")
 
529
  description="Main script for video processing: outlier detection (DBSCAN) and/or frame reordering"
530
  )
531
 
 
532
  input_group = parser.add_mutually_exclusive_group(required=True)
533
  input_group.add_argument("--video",
534
  help="Process a single video file")
535
  input_group.add_argument("--input-dir",
536
  help="Process all videos in a directory (default: ./inference)")
537
 
 
538
  parser.add_argument("--task", required=True, choices=["outliers", "reorder", "both"],
539
  help="Task to perform: outliers, reorder, or both")
540
 
 
541
  parser.add_argument("--output-dir",
542
  help="Output directory (default: same as input directory)")
543
 
 
544
  parser.add_argument("--model-type", default="clip", choices=["clip", "dinov2", "resnet18"],
545
  help="Embedding model type for outlier detection")
546
  parser.add_argument("--model-path", help="Path to DINOv2 model (optional)")
547
  parser.add_argument("--batch-size", type=int, default=128,
548
  help="Batch size for embedding extraction")
549
 
 
550
  parser.add_argument("--eps", type=float, default=0.5,
551
  help="DBSCAN: Epsilon parameter")
552
  parser.add_argument("--min-samples", type=int, default=40,
 
554
 
555
  args = parser.parse_args()
556
 
 
 
 
557
  if args.task in ["outliers", "both"]:
558
  print(f"DBSCAN parameters: eps={args.eps}, min_samples={args.min_samples}")
559
 
 
560
  if args.video:
561
  process_single_video(args.video, args)
562
  elif args.input_dir: