halqadasi committed on
Commit ·
a82c2e6
1
Parent(s): 85a80ea
update the table to video gallery
Browse files
app.py
CHANGED
|
@@ -10,12 +10,10 @@ import gradio as gr
|
|
| 10 |
from huggingface_hub import hf_hub_download
|
| 11 |
|
| 12 |
|
| 13 |
-
# Make project root importable so we can reuse main.py
|
| 14 |
ROOT_DIR = Path(__file__).resolve().parent
|
| 15 |
if str(ROOT_DIR) not in sys.path:
|
| 16 |
sys.path.append(str(ROOT_DIR))
|
| 17 |
|
| 18 |
-
# Import only the existing pipelines from main.py
|
| 19 |
from main import run_both_tasks, run_frame_reordering, run_outlier_detection # type: ignore
|
| 20 |
|
| 21 |
|
|
@@ -43,7 +41,6 @@ def _get_example_videos_per_task(max_examples: int = 7) -> Dict[str, List[List[o
|
|
| 43 |
tasks = ("outliers", "reorder", "both")
|
| 44 |
examples: Dict[str, List[List[object]]] = {t: [] for t in tasks}
|
| 45 |
|
| 46 |
-
# List of video files to download (add your actual filenames here)
|
| 47 |
video_files = [
|
| 48 |
"v_FieldHockeyPenalty_g23_c04.mp4",
|
| 49 |
"v_BalanceBeam_g11_c03.mp4",
|
|
@@ -54,36 +51,24 @@ def _get_example_videos_per_task(max_examples: int = 7) -> Dict[str, List[List[o
|
|
| 54 |
"v_BalanceBeam_g13_c01.mp4",
|
| 55 |
]
|
| 56 |
|
| 57 |
-
# Create a temp directory for examples
|
| 58 |
temp_dir = Path(tempfile.gettempdir()) / "gradio_examples"
|
| 59 |
temp_dir.mkdir(exist_ok=True)
|
| 60 |
|
| 61 |
try:
|
| 62 |
for video_file in video_files[:max_examples]:
|
| 63 |
-
# Download video from HF dataset
|
| 64 |
cached_path = hf_hub_download(
|
| 65 |
repo_id=dataset_repo,
|
| 66 |
filename=video_file,
|
| 67 |
repo_type="dataset"
|
| 68 |
)
|
| 69 |
|
| 70 |
-
# Copy to temp directory to avoid Gradio path restrictions
|
| 71 |
temp_video_path = temp_dir / video_file
|
| 72 |
if not temp_video_path.exists():
|
| 73 |
shutil.copy2(cached_path, temp_video_path)
|
| 74 |
|
| 75 |
for t in tasks:
|
| 76 |
if len(examples[t]) < max_examples:
|
| 77 |
-
|
| 78 |
-
examples[t].append(
|
| 79 |
-
[
|
| 80 |
-
str(temp_video_path),
|
| 81 |
-
"clip",
|
| 82 |
-
0.5,
|
| 83 |
-
40,
|
| 84 |
-
64,
|
| 85 |
-
]
|
| 86 |
-
)
|
| 87 |
|
| 88 |
if all(len(examples[t]) >= max_examples for t in tasks):
|
| 89 |
break
|
|
@@ -125,7 +110,6 @@ def process_video(
|
|
| 125 |
if not input_path.exists():
|
| 126 |
raise gr.Error(f"Video not found: {input_path}")
|
| 127 |
|
| 128 |
-
# Outputs are written to a local folder relative to the project root.
|
| 129 |
outputs_root = ROOT_DIR / "hf_space_outputs"
|
| 130 |
outputs_root.mkdir(parents=True, exist_ok=True)
|
| 131 |
|
|
@@ -153,7 +137,7 @@ def _build_task_tab(
|
|
| 153 |
task_value: str,
|
| 154 |
title: str,
|
| 155 |
description: str,
|
| 156 |
-
examples: List[
|
| 157 |
):
|
| 158 |
"""Create a single tab for a specific task."""
|
| 159 |
with gr.Tab(title):
|
|
@@ -196,14 +180,12 @@ def _build_task_tab(
|
|
| 196 |
with gr.Column():
|
| 197 |
output_video = gr.Video(label="Processed video")
|
| 198 |
|
| 199 |
-
# Update DBSCAN parameters when model changes
|
| 200 |
model_input.change(
|
| 201 |
fn=update_dbscan_params,
|
| 202 |
inputs=[model_input],
|
| 203 |
outputs=[eps_input, min_samples_input],
|
| 204 |
)
|
| 205 |
|
| 206 |
-
# Fix the task per tab; user can upload any video.
|
| 207 |
run_button.click(
|
| 208 |
fn=partial(process_video, task=task_value),
|
| 209 |
inputs=[
|
|
@@ -219,14 +201,8 @@ def _build_task_tab(
|
|
| 219 |
if examples:
|
| 220 |
gr.Examples(
|
| 221 |
examples=examples,
|
| 222 |
-
inputs=
|
| 223 |
-
|
| 224 |
-
model_input,
|
| 225 |
-
eps_input,
|
| 226 |
-
min_samples_input,
|
| 227 |
-
batch_size_input,
|
| 228 |
-
],
|
| 229 |
-
label="Examples from ./inference",
|
| 230 |
)
|
| 231 |
|
| 232 |
|
|
|
|
| 10 |
from huggingface_hub import hf_hub_download
|
| 11 |
|
| 12 |
|
|
|
|
| 13 |
ROOT_DIR = Path(__file__).resolve().parent
|
| 14 |
if str(ROOT_DIR) not in sys.path:
|
| 15 |
sys.path.append(str(ROOT_DIR))
|
| 16 |
|
|
|
|
| 17 |
from main import run_both_tasks, run_frame_reordering, run_outlier_detection # type: ignore
|
| 18 |
|
| 19 |
|
|
|
|
| 41 |
tasks = ("outliers", "reorder", "both")
|
| 42 |
examples: Dict[str, List[List[object]]] = {t: [] for t in tasks}
|
| 43 |
|
|
|
|
| 44 |
video_files = [
|
| 45 |
"v_FieldHockeyPenalty_g23_c04.mp4",
|
| 46 |
"v_BalanceBeam_g11_c03.mp4",
|
|
|
|
| 51 |
"v_BalanceBeam_g13_c01.mp4",
|
| 52 |
]
|
| 53 |
|
|
|
|
| 54 |
temp_dir = Path(tempfile.gettempdir()) / "gradio_examples"
|
| 55 |
temp_dir.mkdir(exist_ok=True)
|
| 56 |
|
| 57 |
try:
|
| 58 |
for video_file in video_files[:max_examples]:
|
|
|
|
| 59 |
cached_path = hf_hub_download(
|
| 60 |
repo_id=dataset_repo,
|
| 61 |
filename=video_file,
|
| 62 |
repo_type="dataset"
|
| 63 |
)
|
| 64 |
|
|
|
|
| 65 |
temp_video_path = temp_dir / video_file
|
| 66 |
if not temp_video_path.exists():
|
| 67 |
shutil.copy2(cached_path, temp_video_path)
|
| 68 |
|
| 69 |
for t in tasks:
|
| 70 |
if len(examples[t]) < max_examples:
|
| 71 |
+
examples[t].append(str(temp_video_path))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 72 |
|
| 73 |
if all(len(examples[t]) >= max_examples for t in tasks):
|
| 74 |
break
|
|
|
|
| 110 |
if not input_path.exists():
|
| 111 |
raise gr.Error(f"Video not found: {input_path}")
|
| 112 |
|
|
|
|
| 113 |
outputs_root = ROOT_DIR / "hf_space_outputs"
|
| 114 |
outputs_root.mkdir(parents=True, exist_ok=True)
|
| 115 |
|
|
|
|
| 137 |
task_value: str,
|
| 138 |
title: str,
|
| 139 |
description: str,
|
| 140 |
+
examples: List[str],
|
| 141 |
):
|
| 142 |
"""Create a single tab for a specific task."""
|
| 143 |
with gr.Tab(title):
|
|
|
|
| 180 |
with gr.Column():
|
| 181 |
output_video = gr.Video(label="Processed video")
|
| 182 |
|
|
|
|
| 183 |
model_input.change(
|
| 184 |
fn=update_dbscan_params,
|
| 185 |
inputs=[model_input],
|
| 186 |
outputs=[eps_input, min_samples_input],
|
| 187 |
)
|
| 188 |
|
|
|
|
| 189 |
run_button.click(
|
| 190 |
fn=partial(process_video, task=task_value),
|
| 191 |
inputs=[
|
|
|
|
| 201 |
if examples:
|
| 202 |
gr.Examples(
|
| 203 |
examples=examples,
|
| 204 |
+
inputs=video_input,
|
| 205 |
+
label="Example Videos",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 206 |
)
|
| 207 |
|
| 208 |
|
main.py
CHANGED
|
@@ -52,16 +52,9 @@ from tqdm import tqdm
|
|
| 52 |
from outliers_removal_algorithm import dbscan_outliers, USE_GPU
|
| 53 |
from reorder_frames_algorithm import load_video_gray, compute_mse_matrix, build_best_path
|
| 54 |
|
| 55 |
-
# Device configuration
|
| 56 |
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
|
| 57 |
-
|
| 58 |
-
# Supported video extensions
|
| 59 |
VIDEO_EXTS = ('.avi', '.mp4', '.mov', '.mkv')
|
| 60 |
|
| 61 |
-
# ==========================================
|
| 62 |
-
# EMBEDDING EXTRACTION (Outlier Detection)
|
| 63 |
-
# ==========================================
|
| 64 |
-
|
| 65 |
def load_embedding_model(model_type='clip', model_path=None, device='cuda'):
|
| 66 |
"""Load CLIP, DINOv2, or ResNet18 model for embedding extraction."""
|
| 67 |
print(f"Loading {model_type.upper()} model...")
|
|
@@ -118,16 +111,10 @@ def load_embedding_model(model_type='clip', model_path=None, device='cuda'):
|
|
| 118 |
features = feature_extractor(images)
|
| 119 |
feats = torch.tensor(features, device=device)
|
| 120 |
|
| 121 |
-
# Debug: Print shape before processing
|
| 122 |
if feats.dim() > 2:
|
| 123 |
-
print(f"DEBUG: DINOv2 features shape before squeeze: {feats.shape}")
|
| 124 |
feats = feats.squeeze(1)
|
| 125 |
|
| 126 |
feats = torch.nn.functional.normalize(feats, dim=-1)
|
| 127 |
-
|
| 128 |
-
# Debug: Print statistics
|
| 129 |
-
print(f"DEBUG: DINOv2 batch - shape: {feats.shape}, mean: {feats.mean():.4f}, std: {feats.std():.4f}, min: {feats.min():.4f}, max: {feats.max():.4f}")
|
| 130 |
-
|
| 131 |
return feats
|
| 132 |
|
| 133 |
print(f"DINOv2 model loaded: {model_path} ({embedding_dim}-dim)")
|
|
@@ -136,9 +123,7 @@ def load_embedding_model(model_type='clip', model_path=None, device='cuda'):
|
|
| 136 |
elif model_type == 'resnet18':
|
| 137 |
from torchvision import models, transforms
|
| 138 |
|
| 139 |
-
# Load ResNet18 pretrained model
|
| 140 |
model = models.resnet18(pretrained=True)
|
| 141 |
-
# Remove the final classification layer to get embeddings
|
| 142 |
model = torch.nn.Sequential(*list(model.children())[:-1])
|
| 143 |
model = model.to(device)
|
| 144 |
model.eval()
|
|
@@ -227,10 +212,6 @@ def extract_video_embeddings(video_path, extract_fn, preprocess, device='cuda',
|
|
| 227 |
return embeddings, fps, width, height
|
| 228 |
|
| 229 |
|
| 230 |
-
# ==========================================
|
| 231 |
-
# VIDEO SAVING
|
| 232 |
-
# ==========================================
|
| 233 |
-
|
| 234 |
def save_cleaned_video(video_path, predictions, output_path, fps, width, height):
|
| 235 |
"""Create cleaned video with outliers removed."""
|
| 236 |
num_outliers = predictions.sum()
|
|
@@ -271,7 +252,6 @@ def save_cleaned_video(video_path, predictions, output_path, fps, width, height)
|
|
| 271 |
|
| 272 |
def save_reordered_video(video_path, frame_order, output_path):
|
| 273 |
"""Create reordered video using predicted frame order."""
|
| 274 |
-
# Load all frames
|
| 275 |
cap = cv2.VideoCapture(str(video_path))
|
| 276 |
frames = []
|
| 277 |
while True:
|
|
@@ -288,7 +268,6 @@ def save_reordered_video(video_path, frame_order, output_path):
|
|
| 288 |
print(f" Total frames: {len(frames)}")
|
| 289 |
print(f" Reconstructed order: {len(frame_order)} frames")
|
| 290 |
|
| 291 |
-
# Write reordered video
|
| 292 |
fourcc = cv2.VideoWriter_fourcc(*'XVID')
|
| 293 |
out = cv2.VideoWriter(str(output_path), fourcc, fps, (width, height))
|
| 294 |
|
|
@@ -305,7 +284,6 @@ def save_reordered_video(video_path, frame_order, output_path):
|
|
| 305 |
|
| 306 |
def save_cleaned_and_reordered_video(video_path, outlier_predictions, frame_order, output_path):
|
| 307 |
"""Create video with outliers removed and frames reordered in one pass."""
|
| 308 |
-
# Load all frames
|
| 309 |
cap = cv2.VideoCapture(str(video_path))
|
| 310 |
all_frames = []
|
| 311 |
while True:
|
|
@@ -318,7 +296,6 @@ def save_cleaned_and_reordered_video(video_path, outlier_predictions, frame_orde
|
|
| 318 |
height, width = all_frames[0].shape[:2]
|
| 319 |
cap.release()
|
| 320 |
|
| 321 |
-
# Filter out outliers
|
| 322 |
inlier_frames = [all_frames[i] for i in range(len(all_frames))
|
| 323 |
if i < len(outlier_predictions) and not outlier_predictions[i]]
|
| 324 |
|
|
@@ -329,7 +306,6 @@ def save_cleaned_and_reordered_video(video_path, outlier_predictions, frame_orde
|
|
| 329 |
print(f" Inlier frames: {len(inlier_frames)} ({100*len(inlier_frames)/len(all_frames):.1f}%)")
|
| 330 |
print(f" Reordered frames: {len(frame_order)}")
|
| 331 |
|
| 332 |
-
# Write reordered video with only inlier frames
|
| 333 |
fourcc = cv2.VideoWriter_fourcc(*'XVID')
|
| 334 |
out = cv2.VideoWriter(str(output_path), fourcc, fps, (width, height))
|
| 335 |
|
|
@@ -344,28 +320,21 @@ def save_cleaned_and_reordered_video(video_path, outlier_predictions, frame_orde
|
|
| 344 |
return output_path
|
| 345 |
|
| 346 |
|
| 347 |
-
# ==========================================
|
| 348 |
-
# MAIN PIPELINE
|
| 349 |
-
# ==========================================
|
| 350 |
-
|
| 351 |
def run_outlier_detection(video_path, output_path, args):
|
| 352 |
"""Run outlier detection pipeline using imported functions."""
|
| 353 |
print("OUTLIER DETECTION")
|
| 354 |
print(f"GPU Acceleration: {'Enabled (cuML)' if USE_GPU else 'Disabled (CPU/sklearn)'}")
|
| 355 |
|
| 356 |
-
# Load embedding model
|
| 357 |
extract_fn, preprocess, embedding_dim = load_embedding_model(
|
| 358 |
model_type=args.model_type,
|
| 359 |
model_path=args.model_path,
|
| 360 |
device=DEVICE
|
| 361 |
)
|
| 362 |
|
| 363 |
-
# Extract embeddings
|
| 364 |
embeddings, fps, width, height = extract_video_embeddings(
|
| 365 |
video_path, extract_fn, preprocess, DEVICE, args.batch_size
|
| 366 |
)
|
| 367 |
|
| 368 |
-
# Detect outliers using DBSCAN
|
| 369 |
print(f"\nRunning DBSCAN outlier detection...")
|
| 370 |
predictions = dbscan_outliers(
|
| 371 |
embeddings,
|
|
@@ -373,7 +342,6 @@ def run_outlier_detection(video_path, output_path, args):
|
|
| 373 |
min_samples=args.min_samples
|
| 374 |
)
|
| 375 |
|
| 376 |
-
# Save cleaned video
|
| 377 |
cleaned_path = save_cleaned_video(video_path, predictions, output_path, fps, width, height)
|
| 378 |
return cleaned_path
|
| 379 |
|
|
@@ -394,7 +362,6 @@ def run_frame_reordering(video_path, output_path):
|
|
| 394 |
print("Building temporal path...")
|
| 395 |
path = build_best_path(mse)
|
| 396 |
|
| 397 |
-
# Save reordered video
|
| 398 |
reordered_path = save_reordered_video(video_path, path, output_path)
|
| 399 |
return reordered_path
|
| 400 |
|
|
@@ -406,7 +373,6 @@ def run_both_tasks(video_path, output_path, args):
|
|
| 406 |
print("=" * 80)
|
| 407 |
print(f"GPU Acceleration: {'Enabled (cuML)' if USE_GPU else 'Disabled (CPU/sklearn)'}")
|
| 408 |
|
| 409 |
-
# Load embedding model and extract embeddings
|
| 410 |
extract_fn, preprocess, embedding_dim = load_embedding_model(
|
| 411 |
model_type=args.model_type,
|
| 412 |
model_path=args.model_path,
|
|
@@ -417,19 +383,7 @@ def run_both_tasks(video_path, output_path, args):
|
|
| 417 |
video_path, extract_fn, preprocess, DEVICE, args.batch_size
|
| 418 |
)
|
| 419 |
|
| 420 |
-
# Detect outliers using DBSCAN
|
| 421 |
print(f"\nRunning DBSCAN outlier detection...")
|
| 422 |
-
print(f"DEBUG: Final embeddings before DBSCAN - shape: {embeddings.shape}, mean: {embeddings.mean():.4f}, std: {embeddings.std():.4f}")
|
| 423 |
-
print(f"DEBUG: Embeddings range - min: {embeddings.min():.4f}, max: {embeddings.max():.4f}")
|
| 424 |
-
|
| 425 |
-
# Compute pairwise distances to understand embedding space
|
| 426 |
-
import numpy as np
|
| 427 |
-
from scipy.spatial.distance import pdist
|
| 428 |
-
emb_np = embeddings.cpu().numpy() if hasattr(embeddings, 'cpu') else embeddings
|
| 429 |
-
distances = pdist(emb_np, metric='euclidean')
|
| 430 |
-
print(f"DEBUG: Pairwise distances - mean: {distances.mean():.4f}, std: {distances.std():.4f}, min: {distances.min():.4f}, max: {distances.max():.4f}")
|
| 431 |
-
print(f"DEBUG: Current eps={args.eps}, so distances > eps will not form clusters")
|
| 432 |
-
|
| 433 |
outlier_predictions = dbscan_outliers(
|
| 434 |
embeddings,
|
| 435 |
eps=args.eps,
|
|
@@ -443,20 +397,17 @@ def run_both_tasks(video_path, output_path, args):
|
|
| 443 |
print(f" Inliers: {num_inliers} ({100*num_inliers/len(outlier_predictions):.1f}%)")
|
| 444 |
print(f" Outliers: {num_outliers} ({100*num_outliers/len(outlier_predictions):.1f}%)")
|
| 445 |
|
| 446 |
-
# Step 2: Frame reordering on inlier frames
|
| 447 |
print("\n" + "=" * 80)
|
| 448 |
print("STEP 2: FRAME REORDERING (on inlier frames)")
|
| 449 |
print("=" * 80)
|
| 450 |
|
| 451 |
all_frames = load_video_gray(str(video_path))
|
| 452 |
|
| 453 |
-
# Filter to only inlier frames
|
| 454 |
inlier_frames = []
|
| 455 |
for i in range(len(all_frames)):
|
| 456 |
if i < len(outlier_predictions) and not outlier_predictions[i]:
|
| 457 |
inlier_frames.append(all_frames[i])
|
| 458 |
|
| 459 |
-
# Check if we have any inlier frames
|
| 460 |
if len(inlier_frames) == 0:
|
| 461 |
print("\n⚠️ WARNING: All frames were detected as outliers!")
|
| 462 |
print("This typically means the DBSCAN parameters are too strict for this video.")
|
|
@@ -467,7 +418,6 @@ def run_both_tasks(video_path, output_path, args):
|
|
| 467 |
print(" - Try a different embedding model")
|
| 468 |
print("\nReturning original video without processing...")
|
| 469 |
|
| 470 |
-
# Copy original video to output
|
| 471 |
import shutil
|
| 472 |
shutil.copy2(video_path, output_path)
|
| 473 |
return str(output_path)
|
|
@@ -476,7 +426,6 @@ def run_both_tasks(video_path, output_path, args):
|
|
| 476 |
mse = compute_mse_matrix(inlier_frames)
|
| 477 |
path = build_best_path(mse)
|
| 478 |
|
| 479 |
-
# Save final video (cleaned and reordered)
|
| 480 |
final_path = save_cleaned_and_reordered_video(video_path, outlier_predictions, path, output_path)
|
| 481 |
return final_path
|
| 482 |
|
|
@@ -486,13 +435,11 @@ def get_output_path(input_path, output_dir, suffix="_fixed"):
|
|
| 486 |
input_path = Path(input_path)
|
| 487 |
|
| 488 |
if output_dir:
|
| 489 |
-
# Use specified output directory
|
| 490 |
output_dir = Path(output_dir)
|
| 491 |
output_dir.mkdir(exist_ok=True, parents=True)
|
| 492 |
output_name = f"{input_path.stem}{suffix}{input_path.suffix}"
|
| 493 |
return output_dir / output_name
|
| 494 |
else:
|
| 495 |
-
# Save in same directory as input
|
| 496 |
output_name = f"{input_path.stem}{suffix}{input_path.suffix}"
|
| 497 |
return input_path.parent / output_name
|
| 498 |
|
|
@@ -511,10 +458,8 @@ def process_single_video(video_path, args):
|
|
| 511 |
print(f"Task: {args.task.upper()}")
|
| 512 |
print("=" * 80)
|
| 513 |
|
| 514 |
-
# Determine output path
|
| 515 |
output_path = get_output_path(video_path, args.output_dir)
|
| 516 |
|
| 517 |
-
# Execute tasks
|
| 518 |
if args.task == "outliers":
|
| 519 |
run_outlier_detection(str(video_path), str(output_path), args)
|
| 520 |
|
|
@@ -522,7 +467,6 @@ def process_single_video(video_path, args):
|
|
| 522 |
run_frame_reordering(str(video_path), str(output_path))
|
| 523 |
|
| 524 |
elif args.task == "both":
|
| 525 |
-
# Run both tasks without saving intermediate video
|
| 526 |
run_both_tasks(str(video_path), str(output_path), args)
|
| 527 |
|
| 528 |
print("\n" + "=" * 80)
|
|
@@ -539,7 +483,6 @@ def process_directory(input_dir, args):
|
|
| 539 |
print(f"Error: Directory not found: {input_dir}")
|
| 540 |
return
|
| 541 |
|
| 542 |
-
# Find all video files
|
| 543 |
video_files = []
|
| 544 |
for ext in VIDEO_EXTS:
|
| 545 |
video_files.extend(input_dir.glob(f"*{ext}"))
|
|
@@ -555,15 +498,12 @@ def process_directory(input_dir, args):
|
|
| 555 |
print(f"Found {len(video_files)} video(s) in {input_dir}")
|
| 556 |
print("=" * 80)
|
| 557 |
|
| 558 |
-
# Process each video
|
| 559 |
for i, video_path in enumerate(video_files, 1):
|
| 560 |
print(f"\n[{i}/{len(video_files)}] Processing: {video_path.name}")
|
| 561 |
|
| 562 |
-
# Determine output path
|
| 563 |
output_path = get_output_path(video_path, args.output_dir)
|
| 564 |
|
| 565 |
try:
|
| 566 |
-
# Execute tasks
|
| 567 |
if args.task == "outliers":
|
| 568 |
run_outlier_detection(str(video_path), str(output_path), args)
|
| 569 |
|
|
@@ -571,7 +511,6 @@ def process_directory(input_dir, args):
|
|
| 571 |
run_frame_reordering(str(video_path), str(output_path))
|
| 572 |
|
| 573 |
elif args.task == "both":
|
| 574 |
-
# Run both tasks without saving intermediate video
|
| 575 |
run_both_tasks(str(video_path), str(output_path), args)
|
| 576 |
|
| 577 |
print(f" ✓ Saved: {output_path}")
|
|
@@ -590,29 +529,24 @@ def main():
|
|
| 590 |
description="Main script for video processing: outlier detection (DBSCAN) and/or frame reordering"
|
| 591 |
)
|
| 592 |
|
| 593 |
-
# Input arguments (mutually exclusive)
|
| 594 |
input_group = parser.add_mutually_exclusive_group(required=True)
|
| 595 |
input_group.add_argument("--video",
|
| 596 |
help="Process a single video file")
|
| 597 |
input_group.add_argument("--input-dir",
|
| 598 |
help="Process all videos in a directory (default: ./inference)")
|
| 599 |
|
| 600 |
-
# Task selection
|
| 601 |
parser.add_argument("--task", required=True, choices=["outliers", "reorder", "both"],
|
| 602 |
help="Task to perform: outliers, reorder, or both")
|
| 603 |
|
| 604 |
-
# Output directory (optional)
|
| 605 |
parser.add_argument("--output-dir",
|
| 606 |
help="Output directory (default: same as input directory)")
|
| 607 |
|
| 608 |
-
# Outlier detection parameters
|
| 609 |
parser.add_argument("--model-type", default="clip", choices=["clip", "dinov2", "resnet18"],
|
| 610 |
help="Embedding model type for outlier detection")
|
| 611 |
parser.add_argument("--model-path", help="Path to DINOv2 model (optional)")
|
| 612 |
parser.add_argument("--batch-size", type=int, default=128,
|
| 613 |
help="Batch size for embedding extraction")
|
| 614 |
|
| 615 |
-
# DBSCAN parameters
|
| 616 |
parser.add_argument("--eps", type=float, default=0.5,
|
| 617 |
help="DBSCAN: Epsilon parameter")
|
| 618 |
parser.add_argument("--min-samples", type=int, default=40,
|
|
@@ -620,13 +554,9 @@ def main():
|
|
| 620 |
|
| 621 |
args = parser.parse_args()
|
| 622 |
|
| 623 |
-
# Default to ./inference if neither --video nor --input-dir specified
|
| 624 |
-
# (This won't happen due to required=True, but keeping for clarity)
|
| 625 |
-
|
| 626 |
if args.task in ["outliers", "both"]:
|
| 627 |
print(f"DBSCAN parameters: eps={args.eps}, min_samples={args.min_samples}")
|
| 628 |
|
| 629 |
-
# Process based on input mode
|
| 630 |
if args.video:
|
| 631 |
process_single_video(args.video, args)
|
| 632 |
elif args.input_dir:
|
|
|
|
| 52 |
from outliers_removal_algorithm import dbscan_outliers, USE_GPU
|
| 53 |
from reorder_frames_algorithm import load_video_gray, compute_mse_matrix, build_best_path
|
| 54 |
|
|
|
|
| 55 |
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
|
|
|
|
|
|
|
| 56 |
VIDEO_EXTS = ('.avi', '.mp4', '.mov', '.mkv')
|
| 57 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 58 |
def load_embedding_model(model_type='clip', model_path=None, device='cuda'):
|
| 59 |
"""Load CLIP, DINOv2, or ResNet18 model for embedding extraction."""
|
| 60 |
print(f"Loading {model_type.upper()} model...")
|
|
|
|
| 111 |
features = feature_extractor(images)
|
| 112 |
feats = torch.tensor(features, device=device)
|
| 113 |
|
|
|
|
| 114 |
if feats.dim() > 2:
|
|
|
|
| 115 |
feats = feats.squeeze(1)
|
| 116 |
|
| 117 |
feats = torch.nn.functional.normalize(feats, dim=-1)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 118 |
return feats
|
| 119 |
|
| 120 |
print(f"DINOv2 model loaded: {model_path} ({embedding_dim}-dim)")
|
|
|
|
| 123 |
elif model_type == 'resnet18':
|
| 124 |
from torchvision import models, transforms
|
| 125 |
|
|
|
|
| 126 |
model = models.resnet18(pretrained=True)
|
|
|
|
| 127 |
model = torch.nn.Sequential(*list(model.children())[:-1])
|
| 128 |
model = model.to(device)
|
| 129 |
model.eval()
|
|
|
|
| 212 |
return embeddings, fps, width, height
|
| 213 |
|
| 214 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 215 |
def save_cleaned_video(video_path, predictions, output_path, fps, width, height):
|
| 216 |
"""Create cleaned video with outliers removed."""
|
| 217 |
num_outliers = predictions.sum()
|
|
|
|
| 252 |
|
| 253 |
def save_reordered_video(video_path, frame_order, output_path):
|
| 254 |
"""Create reordered video using predicted frame order."""
|
|
|
|
| 255 |
cap = cv2.VideoCapture(str(video_path))
|
| 256 |
frames = []
|
| 257 |
while True:
|
|
|
|
| 268 |
print(f" Total frames: {len(frames)}")
|
| 269 |
print(f" Reconstructed order: {len(frame_order)} frames")
|
| 270 |
|
|
|
|
| 271 |
fourcc = cv2.VideoWriter_fourcc(*'XVID')
|
| 272 |
out = cv2.VideoWriter(str(output_path), fourcc, fps, (width, height))
|
| 273 |
|
|
|
|
| 284 |
|
| 285 |
def save_cleaned_and_reordered_video(video_path, outlier_predictions, frame_order, output_path):
|
| 286 |
"""Create video with outliers removed and frames reordered in one pass."""
|
|
|
|
| 287 |
cap = cv2.VideoCapture(str(video_path))
|
| 288 |
all_frames = []
|
| 289 |
while True:
|
|
|
|
| 296 |
height, width = all_frames[0].shape[:2]
|
| 297 |
cap.release()
|
| 298 |
|
|
|
|
| 299 |
inlier_frames = [all_frames[i] for i in range(len(all_frames))
|
| 300 |
if i < len(outlier_predictions) and not outlier_predictions[i]]
|
| 301 |
|
|
|
|
| 306 |
print(f" Inlier frames: {len(inlier_frames)} ({100*len(inlier_frames)/len(all_frames):.1f}%)")
|
| 307 |
print(f" Reordered frames: {len(frame_order)}")
|
| 308 |
|
|
|
|
| 309 |
fourcc = cv2.VideoWriter_fourcc(*'XVID')
|
| 310 |
out = cv2.VideoWriter(str(output_path), fourcc, fps, (width, height))
|
| 311 |
|
|
|
|
| 320 |
return output_path
|
| 321 |
|
| 322 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 323 |
def run_outlier_detection(video_path, output_path, args):
|
| 324 |
"""Run outlier detection pipeline using imported functions."""
|
| 325 |
print("OUTLIER DETECTION")
|
| 326 |
print(f"GPU Acceleration: {'Enabled (cuML)' if USE_GPU else 'Disabled (CPU/sklearn)'}")
|
| 327 |
|
|
|
|
| 328 |
extract_fn, preprocess, embedding_dim = load_embedding_model(
|
| 329 |
model_type=args.model_type,
|
| 330 |
model_path=args.model_path,
|
| 331 |
device=DEVICE
|
| 332 |
)
|
| 333 |
|
|
|
|
| 334 |
embeddings, fps, width, height = extract_video_embeddings(
|
| 335 |
video_path, extract_fn, preprocess, DEVICE, args.batch_size
|
| 336 |
)
|
| 337 |
|
|
|
|
| 338 |
print(f"\nRunning DBSCAN outlier detection...")
|
| 339 |
predictions = dbscan_outliers(
|
| 340 |
embeddings,
|
|
|
|
| 342 |
min_samples=args.min_samples
|
| 343 |
)
|
| 344 |
|
|
|
|
| 345 |
cleaned_path = save_cleaned_video(video_path, predictions, output_path, fps, width, height)
|
| 346 |
return cleaned_path
|
| 347 |
|
|
|
|
| 362 |
print("Building temporal path...")
|
| 363 |
path = build_best_path(mse)
|
| 364 |
|
|
|
|
| 365 |
reordered_path = save_reordered_video(video_path, path, output_path)
|
| 366 |
return reordered_path
|
| 367 |
|
|
|
|
| 373 |
print("=" * 80)
|
| 374 |
print(f"GPU Acceleration: {'Enabled (cuML)' if USE_GPU else 'Disabled (CPU/sklearn)'}")
|
| 375 |
|
|
|
|
| 376 |
extract_fn, preprocess, embedding_dim = load_embedding_model(
|
| 377 |
model_type=args.model_type,
|
| 378 |
model_path=args.model_path,
|
|
|
|
| 383 |
video_path, extract_fn, preprocess, DEVICE, args.batch_size
|
| 384 |
)
|
| 385 |
|
|
|
|
| 386 |
print(f"\nRunning DBSCAN outlier detection...")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 387 |
outlier_predictions = dbscan_outliers(
|
| 388 |
embeddings,
|
| 389 |
eps=args.eps,
|
|
|
|
| 397 |
print(f" Inliers: {num_inliers} ({100*num_inliers/len(outlier_predictions):.1f}%)")
|
| 398 |
print(f" Outliers: {num_outliers} ({100*num_outliers/len(outlier_predictions):.1f}%)")
|
| 399 |
|
|
|
|
| 400 |
print("\n" + "=" * 80)
|
| 401 |
print("STEP 2: FRAME REORDERING (on inlier frames)")
|
| 402 |
print("=" * 80)
|
| 403 |
|
| 404 |
all_frames = load_video_gray(str(video_path))
|
| 405 |
|
|
|
|
| 406 |
inlier_frames = []
|
| 407 |
for i in range(len(all_frames)):
|
| 408 |
if i < len(outlier_predictions) and not outlier_predictions[i]:
|
| 409 |
inlier_frames.append(all_frames[i])
|
| 410 |
|
|
|
|
| 411 |
if len(inlier_frames) == 0:
|
| 412 |
print("\n⚠️ WARNING: All frames were detected as outliers!")
|
| 413 |
print("This typically means the DBSCAN parameters are too strict for this video.")
|
|
|
|
| 418 |
print(" - Try a different embedding model")
|
| 419 |
print("\nReturning original video without processing...")
|
| 420 |
|
|
|
|
| 421 |
import shutil
|
| 422 |
shutil.copy2(video_path, output_path)
|
| 423 |
return str(output_path)
|
|
|
|
| 426 |
mse = compute_mse_matrix(inlier_frames)
|
| 427 |
path = build_best_path(mse)
|
| 428 |
|
|
|
|
| 429 |
final_path = save_cleaned_and_reordered_video(video_path, outlier_predictions, path, output_path)
|
| 430 |
return final_path
|
| 431 |
|
|
|
|
| 435 |
input_path = Path(input_path)
|
| 436 |
|
| 437 |
if output_dir:
|
|
|
|
| 438 |
output_dir = Path(output_dir)
|
| 439 |
output_dir.mkdir(exist_ok=True, parents=True)
|
| 440 |
output_name = f"{input_path.stem}{suffix}{input_path.suffix}"
|
| 441 |
return output_dir / output_name
|
| 442 |
else:
|
|
|
|
| 443 |
output_name = f"{input_path.stem}{suffix}{input_path.suffix}"
|
| 444 |
return input_path.parent / output_name
|
| 445 |
|
|
|
|
| 458 |
print(f"Task: {args.task.upper()}")
|
| 459 |
print("=" * 80)
|
| 460 |
|
|
|
|
| 461 |
output_path = get_output_path(video_path, args.output_dir)
|
| 462 |
|
|
|
|
| 463 |
if args.task == "outliers":
|
| 464 |
run_outlier_detection(str(video_path), str(output_path), args)
|
| 465 |
|
|
|
|
| 467 |
run_frame_reordering(str(video_path), str(output_path))
|
| 468 |
|
| 469 |
elif args.task == "both":
|
|
|
|
| 470 |
run_both_tasks(str(video_path), str(output_path), args)
|
| 471 |
|
| 472 |
print("\n" + "=" * 80)
|
|
|
|
| 483 |
print(f"Error: Directory not found: {input_dir}")
|
| 484 |
return
|
| 485 |
|
|
|
|
| 486 |
video_files = []
|
| 487 |
for ext in VIDEO_EXTS:
|
| 488 |
video_files.extend(input_dir.glob(f"*{ext}"))
|
|
|
|
| 498 |
print(f"Found {len(video_files)} video(s) in {input_dir}")
|
| 499 |
print("=" * 80)
|
| 500 |
|
|
|
|
| 501 |
for i, video_path in enumerate(video_files, 1):
|
| 502 |
print(f"\n[{i}/{len(video_files)}] Processing: {video_path.name}")
|
| 503 |
|
|
|
|
| 504 |
output_path = get_output_path(video_path, args.output_dir)
|
| 505 |
|
| 506 |
try:
|
|
|
|
| 507 |
if args.task == "outliers":
|
| 508 |
run_outlier_detection(str(video_path), str(output_path), args)
|
| 509 |
|
|
|
|
| 511 |
run_frame_reordering(str(video_path), str(output_path))
|
| 512 |
|
| 513 |
elif args.task == "both":
|
|
|
|
| 514 |
run_both_tasks(str(video_path), str(output_path), args)
|
| 515 |
|
| 516 |
print(f" ✓ Saved: {output_path}")
|
|
|
|
| 529 |
description="Main script for video processing: outlier detection (DBSCAN) and/or frame reordering"
|
| 530 |
)
|
| 531 |
|
|
|
|
| 532 |
input_group = parser.add_mutually_exclusive_group(required=True)
|
| 533 |
input_group.add_argument("--video",
|
| 534 |
help="Process a single video file")
|
| 535 |
input_group.add_argument("--input-dir",
|
| 536 |
help="Process all videos in a directory (default: ./inference)")
|
| 537 |
|
|
|
|
| 538 |
parser.add_argument("--task", required=True, choices=["outliers", "reorder", "both"],
|
| 539 |
help="Task to perform: outliers, reorder, or both")
|
| 540 |
|
|
|
|
| 541 |
parser.add_argument("--output-dir",
|
| 542 |
help="Output directory (default: same as input directory)")
|
| 543 |
|
|
|
|
| 544 |
parser.add_argument("--model-type", default="clip", choices=["clip", "dinov2", "resnet18"],
|
| 545 |
help="Embedding model type for outlier detection")
|
| 546 |
parser.add_argument("--model-path", help="Path to DINOv2 model (optional)")
|
| 547 |
parser.add_argument("--batch-size", type=int, default=128,
|
| 548 |
help="Batch size for embedding extraction")
|
| 549 |
|
|
|
|
| 550 |
parser.add_argument("--eps", type=float, default=0.5,
|
| 551 |
help="DBSCAN: Epsilon parameter")
|
| 552 |
parser.add_argument("--min-samples", type=int, default=40,
|
|
|
|
| 554 |
|
| 555 |
args = parser.parse_args()
|
| 556 |
|
|
|
|
|
|
|
|
|
|
| 557 |
if args.task in ["outliers", "both"]:
|
| 558 |
print(f"DBSCAN parameters: eps={args.eps}, min_samples={args.min_samples}")
|
| 559 |
|
|
|
|
| 560 |
if args.video:
|
| 561 |
process_single_video(args.video, args)
|
| 562 |
elif args.input_dir:
|