Update processing/two_stage/two_stage_processor.py
Browse files
processing/two_stage/two_stage_processor.py
CHANGED
|
@@ -1,14 +1,13 @@
|
|
| 1 |
#!/usr/bin/env python3
|
| 2 |
"""
|
| 3 |
-
EFFICIENT Two-Stage
|
| 4 |
-
VIDEO-TO-VIDEO PIPELINE:
|
| 5 |
Stage 1: SAM2 creates reference mask from first 3 frames
|
| 6 |
-
Stage 2: MatAnyone processes entire video → pha.mp4
|
| 7 |
-
Stage 3:
|
| 8 |
-
Stage 4: Adaptive chroma key compositing with background → final.mp4
|
| 9 |
FEATURES:
|
| 10 |
-
-
|
| 11 |
-
-
|
| 12 |
- Chunked processing for long videos with memory management
|
| 13 |
- Dimension mismatch handling
|
| 14 |
- Memory optimization with light/deep cleanup modes
|
|
@@ -28,6 +27,7 @@
|
|
| 28 |
import gc
|
| 29 |
import time
|
| 30 |
import traceback
|
|
|
|
| 31 |
from pathlib import Path
|
| 32 |
from typing import Optional, Tuple, Dict, Any, List
|
| 33 |
from moviepy.editor import VideoFileClip, CompositeVideoClip, ImageClip, concatenate_videoclips
|
|
@@ -41,14 +41,6 @@
|
|
| 41 |
)
|
| 42 |
logger = logging.getLogger(__name__)
|
| 43 |
|
| 44 |
-
# Try to import sklearn, fallback gracefully
|
| 45 |
-
try:
|
| 46 |
-
from sklearn.cluster import KMeans
|
| 47 |
-
SKLEARN_AVAILABLE = True
|
| 48 |
-
except ImportError:
|
| 49 |
-
SKLEARN_AVAILABLE = False
|
| 50 |
-
logger.warning("scikit-learn not available, using fallback threshold detection")
|
| 51 |
-
|
| 52 |
# PyTorch memory management
|
| 53 |
try:
|
| 54 |
import torch
|
|
@@ -72,21 +64,6 @@ class ProcessingConfig:
|
|
| 72 |
CHUNK_OVERLAP_FRAMES = 5 # Frames to overlap between chunks for smooth transitions
|
| 73 |
MAX_PROCESSING_RESOLUTION = None # Keep full resolution for chunks
|
| 74 |
|
| 75 |
-
# Green screen settings
|
| 76 |
-
GREEN_COLOR = (0, 255, 0) # RGB green for green screen
|
| 77 |
-
GREEN_COLOR_NORMALIZED = (0.0, 1.0, 0.0) # Normalized for compositing
|
| 78 |
-
|
| 79 |
-
# Auto-detection settings
|
| 80 |
-
AUTO_DETECTION_FRAMES = 5 # Number of frames to analyze for auto-detection
|
| 81 |
-
AUTO_DETECTION_FALLBACK = 0.1 # Fallback threshold if auto-detection fails
|
| 82 |
-
MIN_THRESHOLD = 0.02 # Minimum allowed threshold
|
| 83 |
-
MAX_THRESHOLD = 0.3 # Maximum allowed threshold
|
| 84 |
-
|
| 85 |
-
# Adaptive optimization settings
|
| 86 |
-
ADAPTIVE_MAX_ITERATIONS = 10 # Maximum iterations for threshold optimization
|
| 87 |
-
ADAPTIVE_GREEN_TOLERANCE = 0.01 # Acceptable green residue level
|
| 88 |
-
ADAPTIVE_TRANSPARENCY_TOLERANCE = 0.1 # Acceptable transparency quality
|
| 89 |
-
|
| 90 |
# Quality settings
|
| 91 |
VIDEO_CODEC = 'libx264'
|
| 92 |
VIDEO_BITRATE = '8000k'
|
|
@@ -107,11 +84,11 @@ class ProcessingConfig:
|
|
| 107 |
ENABLE_MEMORY_TESTING = False # DISABLED: Memory testing causes predictor deletion
|
| 108 |
|
| 109 |
# ==============================================================================
|
| 110 |
-
# CHAPTER 3:
|
| 111 |
# ==============================================================================
|
| 112 |
|
| 113 |
class MemoryManager:
|
| 114 |
-
"""
|
| 115 |
|
| 116 |
def __init__(self, config: ProcessingConfig):
|
| 117 |
self.config = config
|
|
@@ -275,103 +252,8 @@ def get_memory_report(self) -> str:
|
|
| 275 |
report.append("="*60)
|
| 276 |
return "\n".join(report)
|
| 277 |
|
| 278 |
-
# ==============================================================================
|
| 279 |
-
# CHAPTER 4: MEMORY TESTING UTILITIES
|
| 280 |
-
# ==============================================================================
|
| 281 |
-
|
| 282 |
-
class MemoryTester:
|
| 283 |
-
"""Testing utilities for memory management verification."""
|
| 284 |
-
|
| 285 |
-
@staticmethod
|
| 286 |
-
def test_memory_cleanup(processor: 'TwoStageProcessor') -> Dict[str, Any]:
|
| 287 |
-
"""Test memory cleanup at each stage."""
|
| 288 |
-
results = {
|
| 289 |
-
'initial_memory': None,
|
| 290 |
-
'post_sam2_memory': None,
|
| 291 |
-
'post_matanyone_memory': None,
|
| 292 |
-
'post_final_memory': None,
|
| 293 |
-
'cleanup_effective': False,
|
| 294 |
-
'memory_leaks': []
|
| 295 |
-
}
|
| 296 |
-
|
| 297 |
-
try:
|
| 298 |
-
# Get initial memory
|
| 299 |
-
results['initial_memory'] = processor.memory_manager.get_current_memory()
|
| 300 |
-
logger.info(f"Initial memory: {results['initial_memory']['gpu_allocated']:.2f}GB")
|
| 301 |
-
|
| 302 |
-
# Create dummy data for testing
|
| 303 |
-
dummy_frame = np.random.randint(0, 255, (1080, 1920, 3), dtype=np.uint8)
|
| 304 |
-
|
| 305 |
-
# Test SAM2 cleanup
|
| 306 |
-
if hasattr(processor.sam2_handler, 'create_mask'):
|
| 307 |
-
logger.info("Testing SAM2 memory cleanup...")
|
| 308 |
-
_ = processor._create_sam2_mask(dummy_frame)
|
| 309 |
-
pre_cleanup = processor.memory_manager.get_current_memory()
|
| 310 |
-
# Use deep cleanup for testing only
|
| 311 |
-
processor.memory_manager.cleanup_model(processor.sam2_handler, "SAM2", deep_cleanup=True)
|
| 312 |
-
processor.memory_manager.cleanup_stage("SAM2_test", force=True)
|
| 313 |
-
post_cleanup = processor.memory_manager.get_current_memory()
|
| 314 |
-
|
| 315 |
-
results['post_sam2_memory'] = post_cleanup
|
| 316 |
-
sam2_freed = pre_cleanup['gpu_allocated'] - post_cleanup['gpu_allocated']
|
| 317 |
-
|
| 318 |
-
if sam2_freed > 0:
|
| 319 |
-
logger.info(f"SAM2 cleanup freed {sam2_freed:.2f}GB")
|
| 320 |
-
else:
|
| 321 |
-
results['memory_leaks'].append("SAM2 cleanup ineffective")
|
| 322 |
-
|
| 323 |
-
# Check if memory is properly freed
|
| 324 |
-
final_memory = processor.memory_manager.get_current_memory()
|
| 325 |
-
results['post_final_memory'] = final_memory
|
| 326 |
-
|
| 327 |
-
# Determine if cleanup was effective
|
| 328 |
-
memory_increase = final_memory['gpu_allocated'] - results['initial_memory']['gpu_allocated']
|
| 329 |
-
results['cleanup_effective'] = memory_increase < 0.1 # Less than 100MB increase
|
| 330 |
-
|
| 331 |
-
if not results['cleanup_effective']:
|
| 332 |
-
results['memory_leaks'].append(f"Memory increased by {memory_increase:.2f}GB")
|
| 333 |
-
|
| 334 |
-
# Generate report
|
| 335 |
-
logger.info(processor.memory_manager.get_memory_report())
|
| 336 |
-
|
| 337 |
-
except Exception as e:
|
| 338 |
-
logger.error(f"Memory testing failed: {e}")
|
| 339 |
-
results['error'] = str(e)
|
| 340 |
-
|
| 341 |
-
return results
|
| 342 |
-
|
| 343 |
-
@staticmethod
|
| 344 |
-
def monitor_memory_during_processing(func):
|
| 345 |
-
"""Decorator to monitor memory during a function call."""
|
| 346 |
-
def wrapper(*args, **kwargs):
|
| 347 |
-
if not TORCH_AVAILABLE:
|
| 348 |
-
return func(*args, **kwargs)
|
| 349 |
-
|
| 350 |
-
start_memory = torch.cuda.memory_allocated() if torch.cuda.is_available() else 0
|
| 351 |
-
start_time = time.time()
|
| 352 |
-
|
| 353 |
-
try:
|
| 354 |
-
result = func(*args, **kwargs)
|
| 355 |
-
|
| 356 |
-
end_memory = torch.cuda.memory_allocated() if torch.cuda.is_available() else 0
|
| 357 |
-
end_time = time.time()
|
| 358 |
-
|
| 359 |
-
memory_used = (end_memory - start_memory) / 1024**3
|
| 360 |
-
time_taken = end_time - start_time
|
| 361 |
-
|
| 362 |
-
func_name = func.__name__
|
| 363 |
-
logger.info(f"{func_name}: {time_taken:.1f}s, {memory_used:.2f}GB memory delta")
|
| 364 |
-
|
| 365 |
-
return result
|
| 366 |
-
|
| 367 |
-
except Exception as e:
|
| 368 |
-
logger.error(f"Error in {func.__name__}: {e}")
|
| 369 |
-
raise
|
| 370 |
-
|
| 371 |
-
return wrapper
|
| 372 |
-
|
| 373 |
# ==============================================================================
|
| 374 |
-
# CHAPTER
|
| 375 |
# ==============================================================================
|
| 376 |
|
| 377 |
class QualityManager:
|
|
@@ -404,7 +286,7 @@ def get_profile(cls, quality: str = 'medium') -> Dict[str, Any]:
|
|
| 404 |
return cls.PROFILES.get(quality, cls.PROFILES['medium'])
|
| 405 |
|
| 406 |
# ==============================================================================
|
| 407 |
-
# CHAPTER
|
| 408 |
# ==============================================================================
|
| 409 |
|
| 410 |
class ChunkedVideoProcessor:
|
|
@@ -511,21 +393,19 @@ def reassemble_chunks(self, processed_chunk_paths: List[str], output_path: str)
|
|
| 511 |
raise
|
| 512 |
|
| 513 |
# ==============================================================================
|
| 514 |
-
# CHAPTER
|
| 515 |
# ==============================================================================
|
| 516 |
|
| 517 |
class TwoStageProcessor:
|
| 518 |
"""
|
| 519 |
-
Efficient two-stage
|
| 520 |
|
| 521 |
-
This processor avoids
|
| 522 |
1. SAM2 creates reference mask from first few frames
|
| 523 |
-
2. MatAnyone processes entire video using reference mask → pha.mp4
|
| 524 |
-
3.
|
| 525 |
-
|
| 526 |
-
|
| 527 |
-
6. Chunked processing for long videos with memory management
|
| 528 |
-
7. Dimension mismatch handling for robust processing
|
| 529 |
"""
|
| 530 |
|
| 531 |
def __init__(self, sam2_handler, matanyone_handler, temp_dir: Optional[str] = None):
|
|
@@ -536,7 +416,6 @@ def __init__(self, sam2_handler, matanyone_handler, temp_dir: Optional[str] = No
|
|
| 536 |
self.config = ProcessingConfig()
|
| 537 |
self.memory_manager = MemoryManager(self.config)
|
| 538 |
self.chunked_processor = ChunkedVideoProcessor(self.temp_dir, self.config)
|
| 539 |
-
self.memory_tester = MemoryTester()
|
| 540 |
|
| 541 |
# Ensure temp directory exists
|
| 542 |
os.makedirs(self.temp_dir, exist_ok=True)
|
|
@@ -544,29 +423,6 @@ def __init__(self, sam2_handler, matanyone_handler, temp_dir: Optional[str] = No
|
|
| 544 |
|
| 545 |
# Log initial memory state
|
| 546 |
logger.info(self.memory_manager.get_gpu_memory_info())
|
| 547 |
-
|
| 548 |
-
# Memory test DISABLED - it deletes the predictor causing failures
|
| 549 |
-
if self.config.ENABLE_MEMORY_TESTING:
|
| 550 |
-
logger.info("Memory testing is disabled to prevent predictor deletion")
|
| 551 |
-
# self.run_memory_test() # DO NOT RUN - causes predictor deletion
|
| 552 |
-
|
| 553 |
-
def run_memory_test(self):
|
| 554 |
-
"""Run memory management tests. WARNING: This will delete the predictor!"""
|
| 555 |
-
try:
|
| 556 |
-
logger.warning("Running memory tests - this may affect handler state!")
|
| 557 |
-
test_results = self.memory_tester.test_memory_cleanup(self)
|
| 558 |
-
|
| 559 |
-
if test_results['cleanup_effective']:
|
| 560 |
-
logger.info("✅ Memory management tests passed")
|
| 561 |
-
else:
|
| 562 |
-
logger.warning(f"⚠️ Memory management issues detected: {test_results['memory_leaks']}")
|
| 563 |
-
|
| 564 |
-
# Try to restore predictor after test
|
| 565 |
-
if hasattr(self.sam2_handler, 'predictor') and self.sam2_handler.predictor is None:
|
| 566 |
-
logger.warning("SAM2 predictor was deleted by memory test - manual restoration required")
|
| 567 |
-
|
| 568 |
-
except Exception as e:
|
| 569 |
-
logger.warning(f"Memory tests failed: {e}")
|
| 570 |
|
| 571 |
def process_video(self,
|
| 572 |
video_path: str,
|
|
@@ -577,13 +433,13 @@ def process_video(self,
|
|
| 577 |
callback: Optional[callable] = None,
|
| 578 |
**kwargs) -> Tuple[str, str]:
|
| 579 |
"""
|
| 580 |
-
Main processing pipeline - video to video with
|
| 581 |
|
| 582 |
Returns:
|
| 583 |
Tuple[str, str]: (final_output_path, status_message)
|
| 584 |
"""
|
| 585 |
try:
|
| 586 |
-
logger.info(f"🎬 Two-Stage
|
| 587 |
logger.info(f"🎯 Background: {background_path}")
|
| 588 |
logger.info(f"📁 Temp: {self.temp_dir}")
|
| 589 |
logger.info(f"🧠 Initial {self.memory_manager.get_gpu_memory_info()}")
|
|
@@ -649,7 +505,7 @@ def _process_chunked_video(self,
|
|
| 649 |
else:
|
| 650 |
raise RuntimeError(f"Chunk {i+1} processing failed: {status}")
|
| 651 |
|
| 652 |
-
#
|
| 653 |
logger.info(f"Cleaning up after chunk {i+1}...")
|
| 654 |
self.memory_manager.cleanup_stage(f"Chunk_{i+1}", force=True)
|
| 655 |
|
|
@@ -675,7 +531,6 @@ def _process_chunked_video(self,
|
|
| 675 |
logger.error(f"Chunked processing failed: {e}")
|
| 676 |
raise
|
| 677 |
|
| 678 |
-
@MemoryTester.monitor_memory_during_processing
|
| 679 |
def _process_single_video(self,
|
| 680 |
video_path: str,
|
| 681 |
background_path: str,
|
|
@@ -707,21 +562,11 @@ def _process_single_video(self,
|
|
| 707 |
self.memory_manager.cleanup_model(self.matanyone_handler, "MatAnyone", deep_cleanup=False)
|
| 708 |
self.memory_manager.cleanup_stage("MatAnyone", force=True)
|
| 709 |
|
| 710 |
-
# Stage 3:
|
| 711 |
-
if callback:
|
| 712 |
-
callback("Creating green screen intermediate video...", 70)
|
| 713 |
-
logger.info("STAGE 3: Creating green screen intermediate video...")
|
| 714 |
-
green_screen_path = self._stage3_create_green_screen_video(video_path, alpha_video_path)
|
| 715 |
-
|
| 716 |
-
# Memory cleanup after green screen
|
| 717 |
-
if self.memory_manager.should_clear_memory():
|
| 718 |
-
self.memory_manager.cleanup_stage("GreenScreen")
|
| 719 |
-
|
| 720 |
-
# Stage 4: Adaptive final compositing
|
| 721 |
if callback:
|
| 722 |
-
callback("
|
| 723 |
-
logger.info("STAGE
|
| 724 |
-
final_path = self.
|
| 725 |
|
| 726 |
# Final memory cleanup
|
| 727 |
self.memory_manager.cleanup_stage("Final")
|
|
@@ -760,10 +605,9 @@ def cleanup(self):
|
|
| 760 |
logger.warning(f"Failed to cleanup: {e}")
|
| 761 |
|
| 762 |
# ==============================================================================
|
| 763 |
-
# CHAPTER
|
| 764 |
# ==============================================================================
|
| 765 |
|
| 766 |
-
@MemoryTester.monitor_memory_during_processing
|
| 767 |
def _stage1_create_reference_mask(self, video_path: str) -> str:
|
| 768 |
"""
|
| 769 |
Stage 1: Create robust reference mask from first few frames using SAM2.
|
|
@@ -886,10 +730,9 @@ def _combine_reference_masks(self, masks: list) -> np.ndarray:
|
|
| 886 |
return result
|
| 887 |
|
| 888 |
# ==============================================================================
|
| 889 |
-
# CHAPTER
|
| 890 |
# ==============================================================================
|
| 891 |
|
| 892 |
-
@MemoryTester.monitor_memory_during_processing
|
| 893 |
def _stage2_matanyone_processing(self, video_path: str, reference_mask_path: str, trim_seconds: Optional[int]) -> str:
|
| 894 |
"""
|
| 895 |
Stage 2: Process entire video through MatAnyone using reference mask.
|
|
@@ -929,7 +772,7 @@ def _stage2_matanyone_processing(self, video_path: str, reference_mask_path: str
|
|
| 929 |
r_dilate=15,
|
| 930 |
suffix='pha',
|
| 931 |
save_image=False,
|
| 932 |
-
max_size=max_size #
|
| 933 |
)
|
| 934 |
|
| 935 |
except Exception as e:
|
|
@@ -948,7 +791,6 @@ def _stage2_matanyone_processing(self, video_path: str, reference_mask_path: str
|
|
| 948 |
|
| 949 |
# Verify MatAnyone output
|
| 950 |
if not alpha_output_path or not os.path.exists(alpha_output_path):
|
| 951 |
-
# List what files were actually created
|
| 952 |
files_created = os.listdir(matanyone_dir) if os.path.exists(matanyone_dir) else []
|
| 953 |
raise RuntimeError(f"MatAnyone did not create pha.mp4. Files created: {files_created}")
|
| 954 |
|
|
@@ -990,24 +832,22 @@ def _trim_video(self, input_path: str, output_path: str, seconds: int):
|
|
| 990 |
raise
|
| 991 |
|
| 992 |
# ==============================================================================
|
| 993 |
-
# CHAPTER
|
| 994 |
# ==============================================================================
|
| 995 |
|
| 996 |
-
|
| 997 |
-
|
| 998 |
"""
|
| 999 |
-
Stage 3:
|
| 1000 |
-
|
| 1001 |
-
Uses the alpha video from MatAnyone to create a green screen version:
|
| 1002 |
-
- Where alpha = 1 (person): keep original video
|
| 1003 |
-
- Where alpha = 0 (background): make it green (#00FF00)
|
| 1004 |
|
| 1005 |
-
|
|
|
|
| 1006 |
"""
|
| 1007 |
try:
|
| 1008 |
-
|
|
|
|
| 1009 |
|
| 1010 |
-
# Open
|
| 1011 |
original_cap = cv2.VideoCapture(original_video_path)
|
| 1012 |
alpha_cap = cv2.VideoCapture(alpha_video_path)
|
| 1013 |
|
|
@@ -1026,12 +866,26 @@ def _stage3_create_green_screen_video(self, original_video_path: str, alpha_vide
|
|
| 1026 |
alpha_width = int(alpha_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
|
| 1027 |
alpha_height = int(alpha_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
|
| 1028 |
|
| 1029 |
-
logger.info(f"
|
| 1030 |
logger.info(f"Alpha video dimensions: {alpha_width}x{alpha_height}")
|
| 1031 |
|
| 1032 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1033 |
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
|
| 1034 |
-
out = cv2.VideoWriter(
|
| 1035 |
|
| 1036 |
frame_count = 0
|
| 1037 |
while True:
|
|
@@ -1042,13 +896,24 @@ def _stage3_create_green_screen_video(self, original_video_path: str, alpha_vide
|
|
| 1042 |
if not ret_orig or not ret_alpha:
|
| 1043 |
break
|
| 1044 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1045 |
# Convert alpha frame to grayscale mask
|
| 1046 |
if len(alpha_frame.shape) == 3:
|
| 1047 |
alpha_mask = cv2.cvtColor(alpha_frame, cv2.COLOR_BGR2GRAY)
|
| 1048 |
else:
|
| 1049 |
alpha_mask = alpha_frame
|
| 1050 |
|
| 1051 |
-
#
|
| 1052 |
if alpha_mask.shape[:2] != orig_frame.shape[:2]:
|
| 1053 |
if frame_count == 0: # Log only once
|
| 1054 |
logger.info(f"Resizing alpha from {alpha_mask.shape[:2]} to {orig_frame.shape[:2]}")
|
|
@@ -1057,515 +922,59 @@ def _stage3_create_green_screen_video(self, original_video_path: str, alpha_vide
|
|
| 1057 |
# Normalize alpha to 0-1 range
|
| 1058 |
alpha_normalized = alpha_mask.astype(np.float32) / 255.0
|
| 1059 |
|
| 1060 |
-
# Create
|
| 1061 |
-
green_bg = np.full_like(orig_frame, self.config.GREEN_COLOR, dtype=np.uint8)
|
| 1062 |
-
|
| 1063 |
-
# Composite: person where alpha=1, green where alpha=0
|
| 1064 |
-
# alpha_3d for broadcasting across color channels
|
| 1065 |
alpha_3d = np.stack([alpha_normalized] * 3, axis=2)
|
| 1066 |
|
| 1067 |
-
#
|
| 1068 |
orig_frame_float = orig_frame.astype(np.float32)
|
| 1069 |
-
|
| 1070 |
|
| 1071 |
-
#
|
| 1072 |
-
composite = alpha_3d * orig_frame_float + (1 - alpha_3d) *
|
| 1073 |
composite = np.clip(composite, 0, 255).astype(np.uint8)
|
| 1074 |
|
| 1075 |
# Write frame
|
| 1076 |
out.write(composite)
|
| 1077 |
frame_count += 1
|
| 1078 |
|
| 1079 |
-
#
|
| 1080 |
if frame_count % self.config.MEMORY_CHECK_INTERVAL == 0:
|
| 1081 |
-
logger.info(f"
|
| 1082 |
if self.memory_manager.should_clear_memory():
|
| 1083 |
-
logger.info("Memory high during
|
| 1084 |
self.memory_manager.clear_gpu_cache()
|
| 1085 |
|
| 1086 |
# Save debug frame occasionally
|
| 1087 |
if self.config.SAVE_DEBUG_FILES and frame_count % self.config.DEBUG_FRAME_INTERVAL == 0:
|
| 1088 |
-
debug_path = os.path.join(self.temp_dir, f"
|
| 1089 |
cv2.imwrite(debug_path, composite)
|
| 1090 |
|
| 1091 |
# Cleanup
|
| 1092 |
original_cap.release()
|
| 1093 |
alpha_cap.release()
|
| 1094 |
out.release()
|
|
|
|
|
|
|
| 1095 |
|
| 1096 |
if frame_count == 0:
|
| 1097 |
-
raise RuntimeError("No frames processed for
|
| 1098 |
-
|
| 1099 |
-
logger.info(f"✅ Green screen video created: {frame_count} frames → {green_screen_path}")
|
| 1100 |
-
return green_screen_path
|
| 1101 |
-
|
| 1102 |
-
except Exception as e:
|
| 1103 |
-
logger.error(f"Stage 3 failed: {e}")
|
| 1104 |
-
raise
|
| 1105 |
-
|
| 1106 |
-
# ==============================================================================
|
| 1107 |
-
# CHAPTER 11: STAGE 4 - ADAPTIVE CHROMA KEY COMPOSITING
|
| 1108 |
-
# ==============================================================================
|
| 1109 |
-
|
| 1110 |
-
@MemoryTester.monitor_memory_during_processing
|
| 1111 |
-
def _stage4_adaptive_compositing(self, green_screen_path: str, background_path: str,
|
| 1112 |
-
output_path: str, quality: str) -> str:
|
| 1113 |
-
"""
|
| 1114 |
-
Stage 4: Final compositing with adaptive threshold optimization.
|
| 1115 |
-
|
| 1116 |
-
Iteratively adjusts chroma key threshold until green is properly removed.
|
| 1117 |
-
"""
|
| 1118 |
-
try:
|
| 1119 |
-
# Get quality profile
|
| 1120 |
-
profile = QualityManager.get_profile(quality)
|
| 1121 |
-
|
| 1122 |
-
# Load videos/images
|
| 1123 |
-
green_clip = VideoFileClip(green_screen_path)
|
| 1124 |
-
|
| 1125 |
-
if background_path.lower().endswith(('.mp4', '.avi', '.mov', '.mkv')):
|
| 1126 |
-
bg_clip = VideoFileClip(background_path)
|
| 1127 |
-
if bg_clip.duration < green_clip.duration:
|
| 1128 |
-
bg_clip = bg_clip.loop(duration=green_clip.duration)
|
| 1129 |
-
bg_clip = bg_clip.subclip(0, green_clip.duration)
|
| 1130 |
-
else:
|
| 1131 |
-
bg_clip = ImageClip(background_path, duration=green_clip.duration)
|
| 1132 |
-
|
| 1133 |
-
bg_clip = bg_clip.resize((green_clip.w, green_clip.h))
|
| 1134 |
-
|
| 1135 |
-
# Start with auto-detected threshold
|
| 1136 |
-
initial_threshold = self._auto_detect_chroma_threshold(green_screen_path)
|
| 1137 |
-
logger.info(f"Initial auto-detected threshold: {initial_threshold:.4f}")
|
| 1138 |
-
|
| 1139 |
-
# Adaptive optimization
|
| 1140 |
-
best_threshold = self._optimize_chroma_threshold(
|
| 1141 |
-
green_clip, bg_clip, initial_threshold,
|
| 1142 |
-
max_iterations=self.config.ADAPTIVE_MAX_ITERATIONS
|
| 1143 |
-
)
|
| 1144 |
-
|
| 1145 |
-
logger.info(f"✅ Optimized threshold: {best_threshold:.4f} (started from {initial_threshold:.4f})")
|
| 1146 |
-
|
| 1147 |
-
# Apply final chroma key with optimized threshold
|
| 1148 |
-
green_screen_keyed = green_clip.fx(vfx.mask_color,
|
| 1149 |
-
color=self.config.GREEN_COLOR_NORMALIZED,
|
| 1150 |
-
thr=best_threshold,
|
| 1151 |
-
s=0.1) # Reduced smoothing for sharper edges
|
| 1152 |
-
|
| 1153 |
-
# Composite and write
|
| 1154 |
-
final_clip = CompositeVideoClip([bg_clip, green_screen_keyed])
|
| 1155 |
-
|
| 1156 |
-
write_params = {
|
| 1157 |
-
'codec': self.config.VIDEO_CODEC,
|
| 1158 |
-
'bitrate': profile['bitrate'],
|
| 1159 |
-
'audio_codec': self.config.AUDIO_CODEC,
|
| 1160 |
-
'verbose': False,
|
| 1161 |
-
'logger': None
|
| 1162 |
-
}
|
| 1163 |
-
|
| 1164 |
-
if 'crf' in profile:
|
| 1165 |
-
write_params['ffmpeg_params'] = ['-crf', str(profile['crf'])]
|
| 1166 |
-
|
| 1167 |
-
final_clip.write_videofile(output_path, **write_params)
|
| 1168 |
-
|
| 1169 |
-
# Cleanup
|
| 1170 |
-
green_clip.close()
|
| 1171 |
-
bg_clip.close()
|
| 1172 |
-
final_clip.close()
|
| 1173 |
|
|
|
|
| 1174 |
if not os.path.exists(output_path):
|
| 1175 |
-
raise RuntimeError("
|
| 1176 |
-
|
| 1177 |
-
file_size = os.path.getsize(output_path) / (1024 * 1024)
|
| 1178 |
-
logger.info(f"✅
|
|
|
|
| 1179 |
|
| 1180 |
return output_path
|
| 1181 |
|
| 1182 |
except Exception as e:
|
| 1183 |
-
logger.error(f"Stage
|
| 1184 |
raise
|
| 1185 |
-
|
| 1186 |
-
def _optimize_chroma_threshold(self, green_clip, bg_clip, initial_threshold: float,
|
| 1187 |
-
max_iterations: int = 10) -> float:
|
| 1188 |
-
"""
|
| 1189 |
-
Iteratively optimize chroma key threshold by analyzing output quality.
|
| 1190 |
-
"""
|
| 1191 |
-
threshold = initial_threshold
|
| 1192 |
-
best_threshold = threshold
|
| 1193 |
-
best_score = float('inf')
|
| 1194 |
-
|
| 1195 |
-
# Binary search bounds
|
| 1196 |
-
low = max(self.config.MIN_THRESHOLD, initial_threshold * 0.5)
|
| 1197 |
-
high = min(self.config.MAX_THRESHOLD, initial_threshold * 2.0)
|
| 1198 |
-
|
| 1199 |
-
# Track tested thresholds to avoid repetition
|
| 1200 |
-
tested_thresholds = set()
|
| 1201 |
-
|
| 1202 |
-
for iteration in range(max_iterations):
|
| 1203 |
-
# Round threshold to avoid tiny differences
|
| 1204 |
-
threshold = round(threshold, 4)
|
| 1205 |
-
|
| 1206 |
-
# Skip if already tested
|
| 1207 |
-
if threshold in tested_thresholds:
|
| 1208 |
-
logger.info(f" Threshold {threshold:.4f} already tested, adjusting...")
|
| 1209 |
-
threshold = (low + high) / 2
|
| 1210 |
-
continue
|
| 1211 |
-
|
| 1212 |
-
tested_thresholds.add(threshold)
|
| 1213 |
-
logger.info(f"🔄 Optimization iteration {iteration + 1}/{max_iterations}, testing threshold: {threshold:.4f}")
|
| 1214 |
-
|
| 1215 |
-
# Apply chroma key with current threshold
|
| 1216 |
-
keyed = green_clip.fx(vfx.mask_color,
|
| 1217 |
-
color=self.config.GREEN_COLOR_NORMALIZED,
|
| 1218 |
-
thr=threshold,
|
| 1219 |
-
s=0.1)
|
| 1220 |
-
|
| 1221 |
-
# Composite
|
| 1222 |
-
test_composite = CompositeVideoClip([bg_clip, keyed])
|
| 1223 |
-
|
| 1224 |
-
# Extract test frames (beginning, middle, end)
|
| 1225 |
-
test_times = [
|
| 1226 |
-
green_clip.duration * 0.1,
|
| 1227 |
-
green_clip.duration * 0.5,
|
| 1228 |
-
green_clip.duration * 0.9
|
| 1229 |
-
]
|
| 1230 |
-
|
| 1231 |
-
total_green_score = 0
|
| 1232 |
-
total_transparency_score = 0
|
| 1233 |
-
|
| 1234 |
-
for test_time in test_times:
|
| 1235 |
-
test_frame = test_composite.get_frame(test_time)
|
| 1236 |
-
bg_frame = bg_clip.get_frame(test_time)
|
| 1237 |
-
|
| 1238 |
-
# Analyze the frame for green residue
|
| 1239 |
-
green_score = self._analyze_green_residue(test_frame)
|
| 1240 |
-
transparency_score = self._analyze_transparency_quality(test_frame, bg_frame)
|
| 1241 |
-
|
| 1242 |
-
total_green_score += green_score
|
| 1243 |
-
total_transparency_score += transparency_score
|
| 1244 |
-
|
| 1245 |
-
# Average scores
|
| 1246 |
-
avg_green_score = total_green_score / len(test_times)
|
| 1247 |
-
avg_transparency_score = total_transparency_score / len(test_times)
|
| 1248 |
-
|
| 1249 |
-
# Combined score (lower is better)
|
| 1250 |
-
total_score = avg_green_score + avg_transparency_score * 0.5
|
| 1251 |
-
|
| 1252 |
-
logger.info(f" 📊 Green residue: {avg_green_score:.4f}, Transparency: {avg_transparency_score:.4f}, Total: {total_score:.4f}")
|
| 1253 |
-
|
| 1254 |
-
# Update best if improved
|
| 1255 |
-
if total_score < best_score:
|
| 1256 |
-
best_score = total_score
|
| 1257 |
-
best_threshold = threshold
|
| 1258 |
-
logger.info(f" ✅ New best threshold: {best_threshold:.4f} (score: {best_score:.4f})")
|
| 1259 |
-
|
| 1260 |
-
# Check if we're good enough
|
| 1261 |
-
if avg_green_score < self.config.ADAPTIVE_GREEN_TOLERANCE and \
|
| 1262 |
-
avg_transparency_score < self.config.ADAPTIVE_TRANSPARENCY_TOLERANCE:
|
| 1263 |
-
logger.info(f" 🎯 Acceptable quality reached! Stopping optimization.")
|
| 1264 |
-
break
|
| 1265 |
-
|
| 1266 |
-
# Adjust threshold using binary search
|
| 1267 |
-
if avg_green_score > 0.05: # Too much green remains
|
| 1268 |
-
logger.info(f" 🟢 Too much green, decreasing threshold")
|
| 1269 |
-
high = threshold
|
| 1270 |
-
threshold = (low + threshold) / 2
|
| 1271 |
-
elif avg_transparency_score > 0.3: # Too much was removed
|
| 1272 |
-
logger.info(f" 👤 Subject too transparent, increasing threshold")
|
| 1273 |
-
low = threshold
|
| 1274 |
-
threshold = (threshold + high) / 2
|
| 1275 |
-
else:
|
| 1276 |
-
# Fine-tune around current value
|
| 1277 |
-
if avg_green_score > avg_transparency_score:
|
| 1278 |
-
threshold *= 0.95 # Slightly more aggressive
|
| 1279 |
-
else:
|
| 1280 |
-
threshold *= 1.05 # Slightly less aggressive
|
| 1281 |
-
|
| 1282 |
-
# Ensure we stay in bounds
|
| 1283 |
-
threshold = np.clip(threshold, self.config.MIN_THRESHOLD, self.config.MAX_THRESHOLD)
|
| 1284 |
-
|
| 1285 |
-
# Clean up test composite
|
| 1286 |
-
test_composite.close()
|
| 1287 |
-
keyed.close()
|
| 1288 |
-
|
| 1289 |
-
# Stop if converged
|
| 1290 |
-
if abs(threshold - best_threshold) < 0.001 and iteration > 3:
|
| 1291 |
-
logger.info(" 📍 Converged, stopping optimization")
|
| 1292 |
-
break
|
| 1293 |
-
|
| 1294 |
-
return best_threshold
|
| 1295 |
-
|
| 1296 |
-
def _analyze_green_residue(self, frame: np.ndarray) -> float:
|
| 1297 |
-
"""
|
| 1298 |
-
Analyze how much green remains in the frame.
|
| 1299 |
-
Returns score from 0 (no green) to 1 (lots of green).
|
| 1300 |
-
"""
|
| 1301 |
-
# Convert to float
|
| 1302 |
-
img = frame.astype(np.float32) / 255.0
|
| 1303 |
-
|
| 1304 |
-
# Detect pure green pixels
|
| 1305 |
-
green_pixels = (
|
| 1306 |
-
(img[:,:,1] > 0.7) & # High green
|
| 1307 |
-
(img[:,:,0] < 0.3) & # Low red
|
| 1308 |
-
(img[:,:,2] < 0.3) # Low blue
|
| 1309 |
-
)
|
| 1310 |
-
|
| 1311 |
-
# Calculate percentage of green pixels
|
| 1312 |
-
green_ratio = np.sum(green_pixels) / (frame.shape[0] * frame.shape[1])
|
| 1313 |
-
|
| 1314 |
-
# Also check for greenish tint in other pixels
|
| 1315 |
-
greenish_pixels = (
|
| 1316 |
-
(img[:,:,1] > img[:,:,0] * 1.5) & # Green > Red * 1.5
|
| 1317 |
-
(img[:,:,1] > img[:,:,2] * 1.5) & # Green > Blue * 1.5
|
| 1318 |
-
(img[:,:,1] > 0.4) # Significant green
|
| 1319 |
-
)
|
| 1320 |
-
|
| 1321 |
-
greenish_ratio = np.sum(greenish_pixels) / (frame.shape[0] * frame.shape[1])
|
| 1322 |
-
|
| 1323 |
-
# Combined score
|
| 1324 |
-
score = green_ratio + greenish_ratio * 0.3
|
| 1325 |
-
|
| 1326 |
-
return min(1.0, score)
|
| 1327 |
-
|
| 1328 |
-
def _analyze_transparency_quality(self, composite_frame: np.ndarray, bg_frame: np.ndarray) -> float:
|
| 1329 |
-
"""
|
| 1330 |
-
Analyze if too much of the subject was removed.
|
| 1331 |
-
Returns score from 0 (good) to 1 (too much removed).
|
| 1332 |
-
"""
|
| 1333 |
-
# Calculate difference between composite and background
|
| 1334 |
-
diff = np.abs(composite_frame.astype(np.float32) - bg_frame.astype(np.float32))
|
| 1335 |
-
|
| 1336 |
-
# Sum of differences (more difference = more of subject preserved)
|
| 1337 |
-
total_diff = np.sum(diff) / (255.0 * 3 * composite_frame.shape[0] * composite_frame.shape[1])
|
| 1338 |
-
|
| 1339 |
-
# If difference is too small, too much was removed
|
| 1340 |
-
if total_diff < 0.05: # Less than 5% different from background
|
| 1341 |
-
return 1.0 # Bad - subject was removed
|
| 1342 |
-
elif total_diff > 0.3: # More than 30% different
|
| 1343 |
-
return 0.0 # Good - subject well preserved
|
| 1344 |
-
else:
|
| 1345 |
-
# Linear interpolation
|
| 1346 |
-
return 1.0 - (total_diff - 0.05) / 0.25
|
| 1347 |
-
|
| 1348 |
-
# ==============================================================================
|
| 1349 |
-
# CHAPTER 12: AUTO CHROMA KEY THRESHOLD DETECTION
|
| 1350 |
-
# ==============================================================================
|
| 1351 |
-
|
| 1352 |
-
def _auto_detect_chroma_threshold(self, green_screen_path: str) -> float:
|
| 1353 |
-
"""
|
| 1354 |
-
Auto-detect initial chroma key threshold by analyzing green screen video.
|
| 1355 |
-
|
| 1356 |
-
This method:
|
| 1357 |
-
1. Samples frames from the green screen video
|
| 1358 |
-
2. Identifies green background pixels vs person pixels
|
| 1359 |
-
3. Calculates color distance between greenest background and person
|
| 1360 |
-
4. Sets threshold as percentage of that distance for initial guess
|
| 1361 |
-
"""
|
| 1362 |
-
try:
|
| 1363 |
-
logger.info("Analyzing green screen video for initial threshold detection...")
|
| 1364 |
-
|
| 1365 |
-
# Open green screen video
|
| 1366 |
-
cap = cv2.VideoCapture(green_screen_path)
|
| 1367 |
-
if not cap.isOpened():
|
| 1368 |
-
logger.warning("Cannot open green screen video for analysis")
|
| 1369 |
-
return self.config.AUTO_DETECTION_FALLBACK
|
| 1370 |
-
|
| 1371 |
-
# Get video properties
|
| 1372 |
-
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
|
| 1373 |
-
|
| 1374 |
-
# Sample frames evenly across the video
|
| 1375 |
-
frame_indices = np.linspace(0, total_frames - 1,
|
| 1376 |
-
min(self.config.AUTO_DETECTION_FRAMES, total_frames),
|
| 1377 |
-
dtype=int)
|
| 1378 |
-
|
| 1379 |
-
green_pixels = []
|
| 1380 |
-
person_pixels = []
|
| 1381 |
-
|
| 1382 |
-
for frame_idx in frame_indices:
|
| 1383 |
-
# Seek to specific frame
|
| 1384 |
-
cap.set(cv2.CAP_PROP_POS_FRAMES, frame_idx)
|
| 1385 |
-
ret, frame = cap.read()
|
| 1386 |
-
|
| 1387 |
-
if not ret:
|
| 1388 |
-
continue
|
| 1389 |
-
|
| 1390 |
-
# Convert BGR to RGB for analysis
|
| 1391 |
-
frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
|
| 1392 |
-
|
| 1393 |
-
# Analyze this frame
|
| 1394 |
-
green_px, person_px = self._analyze_frame_colors(frame_rgb)
|
| 1395 |
-
green_pixels.extend(green_px)
|
| 1396 |
-
person_pixels.extend(person_px)
|
| 1397 |
-
|
| 1398 |
-
cap.release()
|
| 1399 |
-
|
| 1400 |
-
if len(green_pixels) == 0 or len(person_pixels) == 0:
|
| 1401 |
-
logger.warning("Insufficient color data for auto-detection")
|
| 1402 |
-
return self.config.AUTO_DETECTION_FALLBACK
|
| 1403 |
-
|
| 1404 |
-
# Convert to numpy arrays
|
| 1405 |
-
green_pixels = np.array(green_pixels)
|
| 1406 |
-
person_pixels = np.array(person_pixels)
|
| 1407 |
-
|
| 1408 |
-
logger.info(f"Analyzed {len(green_pixels)} green pixels and {len(person_pixels)} person pixels")
|
| 1409 |
-
|
| 1410 |
-
# Calculate initial threshold
|
| 1411 |
-
threshold = self._calculate_initial_threshold(green_pixels, person_pixels)
|
| 1412 |
-
|
| 1413 |
-
# Clamp to safe range
|
| 1414 |
-
threshold = np.clip(threshold, self.config.MIN_THRESHOLD, self.config.MAX_THRESHOLD)
|
| 1415 |
-
|
| 1416 |
-
logger.info(f"Initial threshold calculated: {threshold:.4f}")
|
| 1417 |
-
return threshold
|
| 1418 |
-
|
| 1419 |
-
except Exception as e:
|
| 1420 |
-
logger.warning(f"Auto-detection failed: {e}, using fallback")
|
| 1421 |
-
return self.config.AUTO_DETECTION_FALLBACK
|
| 1422 |
-
|
| 1423 |
-
def _analyze_frame_colors(self, frame_rgb: np.ndarray) -> Tuple[list, list]:
|
| 1424 |
-
"""
|
| 1425 |
-
Analyze a single frame to identify green pixels vs person pixels.
|
| 1426 |
-
|
| 1427 |
-
Returns:
|
| 1428 |
-
Tuple[list, list]: (green_pixels, person_pixels) as lists of RGB values
|
| 1429 |
-
"""
|
| 1430 |
-
try:
|
| 1431 |
-
# Convert to normalized float
|
| 1432 |
-
frame_norm = frame_rgb.astype(np.float32) / 255.0
|
| 1433 |
-
|
| 1434 |
-
# Identify likely green pixels (high green, low red/blue)
|
| 1435 |
-
green_mask = (
|
| 1436 |
-
(frame_norm[:, :, 1] > 0.7) & # High green
|
| 1437 |
-
(frame_norm[:, :, 0] < 0.3) & # Low red
|
| 1438 |
-
(frame_norm[:, :, 2] < 0.3) # Low blue
|
| 1439 |
-
)
|
| 1440 |
-
|
| 1441 |
-
# Identify likely person pixels (balanced colors, not green-dominant)
|
| 1442 |
-
person_mask = (
|
| 1443 |
-
(frame_norm[:, :, 1] < 0.6) | # Not too green
|
| 1444 |
-
((frame_norm[:, :, 0] > 0.2) & (frame_norm[:, :, 2] > 0.2)) # Some red and blue
|
| 1445 |
-
)
|
| 1446 |
-
|
| 1447 |
-
# Sample pixels (subsample for performance)
|
| 1448 |
-
# Sample green pixels
|
| 1449 |
-
green_coords = np.where(green_mask)
|
| 1450 |
-
if len(green_coords[0]) > 1000:
|
| 1451 |
-
indices = np.random.choice(len(green_coords[0]), 1000, replace=False)
|
| 1452 |
-
green_coords = (green_coords[0][indices], green_coords[1][indices])
|
| 1453 |
-
|
| 1454 |
-
green_pixels = frame_norm[green_coords].tolist()
|
| 1455 |
-
|
| 1456 |
-
# Sample person pixels
|
| 1457 |
-
person_coords = np.where(person_mask)
|
| 1458 |
-
if len(person_coords[0]) > 1000:
|
| 1459 |
-
indices = np.random.choice(len(person_coords[0]), 1000, replace=False)
|
| 1460 |
-
person_coords = (person_coords[0][indices], person_coords[1][indices])
|
| 1461 |
-
|
| 1462 |
-
person_pixels = frame_norm[person_coords].tolist()
|
| 1463 |
-
|
| 1464 |
-
return green_pixels, person_pixels
|
| 1465 |
-
|
| 1466 |
-
except Exception as e:
|
| 1467 |
-
logger.warning(f"Frame color analysis failed: {e}")
|
| 1468 |
-
return [], []
|
| 1469 |
-
|
| 1470 |
-
def _calculate_initial_threshold(self, green_pixels: np.ndarray, person_pixels: np.ndarray) -> float:
    """
    Calculate an initial chroma-key threshold from sampled pixel colours.

    Strategy (when scikit-learn is available and each set has more than
    50 samples):
    1. Convert both pixel sets to LAB for a more perceptual distance.
    2. Cluster each set with KMeans.
    3. Pick the green cluster centre closest to pure green (0, 1, 0).
    4. Pick the person cluster centre closest to that green centre.
    5. Use 40% of their LAB distance, scaled by 1/100, as the threshold.

    Otherwise, or if anything above raises, the simple RGB-space
    calculation is used instead.

    Args:
        green_pixels: N x 3 array of normalized RGB green-screen samples.
        person_pixels: M x 3 array of normalized RGB person samples.

    Returns:
        float: The unclamped initial threshold.
    """
    try:
        if not (SKLEARN_AVAILABLE and len(green_pixels) > 50 and len(person_pixels) > 50):
            # Fallback: analyze in RGB space
            return self._simple_rgb_threshold(green_pixels, person_pixels)

        def _cluster_centers(lab_points: np.ndarray) -> np.ndarray:
            """Return KMeans cluster centres (at most 5) for lab_points."""
            model = KMeans(n_clusters=min(5, len(lab_points) // 10), random_state=42, n_init=10)
            # Only the centres are needed, so fit() suffices (labels unused)
            return model.fit(lab_points).cluster_centers_

        # Convert to LAB color space for better perceptual distance
        green_centers = _cluster_centers(self._rgb_to_lab_batch(green_pixels))
        person_centers = _cluster_centers(self._rgb_to_lab_batch(person_pixels))

        # Find the green cluster closest to the target (pure) green
        target_green_lab = self._rgb_to_lab(np.array([[[0, 1, 0]]]))[0][0]
        distances_to_target = np.linalg.norm(green_centers - target_green_lab, axis=1)
        main_green_cluster = green_centers[np.argmin(distances_to_target)]

        # Find the person cluster closest to that green
        distances_to_green = np.linalg.norm(person_centers - main_green_cluster, axis=1)
        closest_person_cluster = person_centers[np.argmin(distances_to_green)]

        # Calculate color distance
        color_distance = np.linalg.norm(main_green_cluster - closest_person_cluster)

        # Convert LAB distance to threshold
        # Start conservative - use 40% of distance for initial guess
        threshold = (color_distance / 100.0) * 0.4

        logger.info(f"Color distance analysis: LAB distance={color_distance:.2f}, initial threshold={threshold:.3f}")

        return threshold

    except Exception as e:
        logger.warning(f"Threshold calculation failed: {e}")
        return self._simple_rgb_threshold(green_pixels, person_pixels)
|
| 1522 |
-
|
| 1523 |
-
def _simple_rgb_threshold(self, green_pixels: np.ndarray, person_pixels: np.ndarray) -> float:
    """
    Fallback RGB-based threshold calculation.

    Takes the mean green-screen colour, finds the person pixel closest to
    it in RGB space, and returns half of that distance.

    Args:
        green_pixels: N x 3 array of normalized RGB green-screen samples.
        person_pixels: M x 3 array of normalized RGB person samples.

    Returns:
        float: Half the minimum green-to-person distance, or
        ``self.config.AUTO_DETECTION_FALLBACK`` when either input is empty,
        the result is not finite, or the computation raises.
    """
    try:
        green_arr = np.asarray(green_pixels)
        person_arr = np.asarray(person_pixels)

        # An empty green set would give a NaN mean, and NaN would then be
        # returned as the threshold without any exception being raised.
        if green_arr.size == 0 or person_arr.size == 0:
            logger.warning("RGB fallback failed: no green or person pixels to compare")
            return self.config.AUTO_DETECTION_FALLBACK

        # Find average green pixel
        avg_green = np.mean(green_arr, axis=0)

        # Find person pixel closest to green
        green_distances = np.linalg.norm(person_arr - avg_green, axis=1)
        min_distance = float(np.min(green_distances))

        # Use 50% of minimum distance as initial threshold
        threshold = min_distance * 0.5

        if not np.isfinite(threshold):
            logger.warning(f"RGB fallback failed: non-finite threshold {threshold}")
            return self.config.AUTO_DETECTION_FALLBACK

        logger.info(f"RGB fallback: min_distance={min_distance:.3f}, threshold={threshold:.3f}")
        return threshold

    except Exception as e:
        logger.warning(f"RGB fallback failed: {e}")
        return self.config.AUTO_DETECTION_FALLBACK
|
| 1542 |
-
|
| 1543 |
-
def _rgb_to_lab_batch(self, rgb_batch: np.ndarray) -> np.ndarray:
    """
    Convert a batch of RGB values to LAB color space.

    Args:
        rgb_batch: Array reshapeable to (-1, 1, 3) holding RGB triples.

    Returns:
        np.ndarray: An (N, 3) array of LAB values. If the conversion raises,
        a warning is logged and ``rgb_batch`` is returned unchanged.
    """
    try:
        # OpenCV converts images, so present the batch as an N x 1 float32 image
        column_image = rgb_batch.reshape(-1, 1, 3).astype(np.float32)
        # Convert, then flatten back to one LAB triple per row
        return cv2.cvtColor(column_image, cv2.COLOR_RGB2LAB).reshape(-1, 3)
    except Exception as exc:
        logger.warning(f"LAB conversion failed: {exc}")
        # Return RGB as fallback
        return rgb_batch
|
| 1558 |
-
|
| 1559 |
-
def _rgb_to_lab(self, rgb: np.ndarray) -> np.ndarray:
    """
    Convert a single RGB image to LAB.

    Args:
        rgb: Image array in RGB channel order; it is cast to float32 first.

    Returns:
        np.ndarray: The LAB image. If the conversion raises, a warning is
        logged and ``rgb`` is returned unchanged.
    """
    try:
        as_float = rgb.astype(np.float32)
        lab_image = cv2.cvtColor(as_float, cv2.COLOR_RGB2LAB)
    except Exception as exc:
        logger.warning(f"Single LAB conversion failed: {exc}")
        return rgb
    return lab_image
|
| 1566 |
|
| 1567 |
# ==============================================================================
|
| 1568 |
-
# CHAPTER
|
| 1569 |
# ==============================================================================
|
| 1570 |
|
| 1571 |
def _debug_video_info(self, video_path: str, label: str = "Video"):
|
|
@@ -1622,7 +1031,7 @@ def get_processing_stats(self) -> Dict[str, Any]:
|
|
| 1622 |
return stats
|
| 1623 |
|
| 1624 |
# ==============================================================================
|
| 1625 |
-
# CHAPTER
|
| 1626 |
# ==============================================================================
|
| 1627 |
|
| 1628 |
def create_two_stage_processor(sam2_handler, matanyone_handler, **kwargs):
|
|
@@ -1636,14 +1045,13 @@ def create_two_stage_processor(sam2_handler, matanyone_handler, **kwargs):
|
|
| 1636 |
# Main execution example
|
| 1637 |
if __name__ == "__main__":
|
| 1638 |
# Example usage - replace with your actual handlers
|
| 1639 |
-
logger.info("TwoStageProcessor
|
| 1640 |
logger.info("Use create_two_stage_processor(sam2_handler, matanyone_handler) to initialize")
|
| 1641 |
|
| 1642 |
# Print configuration
|
| 1643 |
config = ProcessingConfig()
|
| 1644 |
-
logger.info(f"
|
| 1645 |
-
logger.info(f"
|
| 1646 |
-
logger.info(f"
|
| 1647 |
logger.info(f"Chunked processing: {config.MAX_CHUNK_DURATION}s chunks with {config.CHUNK_OVERLAP_FRAMES} frame overlap")
|
| 1648 |
-
logger.info(f"Memory management: Cache clearing={'ON' if config.CLEAR_CACHE_AFTER_STAGE else 'OFF'}")
|
| 1649 |
-
logger.info(f"Memory testing: {'DISABLED' if not config.ENABLE_MEMORY_TESTING else 'ENABLED'}")
|
|
|
|
| 1 |
#!/usr/bin/env python3
|
| 2 |
"""
|
| 3 |
+
EFFICIENT Two-Stage Alpha Channel Processing System ✅ 2025-09-09
|
| 4 |
+
VIDEO-TO-VIDEO PIPELINE: Direct alpha compositing without green screen
|
| 5 |
Stage 1: SAM2 creates reference mask from first 3 frames
|
| 6 |
+
Stage 2: MatAnyone processes entire video → pha.mp4 (alpha matte)
|
| 7 |
+
Stage 3: Direct alpha compositing with background → final.mp4
|
|
|
|
| 8 |
FEATURES:
|
| 9 |
+
- No green screen or chroma key needed
|
| 10 |
+
- Direct alpha channel compositing
|
| 11 |
- Chunked processing for long videos with memory management
|
| 12 |
- Dimension mismatch handling
|
| 13 |
- Memory optimization with light/deep cleanup modes
|
|
|
|
| 27 |
import gc
|
| 28 |
import time
|
| 29 |
import traceback
|
| 30 |
+
import subprocess
|
| 31 |
from pathlib import Path
|
| 32 |
from typing import Optional, Tuple, Dict, Any, List
|
| 33 |
from moviepy.editor import VideoFileClip, CompositeVideoClip, ImageClip, concatenate_videoclips
|
|
|
|
| 41 |
)
|
| 42 |
logger = logging.getLogger(__name__)
|
| 43 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
# PyTorch memory management
|
| 45 |
try:
|
| 46 |
import torch
|
|
|
|
| 64 |
CHUNK_OVERLAP_FRAMES = 5 # Frames to overlap between chunks for smooth transitions
|
| 65 |
MAX_PROCESSING_RESOLUTION = None # Keep full resolution for chunks
|
| 66 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 67 |
# Quality settings
|
| 68 |
VIDEO_CODEC = 'libx264'
|
| 69 |
VIDEO_BITRATE = '8000k'
|
|
|
|
| 84 |
ENABLE_MEMORY_TESTING = False # DISABLED: Memory testing causes predictor deletion
|
| 85 |
|
| 86 |
# ==============================================================================
|
| 87 |
+
# CHAPTER 3: MEMORY MANAGEMENT UTILITIES
|
| 88 |
# ==============================================================================
|
| 89 |
|
| 90 |
class MemoryManager:
|
| 91 |
+
"""Utilities for managing GPU and system memory."""
|
| 92 |
|
| 93 |
def __init__(self, config: ProcessingConfig):
|
| 94 |
self.config = config
|
|
|
|
| 252 |
report.append("="*60)
|
| 253 |
return "\n".join(report)
|
| 254 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 255 |
# ==============================================================================
|
| 256 |
+
# CHAPTER 4: QUALITY MANAGER
|
| 257 |
# ==============================================================================
|
| 258 |
|
| 259 |
class QualityManager:
|
|
|
|
| 286 |
return cls.PROFILES.get(quality, cls.PROFILES['medium'])
|
| 287 |
|
| 288 |
# ==============================================================================
|
| 289 |
+
# CHAPTER 5: CHUNKED VIDEO PROCESSOR
|
| 290 |
# ==============================================================================
|
| 291 |
|
| 292 |
class ChunkedVideoProcessor:
|
|
|
|
| 393 |
raise
|
| 394 |
|
| 395 |
# ==============================================================================
|
| 396 |
+
# CHAPTER 6: TWOSTAGEPROCESSOR CLASS DEFINITION
|
| 397 |
# ==============================================================================
|
| 398 |
|
| 399 |
class TwoStageProcessor:
|
| 400 |
"""
|
| 401 |
+
Efficient two-stage alpha channel processor with video-to-video pipeline.
|
| 402 |
|
| 403 |
+
This processor avoids green screen entirely by using alpha channels:
|
| 404 |
1. SAM2 creates reference mask from first few frames
|
| 405 |
+
2. MatAnyone processes entire video using reference mask → pha.mp4 (alpha matte)
|
| 406 |
+
3. Direct alpha compositing with background → final.mp4
|
| 407 |
+
|
| 408 |
+
No chroma key or green screen needed!
|
|
|
|
|
|
|
| 409 |
"""
|
| 410 |
|
| 411 |
def __init__(self, sam2_handler, matanyone_handler, temp_dir: Optional[str] = None):
|
|
|
|
| 416 |
self.config = ProcessingConfig()
|
| 417 |
self.memory_manager = MemoryManager(self.config)
|
| 418 |
self.chunked_processor = ChunkedVideoProcessor(self.temp_dir, self.config)
|
|
|
|
| 419 |
|
| 420 |
# Ensure temp directory exists
|
| 421 |
os.makedirs(self.temp_dir, exist_ok=True)
|
|
|
|
| 423 |
|
| 424 |
# Log initial memory state
|
| 425 |
logger.info(self.memory_manager.get_gpu_memory_info())
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 426 |
|
| 427 |
def process_video(self,
|
| 428 |
video_path: str,
|
|
|
|
| 433 |
callback: Optional[callable] = None,
|
| 434 |
**kwargs) -> Tuple[str, str]:
|
| 435 |
"""
|
| 436 |
+
Main processing pipeline - video to video with alpha compositing.
|
| 437 |
|
| 438 |
Returns:
|
| 439 |
Tuple[str, str]: (final_output_path, status_message)
|
| 440 |
"""
|
| 441 |
try:
|
| 442 |
+
logger.info(f"🎬 Two-Stage Alpha Pipeline: {video_path}")
|
| 443 |
logger.info(f"🎯 Background: {background_path}")
|
| 444 |
logger.info(f"📁 Temp: {self.temp_dir}")
|
| 445 |
logger.info(f"🧠 Initial {self.memory_manager.get_gpu_memory_info()}")
|
|
|
|
| 505 |
else:
|
| 506 |
raise RuntimeError(f"Chunk {i+1} processing failed: {status}")
|
| 507 |
|
| 508 |
+
# Memory cleanup between chunks
|
| 509 |
logger.info(f"Cleaning up after chunk {i+1}...")
|
| 510 |
self.memory_manager.cleanup_stage(f"Chunk_{i+1}", force=True)
|
| 511 |
|
|
|
|
| 531 |
logger.error(f"Chunked processing failed: {e}")
|
| 532 |
raise
|
| 533 |
|
|
|
|
| 534 |
def _process_single_video(self,
|
| 535 |
video_path: str,
|
| 536 |
background_path: str,
|
|
|
|
| 562 |
self.memory_manager.cleanup_model(self.matanyone_handler, "MatAnyone", deep_cleanup=False)
|
| 563 |
self.memory_manager.cleanup_stage("MatAnyone", force=True)
|
| 564 |
|
| 565 |
+
# Stage 3: Direct alpha compositing (no green screen!)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 566 |
if callback:
|
| 567 |
+
callback("Alpha compositing with background...", 70)
|
| 568 |
+
logger.info("STAGE 3: Direct alpha compositing with background...")
|
| 569 |
+
final_path = self._stage3_alpha_composite(video_path, alpha_video_path, background_path, output_path, quality)
|
| 570 |
|
| 571 |
# Final memory cleanup
|
| 572 |
self.memory_manager.cleanup_stage("Final")
|
|
|
|
| 605 |
logger.warning(f"Failed to cleanup: {e}")
|
| 606 |
|
| 607 |
# ==============================================================================
|
| 608 |
+
# CHAPTER 7: STAGE 1 - REFERENCE MASK CREATION (SAM2)
|
| 609 |
# ==============================================================================
|
| 610 |
|
|
|
|
| 611 |
def _stage1_create_reference_mask(self, video_path: str) -> str:
|
| 612 |
"""
|
| 613 |
Stage 1: Create robust reference mask from first few frames using SAM2.
|
|
|
|
| 730 |
return result
|
| 731 |
|
| 732 |
# ==============================================================================
|
| 733 |
+
# CHAPTER 8: STAGE 2 - MATANYONE PROCESSING
|
| 734 |
# ==============================================================================
|
| 735 |
|
|
|
|
| 736 |
def _stage2_matanyone_processing(self, video_path: str, reference_mask_path: str, trim_seconds: Optional[int]) -> str:
|
| 737 |
"""
|
| 738 |
Stage 2: Process entire video through MatAnyone using reference mask.
|
|
|
|
| 772 |
r_dilate=15,
|
| 773 |
suffix='pha',
|
| 774 |
save_image=False,
|
| 775 |
+
max_size=max_size # Use actual number instead of None
|
| 776 |
)
|
| 777 |
|
| 778 |
except Exception as e:
|
|
|
|
| 791 |
|
| 792 |
# Verify MatAnyone output
|
| 793 |
if not alpha_output_path or not os.path.exists(alpha_output_path):
|
|
|
|
| 794 |
files_created = os.listdir(matanyone_dir) if os.path.exists(matanyone_dir) else []
|
| 795 |
raise RuntimeError(f"MatAnyone did not create pha.mp4. Files created: {files_created}")
|
| 796 |
|
|
|
|
| 832 |
raise
|
| 833 |
|
| 834 |
# ==============================================================================
|
| 835 |
+
# CHAPTER 9: STAGE 3 - DIRECT ALPHA COMPOSITING (NO GREEN SCREEN!)
|
| 836 |
# ==============================================================================
|
| 837 |
|
| 838 |
+
def _stage3_alpha_composite(self, original_video_path: str, alpha_video_path: str,
|
| 839 |
+
background_path: str, output_path: str, quality: str) -> str:
|
| 840 |
"""
|
| 841 |
+
Stage 3: Direct alpha compositing without any green screen.
|
|
|
|
|
|
|
|
|
|
|
|
|
| 842 |
|
| 843 |
+
Uses the alpha matte from MatAnyone to composite the person
|
| 844 |
+
directly onto the new background.
|
| 845 |
"""
|
| 846 |
try:
|
| 847 |
+
# Get quality profile
|
| 848 |
+
profile = QualityManager.get_profile(quality)
|
| 849 |
|
| 850 |
+
# Open videos
|
| 851 |
original_cap = cv2.VideoCapture(original_video_path)
|
| 852 |
alpha_cap = cv2.VideoCapture(alpha_video_path)
|
| 853 |
|
|
|
|
| 866 |
alpha_width = int(alpha_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
|
| 867 |
alpha_height = int(alpha_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
|
| 868 |
|
| 869 |
+
logger.info(f"Original video: {width}x{height} @ {fps} FPS, {total_frames} frames")
|
| 870 |
logger.info(f"Alpha video dimensions: {alpha_width}x{alpha_height}")
|
| 871 |
|
| 872 |
+
# Load and prepare background
|
| 873 |
+
if background_path.lower().endswith(('.mp4', '.avi', '.mov', '.mkv')):
|
| 874 |
+
# Video background - process frame by frame
|
| 875 |
+
bg_cap = cv2.VideoCapture(background_path)
|
| 876 |
+
bg_is_video = True
|
| 877 |
+
else:
|
| 878 |
+
# Image background
|
| 879 |
+
bg_image = cv2.imread(background_path)
|
| 880 |
+
if bg_image is None:
|
| 881 |
+
raise RuntimeError(f"Cannot load background image: {background_path}")
|
| 882 |
+
# Resize to match video
|
| 883 |
+
bg_image = cv2.resize(bg_image, (width, height))
|
| 884 |
+
bg_is_video = False
|
| 885 |
+
|
| 886 |
+
# Setup video writer
|
| 887 |
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
|
| 888 |
+
out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
|
| 889 |
|
| 890 |
frame_count = 0
|
| 891 |
while True:
|
|
|
|
| 896 |
if not ret_orig or not ret_alpha:
|
| 897 |
break
|
| 898 |
|
| 899 |
+
# Get background frame
|
| 900 |
+
if bg_is_video:
|
| 901 |
+
ret_bg, bg_frame = bg_cap.read()
|
| 902 |
+
if not ret_bg:
|
| 903 |
+
# Loop background if it's shorter
|
| 904 |
+
bg_cap.set(cv2.CAP_PROP_POS_FRAMES, 0)
|
| 905 |
+
ret_bg, bg_frame = bg_cap.read()
|
| 906 |
+
bg_frame = cv2.resize(bg_frame, (width, height))
|
| 907 |
+
else:
|
| 908 |
+
bg_frame = bg_image.copy()
|
| 909 |
+
|
| 910 |
# Convert alpha frame to grayscale mask
|
| 911 |
if len(alpha_frame.shape) == 3:
|
| 912 |
alpha_mask = cv2.cvtColor(alpha_frame, cv2.COLOR_BGR2GRAY)
|
| 913 |
else:
|
| 914 |
alpha_mask = alpha_frame
|
| 915 |
|
| 916 |
+
# Handle dimension mismatch - resize alpha to match original if needed
|
| 917 |
if alpha_mask.shape[:2] != orig_frame.shape[:2]:
|
| 918 |
if frame_count == 0: # Log only once
|
| 919 |
logger.info(f"Resizing alpha from {alpha_mask.shape[:2]} to {orig_frame.shape[:2]}")
|
|
|
|
| 922 |
# Normalize alpha to 0-1 range
|
| 923 |
alpha_normalized = alpha_mask.astype(np.float32) / 255.0
|
| 924 |
|
| 925 |
+
# Create 3-channel alpha for compositing
|
|
|
|
|
|
|
|
|
|
|
|
|
| 926 |
alpha_3d = np.stack([alpha_normalized] * 3, axis=2)
|
| 927 |
|
| 928 |
+
# Composite: background where alpha=0, person where alpha=1
|
| 929 |
orig_frame_float = orig_frame.astype(np.float32)
|
| 930 |
+
bg_frame_float = bg_frame.astype(np.float32)
|
| 931 |
|
| 932 |
+
# Direct alpha compositing
|
| 933 |
+
composite = alpha_3d * orig_frame_float + (1 - alpha_3d) * bg_frame_float
|
| 934 |
composite = np.clip(composite, 0, 255).astype(np.uint8)
|
| 935 |
|
| 936 |
# Write frame
|
| 937 |
out.write(composite)
|
| 938 |
frame_count += 1
|
| 939 |
|
| 940 |
+
# Progress logging
|
| 941 |
if frame_count % self.config.MEMORY_CHECK_INTERVAL == 0:
|
| 942 |
+
logger.info(f"Compositing progress: {frame_count}/{total_frames}")
|
| 943 |
if self.memory_manager.should_clear_memory():
|
| 944 |
+
logger.info("Memory high during compositing, clearing...")
|
| 945 |
self.memory_manager.clear_gpu_cache()
|
| 946 |
|
| 947 |
# Save debug frame occasionally
|
| 948 |
if self.config.SAVE_DEBUG_FILES and frame_count % self.config.DEBUG_FRAME_INTERVAL == 0:
|
| 949 |
+
debug_path = os.path.join(self.temp_dir, f"debug_composite_{frame_count:04d}.png")
|
| 950 |
cv2.imwrite(debug_path, composite)
|
| 951 |
|
| 952 |
# Cleanup
|
| 953 |
original_cap.release()
|
| 954 |
alpha_cap.release()
|
| 955 |
out.release()
|
| 956 |
+
if bg_is_video:
|
| 957 |
+
bg_cap.release()
|
| 958 |
|
| 959 |
if frame_count == 0:
|
| 960 |
+
raise RuntimeError("No frames processed for output video")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 961 |
|
| 962 |
+
# Verify output
|
| 963 |
if not os.path.exists(output_path):
|
| 964 |
+
raise RuntimeError("Output file was not created")
|
| 965 |
+
|
| 966 |
+
file_size = os.path.getsize(output_path) / (1024 * 1024) # MB
|
| 967 |
+
logger.info(f"✅ Alpha compositing completed: {output_path} ({file_size:.1f} MB)")
|
| 968 |
+
logger.info(f" Processed {frame_count} frames")
|
| 969 |
|
| 970 |
return output_path
|
| 971 |
|
| 972 |
except Exception as e:
|
| 973 |
+
logger.error(f"Stage 3 alpha compositing failed: {e}")
|
| 974 |
raise
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 975 |
|
| 976 |
# ==============================================================================
|
| 977 |
+
# CHAPTER 10: DEBUG AND UTILITY FUNCTIONS
|
| 978 |
# ==============================================================================
|
| 979 |
|
| 980 |
def _debug_video_info(self, video_path: str, label: str = "Video"):
|
|
|
|
| 1031 |
return stats
|
| 1032 |
|
| 1033 |
# ==============================================================================
|
| 1034 |
+
# CHAPTER 11: EXPORT INTERFACE AND COMPATIBILITY
|
| 1035 |
# ==============================================================================
|
| 1036 |
|
| 1037 |
def create_two_stage_processor(sam2_handler, matanyone_handler, **kwargs):
|
|
|
|
| 1045 |
# Main execution example
|
| 1046 |
if __name__ == "__main__":
    # Example usage - replace with your actual handlers.
    # Running the module directly only logs a summary of the pipeline and
    # the default configuration; no video is processed.
    logger.info("TwoStageProcessor (Alpha Channel Version) loaded successfully")
    logger.info("Use create_two_stage_processor(sam2_handler, matanyone_handler) to initialize")

    # Print configuration
    config = ProcessingConfig()
    # Plain literals: these two messages have no placeholders, so no f-prefix
    logger.info("Pipeline: SAM2 → MatAnyone → Direct Alpha Compositing")
    logger.info("No green screen or chroma key needed!")
    logger.info(f"Default config: {config.REFERENCE_FRAMES} reference frames")
    logger.info(f"Chunked processing: {config.MAX_CHUNK_DURATION}s chunks with {config.CHUNK_OVERLAP_FRAMES} frame overlap")
    logger.info(f"Memory management: Cache clearing={'ON' if config.CLEAR_CACHE_AFTER_STAGE else 'OFF'}")
|
|
|