MogensR committed on
Commit
df76738
·
verified ·
1 Parent(s): 789ff6d

Update processing/two_stage/two_stage_processor.py

Browse files
processing/two_stage/two_stage_processor.py CHANGED
@@ -1,14 +1,13 @@
1
  #!/usr/bin/env python3
2
  """
3
- EFFICIENT Two-Stage Green-Screen Processing System ✅ 2025-09-09
4
- VIDEO-TO-VIDEO PIPELINE: No PNG conversion, direct MP4 processing
5
  Stage 1: SAM2 creates reference mask from first 3 frames
6
- Stage 2: MatAnyone processes entire video → pha.mp4
7
- Stage 3: Create green screen video using pha.mp4 as alpha matte
8
- Stage 4: Adaptive chroma key compositing with background → final.mp4
9
  FEATURES:
10
- - Auto-detection of optimal chroma key threshold
11
- - Adaptive iteration to find perfect threshold
12
  - Chunked processing for long videos with memory management
13
  - Dimension mismatch handling
14
  - Memory optimization with light/deep cleanup modes
@@ -28,6 +27,7 @@
28
  import gc
29
  import time
30
  import traceback
 
31
  from pathlib import Path
32
  from typing import Optional, Tuple, Dict, Any, List
33
  from moviepy.editor import VideoFileClip, CompositeVideoClip, ImageClip, concatenate_videoclips
@@ -41,14 +41,6 @@
41
  )
42
  logger = logging.getLogger(__name__)
43
 
44
- # Try to import sklearn, fallback gracefully
45
- try:
46
- from sklearn.cluster import KMeans
47
- SKLEARN_AVAILABLE = True
48
- except ImportError:
49
- SKLEARN_AVAILABLE = False
50
- logger.warning("scikit-learn not available, using fallback threshold detection")
51
-
52
  # PyTorch memory management
53
  try:
54
  import torch
@@ -72,21 +64,6 @@ class ProcessingConfig:
72
  CHUNK_OVERLAP_FRAMES = 5 # Frames to overlap between chunks for smooth transitions
73
  MAX_PROCESSING_RESOLUTION = None # Keep full resolution for chunks
74
 
75
- # Green screen settings
76
- GREEN_COLOR = (0, 255, 0) # RGB green for green screen
77
- GREEN_COLOR_NORMALIZED = (0.0, 1.0, 0.0) # Normalized for compositing
78
-
79
- # Auto-detection settings
80
- AUTO_DETECTION_FRAMES = 5 # Number of frames to analyze for auto-detection
81
- AUTO_DETECTION_FALLBACK = 0.1 # Fallback threshold if auto-detection fails
82
- MIN_THRESHOLD = 0.02 # Minimum allowed threshold
83
- MAX_THRESHOLD = 0.3 # Maximum allowed threshold
84
-
85
- # Adaptive optimization settings
86
- ADAPTIVE_MAX_ITERATIONS = 10 # Maximum iterations for threshold optimization
87
- ADAPTIVE_GREEN_TOLERANCE = 0.01 # Acceptable green residue level
88
- ADAPTIVE_TRANSPARENCY_TOLERANCE = 0.1 # Acceptable transparency quality
89
-
90
  # Quality settings
91
  VIDEO_CODEC = 'libx264'
92
  VIDEO_BITRATE = '8000k'
@@ -107,11 +84,11 @@ class ProcessingConfig:
107
  ENABLE_MEMORY_TESTING = False # DISABLED: Memory testing causes predictor deletion
108
 
109
  # ==============================================================================
110
- # CHAPTER 3: ENHANCED MEMORY MANAGEMENT UTILITIES
111
  # ==============================================================================
112
 
113
  class MemoryManager:
114
- """Enhanced utilities for managing GPU and system memory."""
115
 
116
  def __init__(self, config: ProcessingConfig):
117
  self.config = config
@@ -275,103 +252,8 @@ def get_memory_report(self) -> str:
275
  report.append("="*60)
276
  return "\n".join(report)
277
 
278
- # ==============================================================================
279
- # CHAPTER 4: MEMORY TESTING UTILITIES
280
- # ==============================================================================
281
-
282
- class MemoryTester:
283
- """Testing utilities for memory management verification."""
284
-
285
- @staticmethod
286
- def test_memory_cleanup(processor: 'TwoStageProcessor') -> Dict[str, Any]:
287
- """Test memory cleanup at each stage."""
288
- results = {
289
- 'initial_memory': None,
290
- 'post_sam2_memory': None,
291
- 'post_matanyone_memory': None,
292
- 'post_final_memory': None,
293
- 'cleanup_effective': False,
294
- 'memory_leaks': []
295
- }
296
-
297
- try:
298
- # Get initial memory
299
- results['initial_memory'] = processor.memory_manager.get_current_memory()
300
- logger.info(f"Initial memory: {results['initial_memory']['gpu_allocated']:.2f}GB")
301
-
302
- # Create dummy data for testing
303
- dummy_frame = np.random.randint(0, 255, (1080, 1920, 3), dtype=np.uint8)
304
-
305
- # Test SAM2 cleanup
306
- if hasattr(processor.sam2_handler, 'create_mask'):
307
- logger.info("Testing SAM2 memory cleanup...")
308
- _ = processor._create_sam2_mask(dummy_frame)
309
- pre_cleanup = processor.memory_manager.get_current_memory()
310
- # Use deep cleanup for testing only
311
- processor.memory_manager.cleanup_model(processor.sam2_handler, "SAM2", deep_cleanup=True)
312
- processor.memory_manager.cleanup_stage("SAM2_test", force=True)
313
- post_cleanup = processor.memory_manager.get_current_memory()
314
-
315
- results['post_sam2_memory'] = post_cleanup
316
- sam2_freed = pre_cleanup['gpu_allocated'] - post_cleanup['gpu_allocated']
317
-
318
- if sam2_freed > 0:
319
- logger.info(f"SAM2 cleanup freed {sam2_freed:.2f}GB")
320
- else:
321
- results['memory_leaks'].append("SAM2 cleanup ineffective")
322
-
323
- # Check if memory is properly freed
324
- final_memory = processor.memory_manager.get_current_memory()
325
- results['post_final_memory'] = final_memory
326
-
327
- # Determine if cleanup was effective
328
- memory_increase = final_memory['gpu_allocated'] - results['initial_memory']['gpu_allocated']
329
- results['cleanup_effective'] = memory_increase < 0.1 # Less than 100MB increase
330
-
331
- if not results['cleanup_effective']:
332
- results['memory_leaks'].append(f"Memory increased by {memory_increase:.2f}GB")
333
-
334
- # Generate report
335
- logger.info(processor.memory_manager.get_memory_report())
336
-
337
- except Exception as e:
338
- logger.error(f"Memory testing failed: {e}")
339
- results['error'] = str(e)
340
-
341
- return results
342
-
343
- @staticmethod
344
- def monitor_memory_during_processing(func):
345
- """Decorator to monitor memory during a function call."""
346
- def wrapper(*args, **kwargs):
347
- if not TORCH_AVAILABLE:
348
- return func(*args, **kwargs)
349
-
350
- start_memory = torch.cuda.memory_allocated() if torch.cuda.is_available() else 0
351
- start_time = time.time()
352
-
353
- try:
354
- result = func(*args, **kwargs)
355
-
356
- end_memory = torch.cuda.memory_allocated() if torch.cuda.is_available() else 0
357
- end_time = time.time()
358
-
359
- memory_used = (end_memory - start_memory) / 1024**3
360
- time_taken = end_time - start_time
361
-
362
- func_name = func.__name__
363
- logger.info(f"{func_name}: {time_taken:.1f}s, {memory_used:.2f}GB memory delta")
364
-
365
- return result
366
-
367
- except Exception as e:
368
- logger.error(f"Error in {func.__name__}: {e}")
369
- raise
370
-
371
- return wrapper
372
-
373
  # ==============================================================================
374
- # CHAPTER 5: QUALITY MANAGER
375
  # ==============================================================================
376
 
377
  class QualityManager:
@@ -404,7 +286,7 @@ def get_profile(cls, quality: str = 'medium') -> Dict[str, Any]:
404
  return cls.PROFILES.get(quality, cls.PROFILES['medium'])
405
 
406
  # ==============================================================================
407
- # CHAPTER 6: CHUNKED VIDEO PROCESSOR
408
  # ==============================================================================
409
 
410
  class ChunkedVideoProcessor:
@@ -511,21 +393,19 @@ def reassemble_chunks(self, processed_chunk_paths: List[str], output_path: str)
511
  raise
512
 
513
  # ==============================================================================
514
- # CHAPTER 7: TWOSTAGEPROCESSOR CLASS DEFINITION
515
  # ==============================================================================
516
 
517
  class TwoStageProcessor:
518
  """
519
- Efficient two-stage green screen processor with video-to-video pipeline.
520
 
521
- This processor avoids PNG conversion by working directly with MP4 files:
522
  1. SAM2 creates reference mask from first few frames
523
- 2. MatAnyone processes entire video using reference mask → pha.mp4
524
- 3. Create green screen video using pha.mp4 as alpha matte
525
- 4. Adaptive chroma key compositing with background → final.mp4
526
- 5. Auto-detects and iteratively optimizes chroma key threshold
527
- 6. Chunked processing for long videos with memory management
528
- 7. Dimension mismatch handling for robust processing
529
  """
530
 
531
  def __init__(self, sam2_handler, matanyone_handler, temp_dir: Optional[str] = None):
@@ -536,7 +416,6 @@ def __init__(self, sam2_handler, matanyone_handler, temp_dir: Optional[str] = No
536
  self.config = ProcessingConfig()
537
  self.memory_manager = MemoryManager(self.config)
538
  self.chunked_processor = ChunkedVideoProcessor(self.temp_dir, self.config)
539
- self.memory_tester = MemoryTester()
540
 
541
  # Ensure temp directory exists
542
  os.makedirs(self.temp_dir, exist_ok=True)
@@ -544,29 +423,6 @@ def __init__(self, sam2_handler, matanyone_handler, temp_dir: Optional[str] = No
544
 
545
  # Log initial memory state
546
  logger.info(self.memory_manager.get_gpu_memory_info())
547
-
548
- # Memory test DISABLED - it deletes the predictor causing failures
549
- if self.config.ENABLE_MEMORY_TESTING:
550
- logger.info("Memory testing is disabled to prevent predictor deletion")
551
- # self.run_memory_test() # DO NOT RUN - causes predictor deletion
552
-
553
- def run_memory_test(self):
554
- """Run memory management tests. WARNING: This will delete the predictor!"""
555
- try:
556
- logger.warning("Running memory tests - this may affect handler state!")
557
- test_results = self.memory_tester.test_memory_cleanup(self)
558
-
559
- if test_results['cleanup_effective']:
560
- logger.info("✅ Memory management tests passed")
561
- else:
562
- logger.warning(f"⚠️ Memory management issues detected: {test_results['memory_leaks']}")
563
-
564
- # Try to restore predictor after test
565
- if hasattr(self.sam2_handler, 'predictor') and self.sam2_handler.predictor is None:
566
- logger.warning("SAM2 predictor was deleted by memory test - manual restoration required")
567
-
568
- except Exception as e:
569
- logger.warning(f"Memory tests failed: {e}")
570
 
571
  def process_video(self,
572
  video_path: str,
@@ -577,13 +433,13 @@ def process_video(self,
577
  callback: Optional[callable] = None,
578
  **kwargs) -> Tuple[str, str]:
579
  """
580
- Main processing pipeline - video to video with chunked processing.
581
 
582
  Returns:
583
  Tuple[str, str]: (final_output_path, status_message)
584
  """
585
  try:
586
- logger.info(f"🎬 Two-Stage Video Pipeline: {video_path}")
587
  logger.info(f"🎯 Background: {background_path}")
588
  logger.info(f"📁 Temp: {self.temp_dir}")
589
  logger.info(f"🧠 Initial {self.memory_manager.get_gpu_memory_info()}")
@@ -649,7 +505,7 @@ def _process_chunked_video(self,
649
  else:
650
  raise RuntimeError(f"Chunk {i+1} processing failed: {status}")
651
 
652
- # Aggressive memory cleanup between chunks - but don't delete predictor
653
  logger.info(f"Cleaning up after chunk {i+1}...")
654
  self.memory_manager.cleanup_stage(f"Chunk_{i+1}", force=True)
655
 
@@ -675,7 +531,6 @@ def _process_chunked_video(self,
675
  logger.error(f"Chunked processing failed: {e}")
676
  raise
677
 
678
- @MemoryTester.monitor_memory_during_processing
679
  def _process_single_video(self,
680
  video_path: str,
681
  background_path: str,
@@ -707,21 +562,11 @@ def _process_single_video(self,
707
  self.memory_manager.cleanup_model(self.matanyone_handler, "MatAnyone", deep_cleanup=False)
708
  self.memory_manager.cleanup_stage("MatAnyone", force=True)
709
 
710
- # Stage 3: Create green screen video
711
- if callback:
712
- callback("Creating green screen intermediate video...", 70)
713
- logger.info("STAGE 3: Creating green screen intermediate video...")
714
- green_screen_path = self._stage3_create_green_screen_video(video_path, alpha_video_path)
715
-
716
- # Memory cleanup after green screen
717
- if self.memory_manager.should_clear_memory():
718
- self.memory_manager.cleanup_stage("GreenScreen")
719
-
720
- # Stage 4: Adaptive final compositing
721
  if callback:
722
- callback("Adaptive compositing with background...", 90)
723
- logger.info("STAGE 4: Adaptive compositing with background...")
724
- final_path = self._stage4_adaptive_compositing(green_screen_path, background_path, output_path, quality)
725
 
726
  # Final memory cleanup
727
  self.memory_manager.cleanup_stage("Final")
@@ -760,10 +605,9 @@ def cleanup(self):
760
  logger.warning(f"Failed to cleanup: {e}")
761
 
762
  # ==============================================================================
763
- # CHAPTER 8: STAGE 1 - REFERENCE MASK CREATION (SAM2)
764
  # ==============================================================================
765
 
766
- @MemoryTester.monitor_memory_during_processing
767
  def _stage1_create_reference_mask(self, video_path: str) -> str:
768
  """
769
  Stage 1: Create robust reference mask from first few frames using SAM2.
@@ -886,10 +730,9 @@ def _combine_reference_masks(self, masks: list) -> np.ndarray:
886
  return result
887
 
888
  # ==============================================================================
889
- # CHAPTER 9: STAGE 2 - MATANYONE PROCESSING WITH FIXED PARAMETERS
890
  # ==============================================================================
891
 
892
- @MemoryTester.monitor_memory_during_processing
893
  def _stage2_matanyone_processing(self, video_path: str, reference_mask_path: str, trim_seconds: Optional[int]) -> str:
894
  """
895
  Stage 2: Process entire video through MatAnyone using reference mask.
@@ -929,7 +772,7 @@ def _stage2_matanyone_processing(self, video_path: str, reference_mask_path: str
929
  r_dilate=15,
930
  suffix='pha',
931
  save_image=False,
932
- max_size=max_size # FIXED: Use actual number instead of None
933
  )
934
 
935
  except Exception as e:
@@ -948,7 +791,6 @@ def _stage2_matanyone_processing(self, video_path: str, reference_mask_path: str
948
 
949
  # Verify MatAnyone output
950
  if not alpha_output_path or not os.path.exists(alpha_output_path):
951
- # List what files were actually created
952
  files_created = os.listdir(matanyone_dir) if os.path.exists(matanyone_dir) else []
953
  raise RuntimeError(f"MatAnyone did not create pha.mp4. Files created: {files_created}")
954
 
@@ -990,24 +832,22 @@ def _trim_video(self, input_path: str, output_path: str, seconds: int):
990
  raise
991
 
992
  # ==============================================================================
993
- # CHAPTER 10: STAGE 3 - GREEN SCREEN VIDEO CREATION WITH DIMENSION FIX
994
  # ==============================================================================
995
 
996
- @MemoryTester.monitor_memory_during_processing
997
- def _stage3_create_green_screen_video(self, original_video_path: str, alpha_video_path: str) -> str:
998
  """
999
- Stage 3: Create green screen intermediate video using alpha matte.
1000
-
1001
- Uses the alpha video from MatAnyone to create a green screen version:
1002
- - Where alpha = 1 (person): keep original video
1003
- - Where alpha = 0 (background): make it green (#00FF00)
1004
 
1005
- Returns path to green screen video.
 
1006
  """
1007
  try:
1008
- green_screen_path = os.path.join(self.temp_dir, "green_screen.mp4")
 
1009
 
1010
- # Open both videos
1011
  original_cap = cv2.VideoCapture(original_video_path)
1012
  alpha_cap = cv2.VideoCapture(alpha_video_path)
1013
 
@@ -1026,12 +866,26 @@ def _stage3_create_green_screen_video(self, original_video_path: str, alpha_vide
1026
  alpha_width = int(alpha_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
1027
  alpha_height = int(alpha_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
1028
 
1029
- logger.info(f"Green screen processing: {width}x{height} @ {fps} FPS, {total_frames} frames")
1030
  logger.info(f"Alpha video dimensions: {alpha_width}x{alpha_height}")
1031
 
1032
- # Setup video writer with original dimensions
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1033
  fourcc = cv2.VideoWriter_fourcc(*'mp4v')
1034
- out = cv2.VideoWriter(green_screen_path, fourcc, fps, (width, height))
1035
 
1036
  frame_count = 0
1037
  while True:
@@ -1042,13 +896,24 @@ def _stage3_create_green_screen_video(self, original_video_path: str, alpha_vide
1042
  if not ret_orig or not ret_alpha:
1043
  break
1044
 
 
 
 
 
 
 
 
 
 
 
 
1045
  # Convert alpha frame to grayscale mask
1046
  if len(alpha_frame.shape) == 3:
1047
  alpha_mask = cv2.cvtColor(alpha_frame, cv2.COLOR_BGR2GRAY)
1048
  else:
1049
  alpha_mask = alpha_frame
1050
 
1051
- # FIX: Handle dimension mismatch - resize alpha to match original if needed
1052
  if alpha_mask.shape[:2] != orig_frame.shape[:2]:
1053
  if frame_count == 0: # Log only once
1054
  logger.info(f"Resizing alpha from {alpha_mask.shape[:2]} to {orig_frame.shape[:2]}")
@@ -1057,515 +922,59 @@ def _stage3_create_green_screen_video(self, original_video_path: str, alpha_vide
1057
  # Normalize alpha to 0-1 range
1058
  alpha_normalized = alpha_mask.astype(np.float32) / 255.0
1059
 
1060
- # Create green background
1061
- green_bg = np.full_like(orig_frame, self.config.GREEN_COLOR, dtype=np.uint8)
1062
-
1063
- # Composite: person where alpha=1, green where alpha=0
1064
- # alpha_3d for broadcasting across color channels
1065
  alpha_3d = np.stack([alpha_normalized] * 3, axis=2)
1066
 
1067
- # CRITICAL: Ensure both inputs are same data type
1068
  orig_frame_float = orig_frame.astype(np.float32)
1069
- green_bg_float = green_bg.astype(np.float32)
1070
 
1071
- # Composite with proper scaling
1072
- composite = alpha_3d * orig_frame_float + (1 - alpha_3d) * green_bg_float
1073
  composite = np.clip(composite, 0, 255).astype(np.uint8)
1074
 
1075
  # Write frame
1076
  out.write(composite)
1077
  frame_count += 1
1078
 
1079
- # Debug logging and memory check
1080
  if frame_count % self.config.MEMORY_CHECK_INTERVAL == 0:
1081
- logger.info(f"Green screen progress: {frame_count}/{total_frames}")
1082
  if self.memory_manager.should_clear_memory():
1083
- logger.info("Memory high during green screen creation, clearing...")
1084
  self.memory_manager.clear_gpu_cache()
1085
 
1086
  # Save debug frame occasionally
1087
  if self.config.SAVE_DEBUG_FILES and frame_count % self.config.DEBUG_FRAME_INTERVAL == 0:
1088
- debug_path = os.path.join(self.temp_dir, f"debug_green_frame_{frame_count:04d}.png")
1089
  cv2.imwrite(debug_path, composite)
1090
 
1091
  # Cleanup
1092
  original_cap.release()
1093
  alpha_cap.release()
1094
  out.release()
 
 
1095
 
1096
  if frame_count == 0:
1097
- raise RuntimeError("No frames processed for green screen video")
1098
-
1099
- logger.info(f"✅ Green screen video created: {frame_count} frames → {green_screen_path}")
1100
- return green_screen_path
1101
-
1102
- except Exception as e:
1103
- logger.error(f"Stage 3 failed: {e}")
1104
- raise
1105
-
1106
- # ==============================================================================
1107
- # CHAPTER 11: STAGE 4 - ADAPTIVE CHROMA KEY COMPOSITING
1108
- # ==============================================================================
1109
-
1110
- @MemoryTester.monitor_memory_during_processing
1111
- def _stage4_adaptive_compositing(self, green_screen_path: str, background_path: str,
1112
- output_path: str, quality: str) -> str:
1113
- """
1114
- Stage 4: Final compositing with adaptive threshold optimization.
1115
-
1116
- Iteratively adjusts chroma key threshold until green is properly removed.
1117
- """
1118
- try:
1119
- # Get quality profile
1120
- profile = QualityManager.get_profile(quality)
1121
-
1122
- # Load videos/images
1123
- green_clip = VideoFileClip(green_screen_path)
1124
-
1125
- if background_path.lower().endswith(('.mp4', '.avi', '.mov', '.mkv')):
1126
- bg_clip = VideoFileClip(background_path)
1127
- if bg_clip.duration < green_clip.duration:
1128
- bg_clip = bg_clip.loop(duration=green_clip.duration)
1129
- bg_clip = bg_clip.subclip(0, green_clip.duration)
1130
- else:
1131
- bg_clip = ImageClip(background_path, duration=green_clip.duration)
1132
-
1133
- bg_clip = bg_clip.resize((green_clip.w, green_clip.h))
1134
-
1135
- # Start with auto-detected threshold
1136
- initial_threshold = self._auto_detect_chroma_threshold(green_screen_path)
1137
- logger.info(f"Initial auto-detected threshold: {initial_threshold:.4f}")
1138
-
1139
- # Adaptive optimization
1140
- best_threshold = self._optimize_chroma_threshold(
1141
- green_clip, bg_clip, initial_threshold,
1142
- max_iterations=self.config.ADAPTIVE_MAX_ITERATIONS
1143
- )
1144
-
1145
- logger.info(f"✅ Optimized threshold: {best_threshold:.4f} (started from {initial_threshold:.4f})")
1146
-
1147
- # Apply final chroma key with optimized threshold
1148
- green_screen_keyed = green_clip.fx(vfx.mask_color,
1149
- color=self.config.GREEN_COLOR_NORMALIZED,
1150
- thr=best_threshold,
1151
- s=0.1) # Reduced smoothing for sharper edges
1152
-
1153
- # Composite and write
1154
- final_clip = CompositeVideoClip([bg_clip, green_screen_keyed])
1155
-
1156
- write_params = {
1157
- 'codec': self.config.VIDEO_CODEC,
1158
- 'bitrate': profile['bitrate'],
1159
- 'audio_codec': self.config.AUDIO_CODEC,
1160
- 'verbose': False,
1161
- 'logger': None
1162
- }
1163
-
1164
- if 'crf' in profile:
1165
- write_params['ffmpeg_params'] = ['-crf', str(profile['crf'])]
1166
-
1167
- final_clip.write_videofile(output_path, **write_params)
1168
-
1169
- # Cleanup
1170
- green_clip.close()
1171
- bg_clip.close()
1172
- final_clip.close()
1173
 
 
1174
  if not os.path.exists(output_path):
1175
- raise RuntimeError("Final output file was not created")
1176
-
1177
- file_size = os.path.getsize(output_path) / (1024 * 1024)
1178
- logger.info(f"✅ Adaptive compositing completed: {output_path} ({file_size:.1f} MB)")
 
1179
 
1180
  return output_path
1181
 
1182
  except Exception as e:
1183
- logger.error(f"Stage 4 adaptive failed: {e}")
1184
  raise
1185
-
1186
- def _optimize_chroma_threshold(self, green_clip, bg_clip, initial_threshold: float,
1187
- max_iterations: int = 10) -> float:
1188
- """
1189
- Iteratively optimize chroma key threshold by analyzing output quality.
1190
- """
1191
- threshold = initial_threshold
1192
- best_threshold = threshold
1193
- best_score = float('inf')
1194
-
1195
- # Binary search bounds
1196
- low = max(self.config.MIN_THRESHOLD, initial_threshold * 0.5)
1197
- high = min(self.config.MAX_THRESHOLD, initial_threshold * 2.0)
1198
-
1199
- # Track tested thresholds to avoid repetition
1200
- tested_thresholds = set()
1201
-
1202
- for iteration in range(max_iterations):
1203
- # Round threshold to avoid tiny differences
1204
- threshold = round(threshold, 4)
1205
-
1206
- # Skip if already tested
1207
- if threshold in tested_thresholds:
1208
- logger.info(f" Threshold {threshold:.4f} already tested, adjusting...")
1209
- threshold = (low + high) / 2
1210
- continue
1211
-
1212
- tested_thresholds.add(threshold)
1213
- logger.info(f"🔄 Optimization iteration {iteration + 1}/{max_iterations}, testing threshold: {threshold:.4f}")
1214
-
1215
- # Apply chroma key with current threshold
1216
- keyed = green_clip.fx(vfx.mask_color,
1217
- color=self.config.GREEN_COLOR_NORMALIZED,
1218
- thr=threshold,
1219
- s=0.1)
1220
-
1221
- # Composite
1222
- test_composite = CompositeVideoClip([bg_clip, keyed])
1223
-
1224
- # Extract test frames (beginning, middle, end)
1225
- test_times = [
1226
- green_clip.duration * 0.1,
1227
- green_clip.duration * 0.5,
1228
- green_clip.duration * 0.9
1229
- ]
1230
-
1231
- total_green_score = 0
1232
- total_transparency_score = 0
1233
-
1234
- for test_time in test_times:
1235
- test_frame = test_composite.get_frame(test_time)
1236
- bg_frame = bg_clip.get_frame(test_time)
1237
-
1238
- # Analyze the frame for green residue
1239
- green_score = self._analyze_green_residue(test_frame)
1240
- transparency_score = self._analyze_transparency_quality(test_frame, bg_frame)
1241
-
1242
- total_green_score += green_score
1243
- total_transparency_score += transparency_score
1244
-
1245
- # Average scores
1246
- avg_green_score = total_green_score / len(test_times)
1247
- avg_transparency_score = total_transparency_score / len(test_times)
1248
-
1249
- # Combined score (lower is better)
1250
- total_score = avg_green_score + avg_transparency_score * 0.5
1251
-
1252
- logger.info(f" 📊 Green residue: {avg_green_score:.4f}, Transparency: {avg_transparency_score:.4f}, Total: {total_score:.4f}")
1253
-
1254
- # Update best if improved
1255
- if total_score < best_score:
1256
- best_score = total_score
1257
- best_threshold = threshold
1258
- logger.info(f" ✅ New best threshold: {best_threshold:.4f} (score: {best_score:.4f})")
1259
-
1260
- # Check if we're good enough
1261
- if avg_green_score < self.config.ADAPTIVE_GREEN_TOLERANCE and \
1262
- avg_transparency_score < self.config.ADAPTIVE_TRANSPARENCY_TOLERANCE:
1263
- logger.info(f" 🎯 Acceptable quality reached! Stopping optimization.")
1264
- break
1265
-
1266
- # Adjust threshold using binary search
1267
- if avg_green_score > 0.05: # Too much green remains
1268
- logger.info(f" 🟢 Too much green, decreasing threshold")
1269
- high = threshold
1270
- threshold = (low + threshold) / 2
1271
- elif avg_transparency_score > 0.3: # Too much was removed
1272
- logger.info(f" 👤 Subject too transparent, increasing threshold")
1273
- low = threshold
1274
- threshold = (threshold + high) / 2
1275
- else:
1276
- # Fine-tune around current value
1277
- if avg_green_score > avg_transparency_score:
1278
- threshold *= 0.95 # Slightly more aggressive
1279
- else:
1280
- threshold *= 1.05 # Slightly less aggressive
1281
-
1282
- # Ensure we stay in bounds
1283
- threshold = np.clip(threshold, self.config.MIN_THRESHOLD, self.config.MAX_THRESHOLD)
1284
-
1285
- # Clean up test composite
1286
- test_composite.close()
1287
- keyed.close()
1288
-
1289
- # Stop if converged
1290
- if abs(threshold - best_threshold) < 0.001 and iteration > 3:
1291
- logger.info(" 📍 Converged, stopping optimization")
1292
- break
1293
-
1294
- return best_threshold
1295
-
1296
- def _analyze_green_residue(self, frame: np.ndarray) -> float:
1297
- """
1298
- Analyze how much green remains in the frame.
1299
- Returns score from 0 (no green) to 1 (lots of green).
1300
- """
1301
- # Convert to float
1302
- img = frame.astype(np.float32) / 255.0
1303
-
1304
- # Detect pure green pixels
1305
- green_pixels = (
1306
- (img[:,:,1] > 0.7) & # High green
1307
- (img[:,:,0] < 0.3) & # Low red
1308
- (img[:,:,2] < 0.3) # Low blue
1309
- )
1310
-
1311
- # Calculate percentage of green pixels
1312
- green_ratio = np.sum(green_pixels) / (frame.shape[0] * frame.shape[1])
1313
-
1314
- # Also check for greenish tint in other pixels
1315
- greenish_pixels = (
1316
- (img[:,:,1] > img[:,:,0] * 1.5) & # Green > Red * 1.5
1317
- (img[:,:,1] > img[:,:,2] * 1.5) & # Green > Blue * 1.5
1318
- (img[:,:,1] > 0.4) # Significant green
1319
- )
1320
-
1321
- greenish_ratio = np.sum(greenish_pixels) / (frame.shape[0] * frame.shape[1])
1322
-
1323
- # Combined score
1324
- score = green_ratio + greenish_ratio * 0.3
1325
-
1326
- return min(1.0, score)
1327
-
1328
- def _analyze_transparency_quality(self, composite_frame: np.ndarray, bg_frame: np.ndarray) -> float:
1329
- """
1330
- Analyze if too much of the subject was removed.
1331
- Returns score from 0 (good) to 1 (too much removed).
1332
- """
1333
- # Calculate difference between composite and background
1334
- diff = np.abs(composite_frame.astype(np.float32) - bg_frame.astype(np.float32))
1335
-
1336
- # Sum of differences (more difference = more of subject preserved)
1337
- total_diff = np.sum(diff) / (255.0 * 3 * composite_frame.shape[0] * composite_frame.shape[1])
1338
-
1339
- # If difference is too small, too much was removed
1340
- if total_diff < 0.05: # Less than 5% different from background
1341
- return 1.0 # Bad - subject was removed
1342
- elif total_diff > 0.3: # More than 30% different
1343
- return 0.0 # Good - subject well preserved
1344
- else:
1345
- # Linear interpolation
1346
- return 1.0 - (total_diff - 0.05) / 0.25
1347
-
1348
- # ==============================================================================
1349
- # CHAPTER 12: AUTO CHROMA KEY THRESHOLD DETECTION
1350
- # ==============================================================================
1351
-
1352
- def _auto_detect_chroma_threshold(self, green_screen_path: str) -> float:
1353
- """
1354
- Auto-detect initial chroma key threshold by analyzing green screen video.
1355
-
1356
- This method:
1357
- 1. Samples frames from the green screen video
1358
- 2. Identifies green background pixels vs person pixels
1359
- 3. Calculates color distance between greenest background and person
1360
- 4. Sets threshold as percentage of that distance for initial guess
1361
- """
1362
- try:
1363
- logger.info("Analyzing green screen video for initial threshold detection...")
1364
-
1365
- # Open green screen video
1366
- cap = cv2.VideoCapture(green_screen_path)
1367
- if not cap.isOpened():
1368
- logger.warning("Cannot open green screen video for analysis")
1369
- return self.config.AUTO_DETECTION_FALLBACK
1370
-
1371
- # Get video properties
1372
- total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
1373
-
1374
- # Sample frames evenly across the video
1375
- frame_indices = np.linspace(0, total_frames - 1,
1376
- min(self.config.AUTO_DETECTION_FRAMES, total_frames),
1377
- dtype=int)
1378
-
1379
- green_pixels = []
1380
- person_pixels = []
1381
-
1382
- for frame_idx in frame_indices:
1383
- # Seek to specific frame
1384
- cap.set(cv2.CAP_PROP_POS_FRAMES, frame_idx)
1385
- ret, frame = cap.read()
1386
-
1387
- if not ret:
1388
- continue
1389
-
1390
- # Convert BGR to RGB for analysis
1391
- frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
1392
-
1393
- # Analyze this frame
1394
- green_px, person_px = self._analyze_frame_colors(frame_rgb)
1395
- green_pixels.extend(green_px)
1396
- person_pixels.extend(person_px)
1397
-
1398
- cap.release()
1399
-
1400
- if len(green_pixels) == 0 or len(person_pixels) == 0:
1401
- logger.warning("Insufficient color data for auto-detection")
1402
- return self.config.AUTO_DETECTION_FALLBACK
1403
-
1404
- # Convert to numpy arrays
1405
- green_pixels = np.array(green_pixels)
1406
- person_pixels = np.array(person_pixels)
1407
-
1408
- logger.info(f"Analyzed {len(green_pixels)} green pixels and {len(person_pixels)} person pixels")
1409
-
1410
- # Calculate initial threshold
1411
- threshold = self._calculate_initial_threshold(green_pixels, person_pixels)
1412
-
1413
- # Clamp to safe range
1414
- threshold = np.clip(threshold, self.config.MIN_THRESHOLD, self.config.MAX_THRESHOLD)
1415
-
1416
- logger.info(f"Initial threshold calculated: {threshold:.4f}")
1417
- return threshold
1418
-
1419
- except Exception as e:
1420
- logger.warning(f"Auto-detection failed: {e}, using fallback")
1421
- return self.config.AUTO_DETECTION_FALLBACK
1422
-
1423
- def _analyze_frame_colors(self, frame_rgb: np.ndarray) -> Tuple[list, list]:
1424
- """
1425
- Analyze a single frame to identify green pixels vs person pixels.
1426
-
1427
- Returns:
1428
- Tuple[list, list]: (green_pixels, person_pixels) as lists of RGB values
1429
- """
1430
- try:
1431
- # Convert to normalized float
1432
- frame_norm = frame_rgb.astype(np.float32) / 255.0
1433
-
1434
- # Identify likely green pixels (high green, low red/blue)
1435
- green_mask = (
1436
- (frame_norm[:, :, 1] > 0.7) & # High green
1437
- (frame_norm[:, :, 0] < 0.3) & # Low red
1438
- (frame_norm[:, :, 2] < 0.3) # Low blue
1439
- )
1440
-
1441
- # Identify likely person pixels (balanced colors, not green-dominant)
1442
- person_mask = (
1443
- (frame_norm[:, :, 1] < 0.6) | # Not too green
1444
- ((frame_norm[:, :, 0] > 0.2) & (frame_norm[:, :, 2] > 0.2)) # Some red and blue
1445
- )
1446
-
1447
- # Sample pixels (subsample for performance)
1448
- # Sample green pixels
1449
- green_coords = np.where(green_mask)
1450
- if len(green_coords[0]) > 1000:
1451
- indices = np.random.choice(len(green_coords[0]), 1000, replace=False)
1452
- green_coords = (green_coords[0][indices], green_coords[1][indices])
1453
-
1454
- green_pixels = frame_norm[green_coords].tolist()
1455
-
1456
- # Sample person pixels
1457
- person_coords = np.where(person_mask)
1458
- if len(person_coords[0]) > 1000:
1459
- indices = np.random.choice(len(person_coords[0]), 1000, replace=False)
1460
- person_coords = (person_coords[0][indices], person_coords[1][indices])
1461
-
1462
- person_pixels = frame_norm[person_coords].tolist()
1463
-
1464
- return green_pixels, person_pixels
1465
-
1466
- except Exception as e:
1467
- logger.warning(f"Frame color analysis failed: {e}")
1468
- return [], []
1469
-
1470
- def _calculate_initial_threshold(self, green_pixels: np.ndarray, person_pixels: np.ndarray) -> float:
1471
- """
1472
- Calculate initial threshold based on color analysis.
1473
-
1474
- Strategy:
1475
- 1. Find the "least green" green pixels (edge of green screen)
1476
- 2. Find the "most green" person pixels (skin tones, clothing with green)
1477
- 3. Calculate color distance between these clusters
1478
- 4. Set threshold as percentage of that distance
1479
- """
1480
- try:
1481
- if SKLEARN_AVAILABLE and len(green_pixels) > 50 and len(person_pixels) > 50:
1482
- # Convert to LAB color space for better perceptual distance
1483
- green_lab = self._rgb_to_lab_batch(green_pixels)
1484
- person_lab = self._rgb_to_lab_batch(person_pixels)
1485
-
1486
- # Use clustering to find representative colors
1487
- kmeans_green = KMeans(n_clusters=min(5, len(green_lab)//10), random_state=42, n_init=10)
1488
- green_clusters = kmeans_green.fit_predict(green_lab)
1489
- green_centers = kmeans_green.cluster_centers_
1490
-
1491
- # Find cluster closest to the target green
1492
- target_green_lab = self._rgb_to_lab(np.array([[[0, 1, 0]]]))[0][0]
1493
- distances_to_target = np.linalg.norm(green_centers - target_green_lab, axis=1)
1494
- main_green_cluster = green_centers[np.argmin(distances_to_target)]
1495
-
1496
- # Find the most green of the person pixels
1497
- kmeans_person = KMeans(n_clusters=min(5, len(person_lab)//10), random_state=42, n_init=10)
1498
- person_clusters = kmeans_person.fit_predict(person_lab)
1499
- person_centers = kmeans_person.cluster_centers_
1500
-
1501
- # Find person cluster closest to green
1502
- distances_to_green = np.linalg.norm(person_centers - main_green_cluster, axis=1)
1503
- closest_person_cluster = person_centers[np.argmin(distances_to_green)]
1504
-
1505
- # Calculate color distance
1506
- color_distance = np.linalg.norm(main_green_cluster - closest_person_cluster)
1507
-
1508
- # Convert LAB distance to threshold
1509
- # Start conservative - use 40% of distance for initial guess
1510
- threshold = (color_distance / 100.0) * 0.4
1511
-
1512
- logger.info(f"Color distance analysis: LAB distance={color_distance:.2f}, initial threshold={threshold:.3f}")
1513
-
1514
- return threshold
1515
- else:
1516
- # Fallback: analyze in RGB space
1517
- return self._simple_rgb_threshold(green_pixels, person_pixels)
1518
-
1519
- except Exception as e:
1520
- logger.warning(f"Threshold calculation failed: {e}")
1521
- return self._simple_rgb_threshold(green_pixels, person_pixels)
1522
-
1523
- def _simple_rgb_threshold(self, green_pixels: np.ndarray, person_pixels: np.ndarray) -> float:
1524
- """Fallback RGB-based threshold calculation."""
1525
- try:
1526
- # Find average green pixel
1527
- avg_green = np.mean(green_pixels, axis=0)
1528
-
1529
- # Find person pixel closest to green
1530
- green_distances = np.linalg.norm(person_pixels - avg_green, axis=1)
1531
- min_distance = np.min(green_distances)
1532
-
1533
- # Use 50% of minimum distance as initial threshold
1534
- threshold = min_distance * 0.5
1535
-
1536
- logger.info(f"RGB fallback: min_distance={min_distance:.3f}, threshold={threshold:.3f}")
1537
- return threshold
1538
-
1539
- except Exception as e:
1540
- logger.warning(f"RGB fallback failed: {e}")
1541
- return self.config.AUTO_DETECTION_FALLBACK
1542
-
1543
- def _rgb_to_lab_batch(self, rgb_batch: np.ndarray) -> np.ndarray:
1544
- """Convert batch of RGB values to LAB color space."""
1545
- try:
1546
- # Reshape for OpenCV
1547
- rgb_reshaped = rgb_batch.reshape(-1, 1, 3).astype(np.float32)
1548
-
1549
- # Convert to LAB
1550
- lab = cv2.cvtColor(rgb_reshaped, cv2.COLOR_RGB2LAB)
1551
-
1552
- # Reshape back
1553
- return lab.reshape(-1, 3)
1554
-
1555
- except Exception as e:
1556
- logger.warning(f"LAB conversion failed: {e}")
1557
- return rgb_batch # Return RGB as fallback
1558
-
1559
- def _rgb_to_lab(self, rgb: np.ndarray) -> np.ndarray:
1560
- """Convert single RGB image to LAB."""
1561
- try:
1562
- return cv2.cvtColor(rgb.astype(np.float32), cv2.COLOR_RGB2LAB)
1563
- except Exception as e:
1564
- logger.warning(f"Single LAB conversion failed: {e}")
1565
- return rgb
1566
 
1567
  # ==============================================================================
1568
- # CHAPTER 13: DEBUG AND UTILITY FUNCTIONS
1569
  # ==============================================================================
1570
 
1571
  def _debug_video_info(self, video_path: str, label: str = "Video"):
@@ -1622,7 +1031,7 @@ def get_processing_stats(self) -> Dict[str, Any]:
1622
  return stats
1623
 
1624
  # ==============================================================================
1625
- # CHAPTER 14: EXPORT INTERFACE AND COMPATIBILITY
1626
  # ==============================================================================
1627
 
1628
  def create_two_stage_processor(sam2_handler, matanyone_handler, **kwargs):
@@ -1636,14 +1045,13 @@ def create_two_stage_processor(sam2_handler, matanyone_handler, **kwargs):
1636
  # Main execution example
1637
  if __name__ == "__main__":
1638
  # Example usage - replace with your actual handlers
1639
- logger.info("TwoStageProcessor module loaded successfully")
1640
  logger.info("Use create_two_stage_processor(sam2_handler, matanyone_handler) to initialize")
1641
 
1642
  # Print configuration
1643
  config = ProcessingConfig()
1644
- logger.info(f"Default config: {config.REFERENCE_FRAMES} reference frames, green={config.GREEN_COLOR}")
1645
- logger.info(f"Auto-detection: {config.AUTO_DETECTION_FRAMES} analysis frames, fallback={config.AUTO_DETECTION_FALLBACK}")
1646
- logger.info(f"Adaptive optimization: {config.ADAPTIVE_MAX_ITERATIONS} max iterations")
1647
  logger.info(f"Chunked processing: {config.MAX_CHUNK_DURATION}s chunks with {config.CHUNK_OVERLAP_FRAMES} frame overlap")
1648
- logger.info(f"Memory management: Cache clearing={'ON' if config.CLEAR_CACHE_AFTER_STAGE else 'OFF'}")
1649
- logger.info(f"Memory testing: {'DISABLED' if not config.ENABLE_MEMORY_TESTING else 'ENABLED'}")
 
1
  #!/usr/bin/env python3
2
  """
3
+ EFFICIENT Two-Stage Alpha Channel Processing System ✅ 2025-09-09
4
+ VIDEO-TO-VIDEO PIPELINE: Direct alpha compositing without green screen
5
  Stage 1: SAM2 creates reference mask from first 3 frames
6
+ Stage 2: MatAnyone processes entire video → pha.mp4 (alpha matte)
7
+ Stage 3: Direct alpha compositing with background → final.mp4
 
8
  FEATURES:
9
+ - No green screen or chroma key needed
10
+ - Direct alpha channel compositing
11
  - Chunked processing for long videos with memory management
12
  - Dimension mismatch handling
13
  - Memory optimization with light/deep cleanup modes
 
27
  import gc
28
  import time
29
  import traceback
30
+ import subprocess
31
  from pathlib import Path
32
  from typing import Optional, Tuple, Dict, Any, List
33
  from moviepy.editor import VideoFileClip, CompositeVideoClip, ImageClip, concatenate_videoclips
 
41
  )
42
  logger = logging.getLogger(__name__)
43
 
 
 
 
 
 
 
 
 
44
  # PyTorch memory management
45
  try:
46
  import torch
 
64
  CHUNK_OVERLAP_FRAMES = 5 # Frames to overlap between chunks for smooth transitions
65
  MAX_PROCESSING_RESOLUTION = None # Keep full resolution for chunks
66
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
  # Quality settings
68
  VIDEO_CODEC = 'libx264'
69
  VIDEO_BITRATE = '8000k'
 
84
  ENABLE_MEMORY_TESTING = False # DISABLED: Memory testing causes predictor deletion
85
 
86
  # ==============================================================================
87
+ # CHAPTER 3: MEMORY MANAGEMENT UTILITIES
88
  # ==============================================================================
89
 
90
  class MemoryManager:
91
+ """Utilities for managing GPU and system memory."""
92
 
93
  def __init__(self, config: ProcessingConfig):
94
  self.config = config
 
252
  report.append("="*60)
253
  return "\n".join(report)
254
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
255
  # ==============================================================================
256
+ # CHAPTER 4: QUALITY MANAGER
257
  # ==============================================================================
258
 
259
  class QualityManager:
 
286
  return cls.PROFILES.get(quality, cls.PROFILES['medium'])
287
 
288
  # ==============================================================================
289
+ # CHAPTER 5: CHUNKED VIDEO PROCESSOR
290
  # ==============================================================================
291
 
292
  class ChunkedVideoProcessor:
 
393
  raise
394
 
395
  # ==============================================================================
396
+ # CHAPTER 6: TWOSTAGEPROCESSOR CLASS DEFINITION
397
  # ==============================================================================
398
 
399
  class TwoStageProcessor:
400
  """
401
+ Efficient two-stage alpha channel processor with video-to-video pipeline.
402
 
403
+ This processor avoids green screen entirely by using alpha channels:
404
  1. SAM2 creates reference mask from first few frames
405
+ 2. MatAnyone processes entire video using reference mask → pha.mp4 (alpha matte)
406
+ 3. Direct alpha compositing with background → final.mp4
407
+
408
+ No chroma key or green screen needed!
 
 
409
  """
410
 
411
  def __init__(self, sam2_handler, matanyone_handler, temp_dir: Optional[str] = None):
 
416
  self.config = ProcessingConfig()
417
  self.memory_manager = MemoryManager(self.config)
418
  self.chunked_processor = ChunkedVideoProcessor(self.temp_dir, self.config)
 
419
 
420
  # Ensure temp directory exists
421
  os.makedirs(self.temp_dir, exist_ok=True)
 
423
 
424
  # Log initial memory state
425
  logger.info(self.memory_manager.get_gpu_memory_info())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
426
 
427
  def process_video(self,
428
  video_path: str,
 
433
  callback: Optional[callable] = None,
434
  **kwargs) -> Tuple[str, str]:
435
  """
436
+ Main processing pipeline - video to video with alpha compositing.
437
 
438
  Returns:
439
  Tuple[str, str]: (final_output_path, status_message)
440
  """
441
  try:
442
+ logger.info(f"🎬 Two-Stage Alpha Pipeline: {video_path}")
443
  logger.info(f"🎯 Background: {background_path}")
444
  logger.info(f"📁 Temp: {self.temp_dir}")
445
  logger.info(f"🧠 Initial {self.memory_manager.get_gpu_memory_info()}")
 
505
  else:
506
  raise RuntimeError(f"Chunk {i+1} processing failed: {status}")
507
 
508
+ # Memory cleanup between chunks
509
  logger.info(f"Cleaning up after chunk {i+1}...")
510
  self.memory_manager.cleanup_stage(f"Chunk_{i+1}", force=True)
511
 
 
531
  logger.error(f"Chunked processing failed: {e}")
532
  raise
533
 
 
534
  def _process_single_video(self,
535
  video_path: str,
536
  background_path: str,
 
562
  self.memory_manager.cleanup_model(self.matanyone_handler, "MatAnyone", deep_cleanup=False)
563
  self.memory_manager.cleanup_stage("MatAnyone", force=True)
564
 
565
+ # Stage 3: Direct alpha compositing (no green screen!)
 
 
 
 
 
 
 
 
 
 
566
  if callback:
567
+ callback("Alpha compositing with background...", 70)
568
+ logger.info("STAGE 3: Direct alpha compositing with background...")
569
+ final_path = self._stage3_alpha_composite(video_path, alpha_video_path, background_path, output_path, quality)
570
 
571
  # Final memory cleanup
572
  self.memory_manager.cleanup_stage("Final")
 
605
  logger.warning(f"Failed to cleanup: {e}")
606
 
607
  # ==============================================================================
608
+ # CHAPTER 7: STAGE 1 - REFERENCE MASK CREATION (SAM2)
609
  # ==============================================================================
610
 
 
611
  def _stage1_create_reference_mask(self, video_path: str) -> str:
612
  """
613
  Stage 1: Create robust reference mask from first few frames using SAM2.
 
730
  return result
731
 
732
  # ==============================================================================
733
+ # CHAPTER 8: STAGE 2 - MATANYONE PROCESSING
734
  # ==============================================================================
735
 
 
736
  def _stage2_matanyone_processing(self, video_path: str, reference_mask_path: str, trim_seconds: Optional[int]) -> str:
737
  """
738
  Stage 2: Process entire video through MatAnyone using reference mask.
 
772
  r_dilate=15,
773
  suffix='pha',
774
  save_image=False,
775
+ max_size=max_size # Use actual number instead of None
776
  )
777
 
778
  except Exception as e:
 
791
 
792
  # Verify MatAnyone output
793
  if not alpha_output_path or not os.path.exists(alpha_output_path):
 
794
  files_created = os.listdir(matanyone_dir) if os.path.exists(matanyone_dir) else []
795
  raise RuntimeError(f"MatAnyone did not create pha.mp4. Files created: {files_created}")
796
 
 
832
  raise
833
 
834
  # ==============================================================================
835
+ # CHAPTER 9: STAGE 3 - DIRECT ALPHA COMPOSITING (NO GREEN SCREEN!)
836
  # ==============================================================================
837
 
838
+ def _stage3_alpha_composite(self, original_video_path: str, alpha_video_path: str,
839
+ background_path: str, output_path: str, quality: str) -> str:
840
  """
841
+ Stage 3: Direct alpha compositing without any green screen.
 
 
 
 
842
 
843
+ Uses the alpha matte from MatAnyone to composite the person
844
+ directly onto the new background.
845
  """
846
  try:
847
+ # Get quality profile
848
+ profile = QualityManager.get_profile(quality)
849
 
850
+ # Open videos
851
  original_cap = cv2.VideoCapture(original_video_path)
852
  alpha_cap = cv2.VideoCapture(alpha_video_path)
853
 
 
866
  alpha_width = int(alpha_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
867
  alpha_height = int(alpha_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
868
 
869
+ logger.info(f"Original video: {width}x{height} @ {fps} FPS, {total_frames} frames")
870
  logger.info(f"Alpha video dimensions: {alpha_width}x{alpha_height}")
871
 
872
+ # Load and prepare background
873
+ if background_path.lower().endswith(('.mp4', '.avi', '.mov', '.mkv')):
874
+ # Video background - process frame by frame
875
+ bg_cap = cv2.VideoCapture(background_path)
876
+ bg_is_video = True
877
+ else:
878
+ # Image background
879
+ bg_image = cv2.imread(background_path)
880
+ if bg_image is None:
881
+ raise RuntimeError(f"Cannot load background image: {background_path}")
882
+ # Resize to match video
883
+ bg_image = cv2.resize(bg_image, (width, height))
884
+ bg_is_video = False
885
+
886
+ # Setup video writer
887
  fourcc = cv2.VideoWriter_fourcc(*'mp4v')
888
+ out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
889
 
890
  frame_count = 0
891
  while True:
 
896
  if not ret_orig or not ret_alpha:
897
  break
898
 
899
+ # Get background frame
900
+ if bg_is_video:
901
+ ret_bg, bg_frame = bg_cap.read()
902
+ if not ret_bg:
903
+ # Loop background if it's shorter
904
+ bg_cap.set(cv2.CAP_PROP_POS_FRAMES, 0)
905
+ ret_bg, bg_frame = bg_cap.read()
906
+ bg_frame = cv2.resize(bg_frame, (width, height))
907
+ else:
908
+ bg_frame = bg_image.copy()
909
+
910
  # Convert alpha frame to grayscale mask
911
  if len(alpha_frame.shape) == 3:
912
  alpha_mask = cv2.cvtColor(alpha_frame, cv2.COLOR_BGR2GRAY)
913
  else:
914
  alpha_mask = alpha_frame
915
 
916
+ # Handle dimension mismatch - resize alpha to match original if needed
917
  if alpha_mask.shape[:2] != orig_frame.shape[:2]:
918
  if frame_count == 0: # Log only once
919
  logger.info(f"Resizing alpha from {alpha_mask.shape[:2]} to {orig_frame.shape[:2]}")
 
922
  # Normalize alpha to 0-1 range
923
  alpha_normalized = alpha_mask.astype(np.float32) / 255.0
924
 
925
+ # Create 3-channel alpha for compositing
 
 
 
 
926
  alpha_3d = np.stack([alpha_normalized] * 3, axis=2)
927
 
928
+ # Composite: background where alpha=0, person where alpha=1
929
  orig_frame_float = orig_frame.astype(np.float32)
930
+ bg_frame_float = bg_frame.astype(np.float32)
931
 
932
+ # Direct alpha compositing
933
+ composite = alpha_3d * orig_frame_float + (1 - alpha_3d) * bg_frame_float
934
  composite = np.clip(composite, 0, 255).astype(np.uint8)
935
 
936
  # Write frame
937
  out.write(composite)
938
  frame_count += 1
939
 
940
+ # Progress logging
941
  if frame_count % self.config.MEMORY_CHECK_INTERVAL == 0:
942
+ logger.info(f"Compositing progress: {frame_count}/{total_frames}")
943
  if self.memory_manager.should_clear_memory():
944
+ logger.info("Memory high during compositing, clearing...")
945
  self.memory_manager.clear_gpu_cache()
946
 
947
  # Save debug frame occasionally
948
  if self.config.SAVE_DEBUG_FILES and frame_count % self.config.DEBUG_FRAME_INTERVAL == 0:
949
+ debug_path = os.path.join(self.temp_dir, f"debug_composite_{frame_count:04d}.png")
950
  cv2.imwrite(debug_path, composite)
951
 
952
  # Cleanup
953
  original_cap.release()
954
  alpha_cap.release()
955
  out.release()
956
+ if bg_is_video:
957
+ bg_cap.release()
958
 
959
  if frame_count == 0:
960
+ raise RuntimeError("No frames processed for output video")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
961
 
962
+ # Verify output
963
  if not os.path.exists(output_path):
964
+ raise RuntimeError("Output file was not created")
965
+
966
+ file_size = os.path.getsize(output_path) / (1024 * 1024) # MB
967
+ logger.info(f"✅ Alpha compositing completed: {output_path} ({file_size:.1f} MB)")
968
+ logger.info(f" Processed {frame_count} frames")
969
 
970
  return output_path
971
 
972
  except Exception as e:
973
+ logger.error(f"Stage 3 alpha compositing failed: {e}")
974
  raise
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
975
 
976
  # ==============================================================================
977
+ # CHAPTER 10: DEBUG AND UTILITY FUNCTIONS
978
  # ==============================================================================
979
 
980
  def _debug_video_info(self, video_path: str, label: str = "Video"):
 
1031
  return stats
1032
 
1033
  # ==============================================================================
1034
+ # CHAPTER 11: EXPORT INTERFACE AND COMPATIBILITY
1035
  # ==============================================================================
1036
 
1037
  def create_two_stage_processor(sam2_handler, matanyone_handler, **kwargs):
 
1045
  # Main execution example
1046
  if __name__ == "__main__":
1047
  # Example usage - replace with your actual handlers
1048
+ logger.info("TwoStageProcessor (Alpha Channel Version) loaded successfully")
1049
  logger.info("Use create_two_stage_processor(sam2_handler, matanyone_handler) to initialize")
1050
 
1051
  # Print configuration
1052
  config = ProcessingConfig()
1053
+ logger.info(f"Pipeline: SAM2 MatAnyone → Direct Alpha Compositing")
1054
+ logger.info(f"No green screen or chroma key needed!")
1055
+ logger.info(f"Default config: {config.REFERENCE_FRAMES} reference frames")
1056
  logger.info(f"Chunked processing: {config.MAX_CHUNK_DURATION}s chunks with {config.CHUNK_OVERLAP_FRAMES} frame overlap")
1057
+ logger.info(f"Memory management: Cache clearing={'ON' if config.CLEAR_CACHE_AFTER_STAGE else 'OFF'}")