MogensR committed on
Commit
16a0e0c
Β·
verified Β·
1 Parent(s): df76738

Update processing/two_stage/two_stage_processor.py

Browse files
processing/two_stage/two_stage_processor.py CHANGED
@@ -1,13 +1,17 @@
1
  #!/usr/bin/env python3
2
  """
3
- EFFICIENT Two-Stage Alpha Channel Processing System βœ… 2025-09-09
4
  VIDEO-TO-VIDEO PIPELINE: Direct alpha compositing without green screen
5
  Stage 1: SAM2 creates reference mask from first 3 frames
6
  Stage 2: MatAnyone processes entire video β†’ pha.mp4 (alpha matte)
7
  Stage 3: Direct alpha compositing with background β†’ final.mp4
 
8
  FEATURES:
9
  - No green screen or chroma key needed
10
  - Direct alpha channel compositing
 
 
 
11
  - Chunked processing for long videos with memory management
12
  - Dimension mismatch handling
13
  - Memory optimization with light/deep cleanup modes
@@ -30,7 +34,7 @@
30
  import subprocess
31
  from pathlib import Path
32
  from typing import Optional, Tuple, Dict, Any, List
33
- from moviepy.editor import VideoFileClip, CompositeVideoClip, ImageClip, concatenate_videoclips
34
  import moviepy.video.fx.all as vfx
35
 
36
  # Setup logging first
@@ -54,7 +58,7 @@
54
  # ==============================================================================
55
 
56
  class ProcessingConfig:
57
- """Configuration settings for two-stage processing."""
58
 
59
  # Reference mask creation
60
  REFERENCE_FRAMES = 3 # Number of frames to use for reference mask
@@ -68,9 +72,18 @@ class ProcessingConfig:
68
  VIDEO_CODEC = 'libx264'
69
  VIDEO_BITRATE = '8000k'
70
  AUDIO_CODEC = 'aac'
 
 
 
 
 
 
 
 
71
 
72
  # Processing limits
73
  MAX_FRAMES_FOR_REFERENCE = 10 # Safety limit
 
74
 
75
  # Memory management
76
  CLEAR_CACHE_AFTER_STAGE = True # Clear GPU cache between stages
@@ -82,6 +95,10 @@ class ProcessingConfig:
82
  SAVE_DEBUG_FILES = True
83
  DEBUG_FRAME_INTERVAL = 30 # Save every Nth frame for debugging
84
  ENABLE_MEMORY_TESTING = False # DISABLED: Memory testing causes predictor deletion
 
 
 
 
85
 
86
  # ==============================================================================
87
  # CHAPTER 3: MEMORY MANAGEMENT UTILITIES
@@ -286,11 +303,96 @@ def get_profile(cls, quality: str = 'medium') -> Dict[str, Any]:
286
  return cls.PROFILES.get(quality, cls.PROFILES['medium'])
287
 
288
  # ==============================================================================
289
- # CHAPTER 5: CHUNKED VIDEO PROCESSOR
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
290
  # ==============================================================================
291
 
292
  class ChunkedVideoProcessor:
293
- """Handles splitting and reassembling videos for chunked processing."""
294
 
295
  def __init__(self, temp_dir: str, config: ProcessingConfig):
296
  self.temp_dir = temp_dir
@@ -311,7 +413,7 @@ def should_chunk_video(self, video_path: str) -> bool:
311
  return False
312
 
313
  def split_video_into_chunks(self, video_path: str) -> List[str]:
314
- """Split video into chunks for processing."""
315
  try:
316
  with VideoFileClip(video_path) as clip:
317
  duration = clip.duration
@@ -325,24 +427,40 @@ def split_video_into_chunks(self, video_path: str) -> List[str]:
325
  # Calculate chunk end time
326
  end_time = min(current_time + chunk_duration, duration)
327
 
328
- # Create chunk
 
 
 
 
 
 
 
329
  chunk_clip = clip.subclip(current_time, end_time)
330
  chunk_path = os.path.join(self.chunks_dir, f"chunk_{chunk_index:03d}.mp4")
331
 
 
332
  chunk_clip.write_videofile(
333
  chunk_path,
334
  codec=self.config.VIDEO_CODEC,
335
- audio_codec=self.config.AUDIO_CODEC,
 
 
 
336
  verbose=False,
337
  logger=None
338
  )
339
  chunk_clip.close()
340
 
341
  chunk_paths.append(chunk_path)
342
- logger.info(f"Created chunk {chunk_index}: {current_time:.1f}s - {end_time:.1f}s")
343
 
344
  # Move to next chunk with slight overlap for smooth transitions
345
- current_time = end_time - (self.config.CHUNK_OVERLAP_FRAMES / clip.fps)
 
 
 
 
 
346
  chunk_index += 1
347
 
348
  logger.info(f"Split video into {len(chunk_paths)} chunks")
@@ -352,13 +470,31 @@ def split_video_into_chunks(self, video_path: str) -> List[str]:
352
  logger.error(f"Video chunking failed: {e}")
353
  raise
354
 
355
- def reassemble_chunks(self, processed_chunk_paths: List[str], output_path: str) -> str:
356
- """Reassemble processed chunks into final video."""
 
357
  try:
358
  if len(processed_chunk_paths) == 1:
359
- # Single chunk, just copy
360
- shutil.copy2(processed_chunk_paths[0], output_path)
361
- logger.info("Single chunk, copied directly to output")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
362
  return output_path
363
 
364
  # Load all chunks
@@ -371,11 +507,23 @@ def reassemble_chunks(self, processed_chunk_paths: List[str], output_path: str)
371
  logger.info(f"Concatenating {len(chunk_clips)} chunks...")
372
  final_clip = concatenate_videoclips(chunk_clips, method="compose")
373
 
374
- # Write final video
 
 
 
 
 
 
 
 
 
375
  final_clip.write_videofile(
376
  output_path,
377
  codec=self.config.VIDEO_CODEC,
378
- audio_codec=self.config.AUDIO_CODEC,
 
 
 
379
  verbose=False,
380
  logger=None
381
  )
@@ -393,19 +541,24 @@ def reassemble_chunks(self, processed_chunk_paths: List[str], output_path: str)
393
  raise
394
 
395
  # ==============================================================================
396
- # CHAPTER 6: TWOSTAGEPROCESSOR CLASS DEFINITION
397
  # ==============================================================================
398
 
399
  class TwoStageProcessor:
400
  """
401
- Efficient two-stage alpha channel processor with video-to-video pipeline.
402
 
403
  This processor avoids green screen entirely by using alpha channels:
404
  1. SAM2 creates reference mask from first few frames
405
  2. MatAnyone processes entire video using reference mask β†’ pha.mp4 (alpha matte)
406
  3. Direct alpha compositing with background β†’ final.mp4
407
 
408
- No chroma key or green screen needed!
 
 
 
 
 
409
  """
410
 
411
  def __init__(self, sam2_handler, matanyone_handler, temp_dir: Optional[str] = None):
@@ -416,6 +569,7 @@ def __init__(self, sam2_handler, matanyone_handler, temp_dir: Optional[str] = No
416
  self.config = ProcessingConfig()
417
  self.memory_manager = MemoryManager(self.config)
418
  self.chunked_processor = ChunkedVideoProcessor(self.temp_dir, self.config)
 
419
 
420
  # Ensure temp directory exists
421
  os.makedirs(self.temp_dir, exist_ok=True)
@@ -433,26 +587,37 @@ def process_video(self,
433
  callback: Optional[callable] = None,
434
  **kwargs) -> Tuple[str, str]:
435
  """
436
- Main processing pipeline - video to video with alpha compositing.
437
 
438
  Returns:
439
  Tuple[str, str]: (final_output_path, status_message)
440
  """
441
  try:
442
- logger.info(f"🎬 Two-Stage Alpha Pipeline: {video_path}")
443
  logger.info(f"🎯 Background: {background_path}")
444
  logger.info(f"πŸ“ Temp: {self.temp_dir}")
445
  logger.info(f"🧠 Initial {self.memory_manager.get_gpu_memory_info()}")
446
 
 
 
 
 
 
447
  # Determine if chunking is needed
448
  needs_chunking = self.chunked_processor.should_chunk_video(video_path)
449
 
450
  if needs_chunking:
451
  logger.info("Video exceeds chunk duration, using chunked processing...")
452
- result = self._process_chunked_video(video_path, background_path, output_path, quality, callback)
 
 
 
453
  else:
454
  logger.info("Video within chunk duration, using single-pass processing...")
455
- result = self._process_single_video(video_path, background_path, output_path, quality, trim_seconds, callback)
 
 
 
456
 
457
  # Final memory report
458
  logger.info(self.memory_manager.get_memory_report())
@@ -465,11 +630,36 @@ def process_video(self,
465
  logger.error(f"Traceback: {traceback.format_exc()}")
466
  return None, error_msg
467
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
468
  def _process_chunked_video(self,
469
  video_path: str,
470
  background_path: str,
471
  output_path: str,
472
  quality: str,
 
473
  callback: Optional[callable] = None) -> Tuple[str, str]:
474
  """Process long video using chunked approach."""
475
  try:
@@ -491,12 +681,12 @@ def _process_chunked_video(self,
491
  logger.info(f"Processing chunk {i+1}/{total_chunks}: {chunk_path}")
492
  logger.info(f"Pre-chunk memory: {self.memory_manager.get_gpu_memory_info()}")
493
 
494
- # Process this chunk at full resolution
495
  chunk_output = os.path.join(self.temp_dir, f"processed_chunk_{i:03d}.mp4")
496
 
497
  result_path, status = self._process_single_video(
498
  chunk_path, background_path, chunk_output, quality,
499
- trim_seconds=None, callback=None # No sub-callbacks for chunks
500
  )
501
 
502
  if result_path and os.path.exists(result_path):
@@ -516,16 +706,18 @@ def _process_chunked_video(self,
516
  logger.info(f"Post-chunk memory: {self.memory_manager.get_gpu_memory_info()}")
517
 
518
  if callback:
519
- callback("Reassembling chunks...", 85)
520
 
521
- # Reassemble chunks
522
- final_path = self.chunked_processor.reassemble_chunks(processed_chunks, output_path)
 
 
523
 
524
  if callback:
525
  callback("Chunked processing completed!", 100)
526
 
527
  logger.info(f"βœ… Chunked processing completed: {final_path}")
528
- return final_path, f"Success - Processed {total_chunks} chunks"
529
 
530
  except Exception as e:
531
  logger.error(f"Chunked processing failed: {e}")
@@ -537,6 +729,7 @@ def _process_single_video(self,
537
  output_path: str,
538
  quality: str,
539
  trim_seconds: Optional[int] = None,
 
540
  callback: Optional[callable] = None) -> Tuple[str, str]:
541
  """Process a single video (or chunk) through the pipeline."""
542
  try:
@@ -562,11 +755,14 @@ def _process_single_video(self,
562
  self.memory_manager.cleanup_model(self.matanyone_handler, "MatAnyone", deep_cleanup=False)
563
  self.memory_manager.cleanup_stage("MatAnyone", force=True)
564
 
565
- # Stage 3: Direct alpha compositing (no green screen!)
566
  if callback:
567
  callback("Alpha compositing with background...", 70)
568
- logger.info("STAGE 3: Direct alpha compositing with background...")
569
- final_path = self._stage3_alpha_composite(video_path, alpha_video_path, background_path, output_path, quality)
 
 
 
570
 
571
  # Final memory cleanup
572
  self.memory_manager.cleanup_stage("Final")
@@ -605,7 +801,7 @@ def cleanup(self):
605
  logger.warning(f"Failed to cleanup: {e}")
606
 
607
  # ==============================================================================
608
- # CHAPTER 7: STAGE 1 - REFERENCE MASK CREATION (SAM2)
609
  # ==============================================================================
610
 
611
  def _stage1_create_reference_mask(self, video_path: str) -> str:
@@ -730,7 +926,7 @@ def _combine_reference_masks(self, masks: list) -> np.ndarray:
730
  return result
731
 
732
  # ==============================================================================
733
- # CHAPTER 8: STAGE 2 - MATANYONE PROCESSING
734
  # ==============================================================================
735
 
736
  def _stage2_matanyone_processing(self, video_path: str, reference_mask_path: str, trim_seconds: Optional[int]) -> str:
@@ -815,14 +1011,15 @@ def _stage2_matanyone_processing(self, video_path: str, reference_mask_path: str
815
  raise
816
 
817
  def _trim_video(self, input_path: str, output_path: str, seconds: int):
818
- """Trim video to specified duration."""
819
  try:
820
  with VideoFileClip(input_path) as clip:
821
  trimmed = clip.subclip(0, min(seconds, clip.duration))
822
  trimmed.write_videofile(
823
  output_path,
824
  codec=self.config.VIDEO_CODEC,
825
- audio_codec=self.config.AUDIO_CODEC,
 
826
  verbose=False,
827
  logger=None
828
  )
@@ -832,149 +1029,163 @@ def _trim_video(self, input_path: str, output_path: str, seconds: int):
832
  raise
833
 
834
  # ==============================================================================
835
- # CHAPTER 9: STAGE 3 - DIRECT ALPHA COMPOSITING (NO GREEN SCREEN!)
836
  # ==============================================================================
837
 
838
- def _stage3_alpha_composite(self, original_video_path: str, alpha_video_path: str,
839
- background_path: str, output_path: str, quality: str) -> str:
 
 
 
 
840
  """
841
- Stage 3: Direct alpha compositing without any green screen.
842
 
843
- Uses the alpha matte from MatAnyone to composite the person
844
- directly onto the new background.
845
  """
846
  try:
847
- # Get quality profile
848
- profile = QualityManager.get_profile(quality)
849
-
850
- # Open videos
851
- original_cap = cv2.VideoCapture(original_video_path)
852
- alpha_cap = cv2.VideoCapture(alpha_video_path)
853
 
854
- if not original_cap.isOpened():
855
- raise RuntimeError(f"Cannot open original video: {original_video_path}")
856
- if not alpha_cap.isOpened():
857
- raise RuntimeError(f"Cannot open alpha video: {alpha_video_path}")
858
-
859
- # Get video properties
860
- fps = original_cap.get(cv2.CAP_PROP_FPS)
861
- width = int(original_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
862
- height = int(original_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
863
- total_frames = int(original_cap.get(cv2.CAP_PROP_FRAME_COUNT))
864
-
865
- # Get alpha video dimensions
866
- alpha_width = int(alpha_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
867
- alpha_height = int(alpha_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
868
-
869
- logger.info(f"Original video: {width}x{height} @ {fps} FPS, {total_frames} frames")
870
- logger.info(f"Alpha video dimensions: {alpha_width}x{alpha_height}")
871
-
872
- # Load and prepare background
873
  if background_path.lower().endswith(('.mp4', '.avi', '.mov', '.mkv')):
874
- # Video background - process frame by frame
875
- bg_cap = cv2.VideoCapture(background_path)
876
- bg_is_video = True
 
 
 
 
 
877
  else:
878
- # Image background
879
- bg_image = cv2.imread(background_path)
880
- if bg_image is None:
881
- raise RuntimeError(f"Cannot load background image: {background_path}")
882
- # Resize to match video
883
- bg_image = cv2.resize(bg_image, (width, height))
884
- bg_is_video = False
885
-
886
- # Setup video writer
887
- fourcc = cv2.VideoWriter_fourcc(*'mp4v')
888
- out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
889
 
890
- frame_count = 0
891
- while True:
892
- # Read frames
893
- ret_orig, orig_frame = original_cap.read()
894
- ret_alpha, alpha_frame = alpha_cap.read()
895
-
896
- if not ret_orig or not ret_alpha:
897
- break
898
-
899
- # Get background frame
900
- if bg_is_video:
901
- ret_bg, bg_frame = bg_cap.read()
902
- if not ret_bg:
903
- # Loop background if it's shorter
904
- bg_cap.set(cv2.CAP_PROP_POS_FRAMES, 0)
905
- ret_bg, bg_frame = bg_cap.read()
906
- bg_frame = cv2.resize(bg_frame, (width, height))
907
- else:
908
- bg_frame = bg_image.copy()
909
-
910
- # Convert alpha frame to grayscale mask
911
- if len(alpha_frame.shape) == 3:
912
- alpha_mask = cv2.cvtColor(alpha_frame, cv2.COLOR_BGR2GRAY)
913
- else:
914
- alpha_mask = alpha_frame
915
-
916
- # Handle dimension mismatch - resize alpha to match original if needed
917
- if alpha_mask.shape[:2] != orig_frame.shape[:2]:
918
- if frame_count == 0: # Log only once
919
- logger.info(f"Resizing alpha from {alpha_mask.shape[:2]} to {orig_frame.shape[:2]}")
920
- alpha_mask = cv2.resize(alpha_mask, (width, height), interpolation=cv2.INTER_LINEAR)
921
-
922
- # Normalize alpha to 0-1 range
923
- alpha_normalized = alpha_mask.astype(np.float32) / 255.0
924
-
925
- # Create 3-channel alpha for compositing
926
- alpha_3d = np.stack([alpha_normalized] * 3, axis=2)
927
-
928
- # Composite: background where alpha=0, person where alpha=1
929
- orig_frame_float = orig_frame.astype(np.float32)
930
- bg_frame_float = bg_frame.astype(np.float32)
931
-
932
- # Direct alpha compositing
933
- composite = alpha_3d * orig_frame_float + (1 - alpha_3d) * bg_frame_float
934
- composite = np.clip(composite, 0, 255).astype(np.uint8)
935
-
936
- # Write frame
937
- out.write(composite)
938
- frame_count += 1
939
-
940
- # Progress logging
941
- if frame_count % self.config.MEMORY_CHECK_INTERVAL == 0:
942
- logger.info(f"Compositing progress: {frame_count}/{total_frames}")
943
- if self.memory_manager.should_clear_memory():
944
- logger.info("Memory high during compositing, clearing...")
945
- self.memory_manager.clear_gpu_cache()
946
-
947
- # Save debug frame occasionally
948
- if self.config.SAVE_DEBUG_FILES and frame_count % self.config.DEBUG_FRAME_INTERVAL == 0:
949
- debug_path = os.path.join(self.temp_dir, f"debug_composite_{frame_count:04d}.png")
950
- cv2.imwrite(debug_path, composite)
951
 
952
- # Cleanup
953
- original_cap.release()
954
- alpha_cap.release()
955
- out.release()
956
- if bg_is_video:
957
- bg_cap.release()
 
 
 
 
 
 
 
958
 
959
- if frame_count == 0:
960
- raise RuntimeError("No frames processed for output video")
 
 
 
 
961
 
962
  # Verify output
963
  if not os.path.exists(output_path):
964
  raise RuntimeError("Output file was not created")
965
-
 
 
 
 
 
 
966
  file_size = os.path.getsize(output_path) / (1024 * 1024) # MB
967
- logger.info(f"βœ… Alpha compositing completed: {output_path} ({file_size:.1f} MB)")
968
- logger.info(f" Processed {frame_count} frames")
 
 
969
 
970
  return output_path
971
 
972
  except Exception as e:
973
- logger.error(f"Stage 3 alpha compositing failed: {e}")
974
  raise
975
 
976
  # ==============================================================================
977
- # CHAPTER 10: DEBUG AND UTILITY FUNCTIONS
978
  # ==============================================================================
979
 
980
  def _debug_video_info(self, video_path: str, label: str = "Video"):
@@ -1007,7 +1218,13 @@ def get_processing_stats(self) -> Dict[str, Any]:
1007
  'temp_size_mb': 0,
1008
  'debug_files': 0,
1009
  'memory_info': self.memory_manager.get_gpu_memory_info(),
1010
- 'memory_report': self.memory_manager.get_memory_report()
 
 
 
 
 
 
1011
  }
1012
 
1013
  try:
@@ -1031,7 +1248,7 @@ def get_processing_stats(self) -> Dict[str, Any]:
1031
  return stats
1032
 
1033
  # ==============================================================================
1034
- # CHAPTER 11: EXPORT INTERFACE AND COMPATIBILITY
1035
  # ==============================================================================
1036
 
1037
  def create_two_stage_processor(sam2_handler, matanyone_handler, **kwargs):
@@ -1045,13 +1262,22 @@ def create_two_stage_processor(sam2_handler, matanyone_handler, **kwargs):
1045
  # Main execution example
1046
  if __name__ == "__main__":
1047
  # Example usage - replace with your actual handlers
1048
- logger.info("TwoStageProcessor (Alpha Channel Version) loaded successfully")
 
 
 
 
 
 
 
1049
  logger.info("Use create_two_stage_processor(sam2_handler, matanyone_handler) to initialize")
1050
 
1051
  # Print configuration
1052
  config = ProcessingConfig()
1053
- logger.info(f"Pipeline: SAM2 β†’ MatAnyone β†’ Direct Alpha Compositing")
1054
- logger.info(f"No green screen or chroma key needed!")
1055
- logger.info(f"Default config: {config.REFERENCE_FRAMES} reference frames")
1056
- logger.info(f"Chunked processing: {config.MAX_CHUNK_DURATION}s chunks with {config.CHUNK_OVERLAP_FRAMES} frame overlap")
1057
- logger.info(f"Memory management: Cache clearing={'ON' if config.CLEAR_CACHE_AFTER_STAGE else 'OFF'}")
 
 
 
1
  #!/usr/bin/env python3
2
  """
3
+ ENHANCED Two-Stage Alpha Channel Processing System βœ… 2025-09-09
4
  VIDEO-TO-VIDEO PIPELINE: Direct alpha compositing without green screen
5
  Stage 1: SAM2 creates reference mask from first 3 frames
6
  Stage 2: MatAnyone processes entire video β†’ pha.mp4 (alpha matte)
7
  Stage 3: Direct alpha compositing with background β†’ final.mp4
8
+
9
  FEATURES:
10
  - No green screen or chroma key needed
11
  - Direct alpha channel compositing
12
+ - Audio preservation throughout pipeline
13
+ - Full video duration processing (no truncation)
14
+ - Alpha matte refinement for cleaner edges
15
  - Chunked processing for long videos with memory management
16
  - Dimension mismatch handling
17
  - Memory optimization with light/deep cleanup modes
 
34
  import subprocess
35
  from pathlib import Path
36
  from typing import Optional, Tuple, Dict, Any, List
37
+ from moviepy.editor import VideoFileClip, AudioFileClip, CompositeVideoClip, ImageClip, concatenate_videoclips, VideoClip
38
  import moviepy.video.fx.all as vfx
39
 
40
  # Setup logging first
 
58
  # ==============================================================================
59
 
60
  class ProcessingConfig:
61
+ """Configuration settings for two-stage processing with audio support."""
62
 
63
  # Reference mask creation
64
  REFERENCE_FRAMES = 3 # Number of frames to use for reference mask
 
72
  VIDEO_CODEC = 'libx264'
73
  VIDEO_BITRATE = '8000k'
74
  AUDIO_CODEC = 'aac'
75
+ AUDIO_BITRATE = '192k' # Good quality audio
76
+
77
+ # Alpha refinement settings
78
+ ALPHA_SMOOTHING = 3 # Median blur kernel size for temporal smoothing
79
+ ALPHA_EDGE_BLUR = 1 # Gaussian blur radius for edge feathering
80
+ ALPHA_CONTRAST = 1.2 # Power curve for sharper alpha transitions
81
+ ENABLE_DEFRINGING = True # Remove color fringing at edges
82
+ DEFRINGE_RADIUS = 1 # Radius for defringing operation
83
 
84
  # Processing limits
85
  MAX_FRAMES_FOR_REFERENCE = 10 # Safety limit
86
+ MIN_FINAL_CHUNK_DURATION = 10 # Don't create chunks smaller than this
87
 
88
  # Memory management
89
  CLEAR_CACHE_AFTER_STAGE = True # Clear GPU cache between stages
 
95
  SAVE_DEBUG_FILES = True
96
  DEBUG_FRAME_INTERVAL = 30 # Save every Nth frame for debugging
97
  ENABLE_MEMORY_TESTING = False # DISABLED: Memory testing causes predictor deletion
98
+
99
+ # Audio settings
100
+ PRESERVE_ORIGINAL_AUDIO = True # Always try to preserve original audio
101
+ AUDIO_TEMP_FORMAT = 'm4a' # Temporary audio format
102
 
103
  # ==============================================================================
104
  # CHAPTER 3: MEMORY MANAGEMENT UTILITIES
 
303
  return cls.PROFILES.get(quality, cls.PROFILES['medium'])
304
 
305
  # ==============================================================================
306
+ # CHAPTER 5: ALPHA REFINEMENT UTILITIES
307
+ # ==============================================================================
308
+
309
+ class AlphaRefiner:
310
+ """Utilities for refining alpha mattes for better compositing."""
311
+
312
+ @staticmethod
313
+ def refine_alpha_matte(alpha: np.ndarray, config: ProcessingConfig) -> np.ndarray:
314
+ """
315
+ Refine alpha matte for cleaner compositing.
316
+
317
+ Args:
318
+ alpha: Input alpha matte
319
+ config: Processing configuration
320
+
321
+ Returns:
322
+ Refined alpha matte
323
+ """
324
+ # Ensure single channel
325
+ if len(alpha.shape) == 3:
326
+ alpha = cv2.cvtColor(alpha, cv2.COLOR_BGR2GRAY)
327
+
328
+ # Normalize to 0-1 range if needed
329
+ if alpha.dtype == np.uint8:
330
+ alpha = alpha.astype(np.float32) / 255.0
331
+
332
+ # Temporal smoothing to reduce flickering
333
+ if config.ALPHA_SMOOTHING > 0:
334
+ kernel_size = config.ALPHA_SMOOTHING
335
+ # Ensure odd kernel size
336
+ if kernel_size % 2 == 0:
337
+ kernel_size += 1
338
+ alpha = cv2.medianBlur((alpha * 255).astype(np.uint8), kernel_size).astype(np.float32) / 255.0
339
+
340
+ # Edge feathering for softer transitions
341
+ if config.ALPHA_EDGE_BLUR > 0:
342
+ kernel_size = config.ALPHA_EDGE_BLUR * 2 + 1
343
+ alpha = cv2.GaussianBlur(alpha, (kernel_size, kernel_size), 0)
344
+
345
+ # Apply contrast adjustment for sharper boundaries
346
+ if config.ALPHA_CONTRAST != 1.0:
347
+ alpha = np.power(alpha, config.ALPHA_CONTRAST)
348
+
349
+ # Ensure valid range
350
+ alpha = np.clip(alpha, 0, 1)
351
+
352
+ return alpha
353
+
354
+ @staticmethod
355
+ def defringe_edges(composite: np.ndarray, alpha: np.ndarray, radius: int = 1) -> np.ndarray:
356
+ """
357
+ Remove color fringing at alpha edges.
358
+
359
+ Args:
360
+ composite: Composited image
361
+ alpha: Alpha matte used for compositing
362
+ radius: Radius for defringing operation
363
+
364
+ Returns:
365
+ Defringed composite
366
+ """
367
+ # Ensure alpha is single channel
368
+ if len(alpha.shape) == 3:
369
+ alpha = cv2.cvtColor(alpha, cv2.COLOR_BGR2GRAY)
370
+
371
+ # Normalize alpha if needed
372
+ if alpha.dtype == np.uint8:
373
+ alpha = alpha.astype(np.float32) / 255.0
374
+
375
+ # Create edge mask using morphological operations
376
+ kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (radius*2+1, radius*2+1))
377
+ dilated = cv2.dilate(alpha, kernel)
378
+ eroded = cv2.erode(alpha, kernel)
379
+ edge_mask = dilated - eroded
380
+
381
+ # Apply slight blur to edges only
382
+ blurred = cv2.GaussianBlur(composite, (3, 3), 0)
383
+
384
+ # Blend based on edge mask
385
+ edge_mask_3d = np.stack([edge_mask] * 3, axis=2)
386
+ result = composite * (1 - edge_mask_3d) + blurred * edge_mask_3d
387
+
388
+ return result.astype(np.uint8)
389
+
390
+ # ==============================================================================
391
+ # CHAPTER 6: ENHANCED CHUNKED VIDEO PROCESSOR
392
  # ==============================================================================
393
 
394
  class ChunkedVideoProcessor:
395
+ """Handles splitting and reassembling videos for chunked processing with audio support."""
396
 
397
  def __init__(self, temp_dir: str, config: ProcessingConfig):
398
  self.temp_dir = temp_dir
 
413
  return False
414
 
415
  def split_video_into_chunks(self, video_path: str) -> List[str]:
416
+ """Split video into chunks for processing, preserving audio."""
417
  try:
418
  with VideoFileClip(video_path) as clip:
419
  duration = clip.duration
 
427
  # Calculate chunk end time
428
  end_time = min(current_time + chunk_duration, duration)
429
 
430
+ # Check if remaining duration after this chunk is too small
431
+ remaining_after = duration - end_time
432
+ if remaining_after > 0 and remaining_after < self.config.MIN_FINAL_CHUNK_DURATION:
433
+ # Include the remaining portion in this chunk
434
+ end_time = duration
435
+ logger.info(f"Including final {remaining_after:.1f}s in chunk {chunk_index}")
436
+
437
+ # Create chunk with audio
438
  chunk_clip = clip.subclip(current_time, end_time)
439
  chunk_path = os.path.join(self.chunks_dir, f"chunk_{chunk_index:03d}.mp4")
440
 
441
+ # Write chunk with audio preservation
442
  chunk_clip.write_videofile(
443
  chunk_path,
444
  codec=self.config.VIDEO_CODEC,
445
+ audio_codec=self.config.AUDIO_CODEC if chunk_clip.audio else None,
446
+ audio_bitrate=self.config.AUDIO_BITRATE if chunk_clip.audio else None,
447
+ temp_audiofile=os.path.join(self.temp_dir, f"temp_audio_chunk_{chunk_index}.{self.config.AUDIO_TEMP_FORMAT}"),
448
+ remove_temp=True,
449
  verbose=False,
450
  logger=None
451
  )
452
  chunk_clip.close()
453
 
454
  chunk_paths.append(chunk_path)
455
+ logger.info(f"Created chunk {chunk_index}: {current_time:.1f}s - {end_time:.1f}s (duration: {end_time - current_time:.1f}s)")
456
 
457
  # Move to next chunk with slight overlap for smooth transitions
458
+ if end_time < duration: # Not the last chunk
459
+ overlap_seconds = self.config.CHUNK_OVERLAP_FRAMES / clip.fps
460
+ current_time = end_time - overlap_seconds
461
+ else:
462
+ current_time = duration # We're done
463
+
464
  chunk_index += 1
465
 
466
  logger.info(f"Split video into {len(chunk_paths)} chunks")
 
470
  logger.error(f"Video chunking failed: {e}")
471
  raise
472
 
473
+ def reassemble_chunks(self, processed_chunk_paths: List[str], output_path: str,
474
+ original_audio_path: Optional[str] = None) -> str:
475
+ """Reassemble processed chunks into final video with original audio."""
476
  try:
477
  if len(processed_chunk_paths) == 1:
478
+ # Single chunk, just copy or add audio
479
+ if original_audio_path and os.path.exists(original_audio_path):
480
+ # Add audio to single chunk
481
+ with VideoFileClip(processed_chunk_paths[0]) as video:
482
+ audio = AudioFileClip(original_audio_path)
483
+ final = video.set_audio(audio)
484
+ final.write_videofile(
485
+ output_path,
486
+ codec=self.config.VIDEO_CODEC,
487
+ audio_codec=self.config.AUDIO_CODEC,
488
+ audio_bitrate=self.config.AUDIO_BITRATE,
489
+ temp_audiofile=os.path.join(self.temp_dir, "temp_single_audio.m4a"),
490
+ remove_temp=True,
491
+ verbose=False,
492
+ logger=None
493
+ )
494
+ final.close()
495
+ else:
496
+ shutil.copy2(processed_chunk_paths[0], output_path)
497
+ logger.info("Single chunk processed")
498
  return output_path
499
 
500
  # Load all chunks
 
507
  logger.info(f"Concatenating {len(chunk_clips)} chunks...")
508
  final_clip = concatenate_videoclips(chunk_clips, method="compose")
509
 
510
+ # Add original audio if provided
511
+ if original_audio_path and os.path.exists(original_audio_path):
512
+ logger.info("Adding original audio track...")
513
+ audio_clip = AudioFileClip(original_audio_path)
514
+ # Ensure audio matches video duration
515
+ if audio_clip.duration > final_clip.duration:
516
+ audio_clip = audio_clip.subclip(0, final_clip.duration)
517
+ final_clip = final_clip.set_audio(audio_clip)
518
+
519
+ # Write final video with audio
520
  final_clip.write_videofile(
521
  output_path,
522
  codec=self.config.VIDEO_CODEC,
523
+ audio_codec=self.config.AUDIO_CODEC if final_clip.audio else None,
524
+ audio_bitrate=self.config.AUDIO_BITRATE if final_clip.audio else None,
525
+ temp_audiofile=os.path.join(self.temp_dir, "temp_final_audio.m4a"),
526
+ remove_temp=True,
527
  verbose=False,
528
  logger=None
529
  )
 
541
  raise
542
 
543
  # ==============================================================================
544
+ # CHAPTER 7: TWOSTAGEPROCESSOR CLASS DEFINITION
545
  # ==============================================================================
546
 
547
  class TwoStageProcessor:
548
  """
549
+ Enhanced two-stage alpha channel processor with video-to-video pipeline.
550
 
551
  This processor avoids green screen entirely by using alpha channels:
552
  1. SAM2 creates reference mask from first few frames
553
  2. MatAnyone processes entire video using reference mask β†’ pha.mp4 (alpha matte)
554
  3. Direct alpha compositing with background β†’ final.mp4
555
 
556
+ Features:
557
+ - No chroma key or green screen needed
558
+ - Audio preservation throughout pipeline
559
+ - Full video duration processing
560
+ - Alpha matte refinement for cleaner edges
561
+ - Chunked processing for long videos
562
  """
563
 
564
  def __init__(self, sam2_handler, matanyone_handler, temp_dir: Optional[str] = None):
 
569
  self.config = ProcessingConfig()
570
  self.memory_manager = MemoryManager(self.config)
571
  self.chunked_processor = ChunkedVideoProcessor(self.temp_dir, self.config)
572
+ self.alpha_refiner = AlphaRefiner()
573
 
574
  # Ensure temp directory exists
575
  os.makedirs(self.temp_dir, exist_ok=True)
 
587
  callback: Optional[callable] = None,
588
  **kwargs) -> Tuple[str, str]:
589
  """
590
+ Main processing pipeline - video to video with alpha compositing and audio.
591
 
592
  Returns:
593
  Tuple[str, str]: (final_output_path, status_message)
594
  """
595
  try:
596
+ logger.info(f"🎬 Enhanced Two-Stage Alpha Pipeline: {video_path}")
597
  logger.info(f"🎯 Background: {background_path}")
598
  logger.info(f"πŸ“ Temp: {self.temp_dir}")
599
  logger.info(f"🧠 Initial {self.memory_manager.get_gpu_memory_info()}")
600
 
601
+ # Extract and save original audio first
602
+ original_audio_path = None
603
+ if self.config.PRESERVE_ORIGINAL_AUDIO:
604
+ original_audio_path = self._extract_audio(video_path)
605
+
606
  # Determine if chunking is needed
607
  needs_chunking = self.chunked_processor.should_chunk_video(video_path)
608
 
609
  if needs_chunking:
610
  logger.info("Video exceeds chunk duration, using chunked processing...")
611
+ result = self._process_chunked_video(
612
+ video_path, background_path, output_path,
613
+ quality, original_audio_path, callback
614
+ )
615
  else:
616
  logger.info("Video within chunk duration, using single-pass processing...")
617
+ result = self._process_single_video(
618
+ video_path, background_path, output_path,
619
+ quality, trim_seconds, original_audio_path, callback
620
+ )
621
 
622
  # Final memory report
623
  logger.info(self.memory_manager.get_memory_report())
 
630
  logger.error(f"Traceback: {traceback.format_exc()}")
631
  return None, error_msg
632
 
633
+ def _extract_audio(self, video_path: str) -> Optional[str]:
634
+ """Extract audio from original video for later reattachment."""
635
+ try:
636
+ audio_path = os.path.join(self.temp_dir, f"original_audio.{self.config.AUDIO_TEMP_FORMAT}")
637
+
638
+ with VideoFileClip(video_path) as clip:
639
+ if clip.audio is not None:
640
+ clip.audio.write_audiofile(
641
+ audio_path,
642
+ codec=self.config.AUDIO_CODEC,
643
+ bitrate=self.config.AUDIO_BITRATE,
644
+ verbose=False,
645
+ logger=None
646
+ )
647
+ logger.info(f"βœ… Extracted audio: {audio_path}")
648
+ return audio_path
649
+ else:
650
+ logger.warning("Video has no audio track")
651
+ return None
652
+
653
+ except Exception as e:
654
+ logger.warning(f"Could not extract audio: {e}")
655
+ return None
656
+
657
  def _process_chunked_video(self,
658
  video_path: str,
659
  background_path: str,
660
  output_path: str,
661
  quality: str,
662
+ original_audio_path: Optional[str],
663
  callback: Optional[callable] = None) -> Tuple[str, str]:
664
  """Process long video using chunked approach."""
665
  try:
 
681
  logger.info(f"Processing chunk {i+1}/{total_chunks}: {chunk_path}")
682
  logger.info(f"Pre-chunk memory: {self.memory_manager.get_gpu_memory_info()}")
683
 
684
+ # Process this chunk at full resolution without audio (add at the end)
685
  chunk_output = os.path.join(self.temp_dir, f"processed_chunk_{i:03d}.mp4")
686
 
687
  result_path, status = self._process_single_video(
688
  chunk_path, background_path, chunk_output, quality,
689
+ trim_seconds=None, original_audio_path=None, callback=None
690
  )
691
 
692
  if result_path and os.path.exists(result_path):
 
706
  logger.info(f"Post-chunk memory: {self.memory_manager.get_gpu_memory_info()}")
707
 
708
  if callback:
709
+ callback("Reassembling chunks with audio...", 85)
710
 
711
+ # Reassemble chunks with original audio
712
+ final_path = self.chunked_processor.reassemble_chunks(
713
+ processed_chunks, output_path, original_audio_path
714
+ )
715
 
716
  if callback:
717
  callback("Chunked processing completed!", 100)
718
 
719
  logger.info(f"βœ… Chunked processing completed: {final_path}")
720
+ return final_path, f"Success - Processed {total_chunks} chunks with audio"
721
 
722
  except Exception as e:
723
  logger.error(f"Chunked processing failed: {e}")
 
729
  output_path: str,
730
  quality: str,
731
  trim_seconds: Optional[int] = None,
732
+ original_audio_path: Optional[str] = None,
733
  callback: Optional[callable] = None) -> Tuple[str, str]:
734
  """Process a single video (or chunk) through the pipeline."""
735
  try:
 
755
  self.memory_manager.cleanup_model(self.matanyone_handler, "MatAnyone", deep_cleanup=False)
756
  self.memory_manager.cleanup_stage("MatAnyone", force=True)
757
 
758
+ # Stage 3: Enhanced alpha compositing with audio
759
  if callback:
760
  callback("Alpha compositing with background...", 70)
761
+ logger.info("STAGE 3: Enhanced alpha compositing with background...")
762
+ final_path = self._stage3_enhanced_alpha_composite(
763
+ video_path, alpha_video_path, background_path,
764
+ output_path, quality, original_audio_path
765
+ )
766
 
767
  # Final memory cleanup
768
  self.memory_manager.cleanup_stage("Final")
 
801
  logger.warning(f"Failed to cleanup: {e}")
802
 
803
  # ==============================================================================
804
+ # CHAPTER 8: STAGE 1 - REFERENCE MASK CREATION (SAM2)
805
  # ==============================================================================
806
 
807
  def _stage1_create_reference_mask(self, video_path: str) -> str:
 
926
  return result
927
 
928
  # ==============================================================================
929
+ # CHAPTER 9: STAGE 2 - MATANYONE PROCESSING
930
  # ==============================================================================
931
 
932
  def _stage2_matanyone_processing(self, video_path: str, reference_mask_path: str, trim_seconds: Optional[int]) -> str:
 
1011
  raise
1012
 
1013
  def _trim_video(self, input_path: str, output_path: str, seconds: int):
1014
+ """Trim video to specified duration with audio preservation."""
1015
  try:
1016
  with VideoFileClip(input_path) as clip:
1017
  trimmed = clip.subclip(0, min(seconds, clip.duration))
1018
  trimmed.write_videofile(
1019
  output_path,
1020
  codec=self.config.VIDEO_CODEC,
1021
+ audio_codec=self.config.AUDIO_CODEC if clip.audio else None,
1022
+ audio_bitrate=self.config.AUDIO_BITRATE if clip.audio else None,
1023
  verbose=False,
1024
  logger=None
1025
  )
 
1029
  raise
1030
 
1031
  # ==============================================================================
1032
+ # CHAPTER 10: STAGE 3 - ENHANCED ALPHA COMPOSITING WITH AUDIO
1033
  # ==============================================================================
1034
 
1035
+ def _stage3_enhanced_alpha_composite(self, original_video_path: str,
1036
+ alpha_video_path: str,
1037
+ background_path: str,
1038
+ output_path: str,
1039
+ quality: str,
1040
+ audio_path: Optional[str]) -> str:
1041
  """
1042
+ Stage 3: Enhanced alpha compositing with refinement and audio support.
1043
 
1044
+ Uses MoviePy for better audio handling and applies alpha refinement
1045
+ for cleaner edges and better compositing quality.
1046
  """
1047
  try:
1048
+ # Load videos using MoviePy for better audio support
1049
+ logger.info("Loading video clips...")
1050
+ original_clip = VideoFileClip(original_video_path)
1051
+ alpha_clip = VideoFileClip(alpha_video_path)
 
 
1052
 
1053
+ # Load background
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1054
  if background_path.lower().endswith(('.mp4', '.avi', '.mov', '.mkv')):
1055
+ logger.info("Loading video background...")
1056
+ background_clip = VideoFileClip(background_path)
1057
+ background_clip = background_clip.resize(original_clip.size)
1058
+
1059
+ # Loop background if it's shorter than the foreground
1060
+ if background_clip.duration < original_clip.duration:
1061
+ logger.info("Looping background video to match duration...")
1062
+ background_clip = background_clip.loop(duration=original_clip.duration)
1063
  else:
1064
+ logger.info("Loading image background...")
1065
+ background_clip = ImageClip(background_path).set_duration(original_clip.duration)
1066
+ background_clip = background_clip.resize(original_clip.size)
1067
+
1068
+ # Get video properties for logging
1069
+ fps = original_clip.fps
1070
+ width, height = original_clip.size
1071
+ duration = original_clip.duration
1072
+ total_frames = int(fps * duration)
1073
+
1074
+ logger.info(f"Video properties: {width}x{height} @ {fps:.1f} FPS")
1075
+ logger.info(f"Duration: {duration:.1f}s ({total_frames} frames)")
1076
+
1077
+ # Create enhanced composite function with alpha refinement
1078
+ def make_frame_enhanced(t):
1079
+ """Create composite frame at time t with alpha refinement."""
1080
+ try:
1081
+ # Get frames at time t
1082
+ orig_frame = original_clip.get_frame(t)
1083
+ alpha_frame = alpha_clip.get_frame(t)
1084
+ bg_frame = background_clip.get_frame(t)
1085
+
1086
+ # Convert alpha to grayscale if needed
1087
+ if len(alpha_frame.shape) == 3:
1088
+ alpha_mask = cv2.cvtColor(alpha_frame.astype(np.uint8), cv2.COLOR_RGB2GRAY)
1089
+ else:
1090
+ alpha_mask = alpha_frame
1091
+
1092
+ # Apply alpha refinement for cleaner edges
1093
+ alpha_refined = self.alpha_refiner.refine_alpha_matte(alpha_mask, self.config)
1094
+
1095
+ # Handle dimension mismatch
1096
+ if alpha_refined.shape[:2] != orig_frame.shape[:2]:
1097
+ alpha_refined = cv2.resize(alpha_refined,
1098
+ (orig_frame.shape[1], orig_frame.shape[0]),
1099
+ interpolation=cv2.INTER_LINEAR)
1100
+
1101
+ # Create 3-channel alpha for compositing
1102
+ alpha_3d = np.stack([alpha_refined] * 3, axis=2)
1103
+
1104
+ # Direct alpha compositing
1105
+ composite = alpha_3d * orig_frame + (1 - alpha_3d) * bg_frame
1106
+
1107
+ # Optional: remove color fringing at edges
1108
+ if self.config.ENABLE_DEFRINGING:
1109
+ composite = self.alpha_refiner.defringe_edges(
1110
+ composite.astype(np.uint8),
1111
+ alpha_refined,
1112
+ radius=self.config.DEFRINGE_RADIUS
1113
+ )
1114
+
1115
+ return np.clip(composite, 0, 255).astype(np.uint8)
1116
+
1117
+ except Exception as e:
1118
+ logger.error(f"Frame processing error at t={t}: {e}")
1119
+ # Return original frame as fallback
1120
+ return original_clip.get_frame(t)
1121
+
1122
+ # Create composite video with enhanced processing
1123
+ logger.info("Creating composite video with alpha refinement...")
1124
+ composite_clip = VideoClip(make_frame_enhanced, duration=original_clip.duration)
1125
+ composite_clip = composite_clip.set_fps(original_clip.fps)
1126
+
1127
+ # Add audio (prioritize provided audio path, then original audio)
1128
+ if audio_path and os.path.exists(audio_path):
1129
+ logger.info(f"Adding audio from: {audio_path}")
1130
+ audio_clip = AudioFileClip(audio_path)
1131
+ # Ensure audio matches video duration
1132
+ if audio_clip.duration > composite_clip.duration:
1133
+ audio_clip = audio_clip.subclip(0, composite_clip.duration)
1134
+ composite_clip = composite_clip.set_audio(audio_clip)
1135
+ elif original_clip.audio is not None:
1136
+ logger.info("Using original video's audio...")
1137
+ composite_clip = composite_clip.set_audio(original_clip.audio)
1138
+ else:
1139
+ logger.warning("No audio available for final output")
1140
 
1141
+ # Get quality profile
1142
+ profile = QualityManager.get_profile(quality)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1143
 
1144
+ # Write final video with audio
1145
+ logger.info(f"Writing final video with quality profile: {quality}")
1146
+ composite_clip.write_videofile(
1147
+ output_path,
1148
+ codec=self.config.VIDEO_CODEC,
1149
+ bitrate=profile['bitrate'],
1150
+ audio_codec=self.config.AUDIO_CODEC if composite_clip.audio else None,
1151
+ audio_bitrate=self.config.AUDIO_BITRATE if composite_clip.audio else None,
1152
+ temp_audiofile=os.path.join(self.temp_dir, f"temp_composite_audio.{self.config.AUDIO_TEMP_FORMAT}"),
1153
+ remove_temp=True,
1154
+ verbose=False,
1155
+ logger=None
1156
+ )
1157
 
1158
+ # Cleanup clips
1159
+ logger.info("Cleaning up video clips...")
1160
+ original_clip.close()
1161
+ alpha_clip.close()
1162
+ background_clip.close()
1163
+ composite_clip.close()
1164
 
1165
  # Verify output
1166
  if not os.path.exists(output_path):
1167
  raise RuntimeError("Output file was not created")
1168
+
1169
+ # Verify the output has proper duration
1170
+ with VideoFileClip(output_path) as verify_clip:
1171
+ output_duration = verify_clip.duration
1172
+ if abs(output_duration - duration) > 1.0: # Allow 1 second tolerance
1173
+ logger.warning(f"Duration mismatch: expected {duration:.1f}s, got {output_duration:.1f}s")
1174
+
1175
  file_size = os.path.getsize(output_path) / (1024 * 1024) # MB
1176
+ logger.info(f"βœ… Enhanced compositing completed: {output_path}")
1177
+ logger.info(f" File size: {file_size:.1f} MB")
1178
+ logger.info(f" Duration: {output_duration:.1f}s")
1179
+ logger.info(f" Audio: {'Yes' if composite_clip.audio else 'No'}")
1180
 
1181
  return output_path
1182
 
1183
  except Exception as e:
1184
+ logger.error(f"Stage 3 enhanced compositing failed: {e}")
1185
  raise
1186
 
1187
  # ==============================================================================
1188
+ # CHAPTER 11: DEBUG AND UTILITY FUNCTIONS
1189
  # ==============================================================================
1190
 
1191
  def _debug_video_info(self, video_path: str, label: str = "Video"):
 
1218
  'temp_size_mb': 0,
1219
  'debug_files': 0,
1220
  'memory_info': self.memory_manager.get_gpu_memory_info(),
1221
+ 'memory_report': self.memory_manager.get_memory_report(),
1222
+ 'config': {
1223
+ 'audio_enabled': self.config.PRESERVE_ORIGINAL_AUDIO,
1224
+ 'alpha_refinement': self.config.ALPHA_SMOOTHING > 0,
1225
+ 'defringing': self.config.ENABLE_DEFRINGING,
1226
+ 'chunk_duration': self.config.MAX_CHUNK_DURATION
1227
+ }
1228
  }
1229
 
1230
  try:
 
1248
  return stats
1249
 
1250
  # ==============================================================================
1251
+ # CHAPTER 12: EXPORT INTERFACE AND COMPATIBILITY
1252
  # ==============================================================================
1253
 
1254
  def create_two_stage_processor(sam2_handler, matanyone_handler, **kwargs):
 
1262
  # Main execution example
1263
  if __name__ == "__main__":
1264
  # Example usage - replace with your actual handlers
1265
+ logger.info("Enhanced TwoStageProcessor (Alpha Channel Version) loaded successfully")
1266
+ logger.info("Features:")
1267
+ logger.info(" βœ… No green screen or chroma key needed")
1268
+ logger.info(" βœ… Audio preservation throughout pipeline")
1269
+ logger.info(" βœ… Full video duration processing")
1270
+ logger.info(" βœ… Alpha matte refinement for cleaner edges")
1271
+ logger.info(" βœ… Chunked processing for long videos")
1272
+ logger.info("")
1273
  logger.info("Use create_two_stage_processor(sam2_handler, matanyone_handler) to initialize")
1274
 
1275
  # Print configuration
1276
  config = ProcessingConfig()
1277
+ logger.info(f"Pipeline: SAM2 β†’ MatAnyone β†’ Enhanced Alpha Compositing")
1278
+ logger.info(f"Configuration:")
1279
+ logger.info(f" Reference frames: {config.REFERENCE_FRAMES}")
1280
+ logger.info(f" Chunk duration: {config.MAX_CHUNK_DURATION}s with {config.CHUNK_OVERLAP_FRAMES} frame overlap")
1281
+ logger.info(f" Audio: {config.AUDIO_CODEC} @ {config.AUDIO_BITRATE}")
1282
+ logger.info(f" Alpha refinement: Smoothing={config.ALPHA_SMOOTHING}, Edge blur={config.ALPHA_EDGE_BLUR}")
1283
+ logger.info(f" Memory management: Cache clearing={'ON' if config.CLEAR_CACHE_AFTER_STAGE else 'OFF'}")