Spaces:

MogensR
/

VideoBackgroundReplacer

Paused

App Files Files Community

MogensR committed on Sep 9, 2025

Commit

16a0e0c

verified ·

1 Parent(s): df76738

Update processing/two_stage/two_stage_processor.py

Browse files

Files changed (1) hide show

processing/two_stage/two_stage_processor.py +390 -164

processing/two_stage/two_stage_processor.py CHANGED Viewed

@@ -1,13 +1,17 @@
 #!/usr/bin/env python3
 """
-EFFICIENT Two-Stage Alpha Channel Processing System ✅ 2025-09-09
 VIDEO-TO-VIDEO PIPELINE: Direct alpha compositing without green screen
 Stage 1: SAM2 creates reference mask from first 3 frames
 Stage 2: MatAnyone processes entire video → pha.mp4 (alpha matte)
 Stage 3: Direct alpha compositing with background → final.mp4
 FEATURES:
 - No green screen or chroma key needed
 - Direct alpha channel compositing
 - Chunked processing for long videos with memory management
 - Dimension mismatch handling
 - Memory optimization with light/deep cleanup modes
@@ -30,7 +34,7 @@
 import subprocess
 from pathlib import Path
 from typing import Optional, Tuple, Dict, Any, List
-from moviepy.editor import VideoFileClip, CompositeVideoClip, ImageClip, concatenate_videoclips
 import moviepy.video.fx.all as vfx
 # Setup logging first
@@ -54,7 +58,7 @@
 # ==============================================================================
 class ProcessingConfig:
-    """Configuration settings for two-stage processing."""
     # Reference mask creation
     REFERENCE_FRAMES = 3  # Number of frames to use for reference mask
@@ -68,9 +72,18 @@ class ProcessingConfig:
     VIDEO_CODEC = 'libx264'
     VIDEO_BITRATE = '8000k'
     AUDIO_CODEC = 'aac'
     # Processing limits
     MAX_FRAMES_FOR_REFERENCE = 10  # Safety limit
     # Memory management
     CLEAR_CACHE_AFTER_STAGE = True  # Clear GPU cache between stages
@@ -82,6 +95,10 @@ class ProcessingConfig:
     SAVE_DEBUG_FILES = True
     DEBUG_FRAME_INTERVAL = 30  # Save every Nth frame for debugging
     ENABLE_MEMORY_TESTING = False  # DISABLED: Memory testing causes predictor deletion
 # ==============================================================================
 # CHAPTER 3: MEMORY MANAGEMENT UTILITIES
@@ -286,11 +303,96 @@ def get_profile(cls, quality: str = 'medium') -> Dict[str, Any]:
         return cls.PROFILES.get(quality, cls.PROFILES['medium'])
 # ==============================================================================
-# CHAPTER 5: CHUNKED VIDEO PROCESSOR
 # ==============================================================================
 class ChunkedVideoProcessor:
-    """Handles splitting and reassembling videos for chunked processing."""
     def __init__(self, temp_dir: str, config: ProcessingConfig):
         self.temp_dir = temp_dir
@@ -311,7 +413,7 @@ def should_chunk_video(self, video_path: str) -> bool:
             return False
     def split_video_into_chunks(self, video_path: str) -> List[str]:
-        """Split video into chunks for processing."""
         try:
             with VideoFileClip(video_path) as clip:
                 duration = clip.duration
@@ -325,24 +427,40 @@ def split_video_into_chunks(self, video_path: str) -> List[str]:
                     # Calculate chunk end time
                     end_time = min(current_time + chunk_duration, duration)
-                    # Create chunk
                     chunk_clip = clip.subclip(current_time, end_time)
                     chunk_path = os.path.join(self.chunks_dir, f"chunk_{chunk_index:03d}.mp4")
                     chunk_clip.write_videofile(
                         chunk_path,
                         codec=self.config.VIDEO_CODEC,
-                        audio_codec=self.config.AUDIO_CODEC,
                         verbose=False,
                         logger=None
                     )
                     chunk_clip.close()
                     chunk_paths.append(chunk_path)
-                    logger.info(f"Created chunk {chunk_index}: {current_time:.1f}s - {end_time:.1f}s")
                     # Move to next chunk with slight overlap for smooth transitions
-                    current_time = end_time - (self.config.CHUNK_OVERLAP_FRAMES / clip.fps)
                     chunk_index += 1
                 logger.info(f"Split video into {len(chunk_paths)} chunks")
@@ -352,13 +470,31 @@ def split_video_into_chunks(self, video_path: str) -> List[str]:
             logger.error(f"Video chunking failed: {e}")
             raise
-    def reassemble_chunks(self, processed_chunk_paths: List[str], output_path: str) -> str:
-        """Reassemble processed chunks into final video."""
         try:
             if len(processed_chunk_paths) == 1:
-                # Single chunk, just copy
-                shutil.copy2(processed_chunk_paths[0], output_path)
-                logger.info("Single chunk, copied directly to output")
                 return output_path
             # Load all chunks
@@ -371,11 +507,23 @@ def reassemble_chunks(self, processed_chunk_paths: List[str], output_path: str)
             logger.info(f"Concatenating {len(chunk_clips)} chunks...")
             final_clip = concatenate_videoclips(chunk_clips, method="compose")
-            # Write final video
             final_clip.write_videofile(
                 output_path,
                 codec=self.config.VIDEO_CODEC,
-                audio_codec=self.config.AUDIO_CODEC,
                 verbose=False,
                 logger=None
             )
@@ -393,19 +541,24 @@ def reassemble_chunks(self, processed_chunk_paths: List[str], output_path: str)
             raise
 # ==============================================================================
-# CHAPTER 6: TWOSTAGEPROCESSOR CLASS DEFINITION
 # ==============================================================================
 class TwoStageProcessor:
     """
-    Efficient two-stage alpha channel processor with video-to-video pipeline.
     This processor avoids green screen entirely by using alpha channels:
     1. SAM2 creates reference mask from first few frames
     2. MatAnyone processes entire video using reference mask → pha.mp4 (alpha matte)
     3. Direct alpha compositing with background → final.mp4
-    No chroma key or green screen needed!
     """
     def __init__(self, sam2_handler, matanyone_handler, temp_dir: Optional[str] = None):
@@ -416,6 +569,7 @@ def __init__(self, sam2_handler, matanyone_handler, temp_dir: Optional[str] = No
         self.config = ProcessingConfig()
         self.memory_manager = MemoryManager(self.config)
         self.chunked_processor = ChunkedVideoProcessor(self.temp_dir, self.config)
         # Ensure temp directory exists
         os.makedirs(self.temp_dir, exist_ok=True)
@@ -433,26 +587,37 @@ def process_video(self,
                      callback: Optional[callable] = None,
                      **kwargs) -> Tuple[str, str]:
         """
-        Main processing pipeline - video to video with alpha compositing.
         Returns:
             Tuple[str, str]: (final_output_path, status_message)
         """
         try:
-            logger.info(f"🎬 Two-Stage Alpha Pipeline: {video_path}")
             logger.info(f"🎯 Background: {background_path}")
             logger.info(f"📁 Temp: {self.temp_dir}")
             logger.info(f"🧠 Initial {self.memory_manager.get_gpu_memory_info()}")
             # Determine if chunking is needed
             needs_chunking = self.chunked_processor.should_chunk_video(video_path)
             if needs_chunking:
                 logger.info("Video exceeds chunk duration, using chunked processing...")
-                result = self._process_chunked_video(video_path, background_path, output_path, quality, callback)
             else:
                 logger.info("Video within chunk duration, using single-pass processing...")
-                result = self._process_single_video(video_path, background_path, output_path, quality, trim_seconds, callback)
             # Final memory report
             logger.info(self.memory_manager.get_memory_report())
@@ -465,11 +630,36 @@ def process_video(self,
             logger.error(f"Traceback: {traceback.format_exc()}")
             return None, error_msg
     def _process_chunked_video(self,
                               video_path: str,
                               background_path: str,
                               output_path: str,
                               quality: str,
                               callback: Optional[callable] = None) -> Tuple[str, str]:
         """Process long video using chunked approach."""
         try:
@@ -491,12 +681,12 @@ def _process_chunked_video(self,
                 logger.info(f"Processing chunk {i+1}/{total_chunks}: {chunk_path}")
                 logger.info(f"Pre-chunk memory: {self.memory_manager.get_gpu_memory_info()}")
-                # Process this chunk at full resolution
                 chunk_output = os.path.join(self.temp_dir, f"processed_chunk_{i:03d}.mp4")
                 result_path, status = self._process_single_video(
                     chunk_path, background_path, chunk_output, quality,
-                    trim_seconds=None, callback=None  # No sub-callbacks for chunks
                 )
                 if result_path and os.path.exists(result_path):
@@ -516,16 +706,18 @@ def _process_chunked_video(self,
                 logger.info(f"Post-chunk memory: {self.memory_manager.get_gpu_memory_info()}")
             if callback:
-                callback("Reassembling chunks...", 85)
-            # Reassemble chunks
-            final_path = self.chunked_processor.reassemble_chunks(processed_chunks, output_path)
             if callback:
                 callback("Chunked processing completed!", 100)
             logger.info(f"✅ Chunked processing completed: {final_path}")
-            return final_path, f"Success - Processed {total_chunks} chunks"
         except Exception as e:
             logger.error(f"Chunked processing failed: {e}")
@@ -537,6 +729,7 @@ def _process_single_video(self,
                              output_path: str,
                              quality: str,
                              trim_seconds: Optional[int] = None,
                              callback: Optional[callable] = None) -> Tuple[str, str]:
         """Process a single video (or chunk) through the pipeline."""
         try:
@@ -562,11 +755,14 @@ def _process_single_video(self,
             self.memory_manager.cleanup_model(self.matanyone_handler, "MatAnyone", deep_cleanup=False)
             self.memory_manager.cleanup_stage("MatAnyone", force=True)
-            # Stage 3: Direct alpha compositing (no green screen!)
             if callback:
                 callback("Alpha compositing with background...", 70)
-            logger.info("STAGE 3: Direct alpha compositing with background...")
-            final_path = self._stage3_alpha_composite(video_path, alpha_video_path, background_path, output_path, quality)
             # Final memory cleanup
             self.memory_manager.cleanup_stage("Final")
@@ -605,7 +801,7 @@ def cleanup(self):
             logger.warning(f"Failed to cleanup: {e}")
 # ==============================================================================
-# CHAPTER 7: STAGE 1 - REFERENCE MASK CREATION (SAM2)
 # ==============================================================================
     def _stage1_create_reference_mask(self, video_path: str) -> str:
@@ -730,7 +926,7 @@ def _combine_reference_masks(self, masks: list) -> np.ndarray:
         return result
 # ==============================================================================
-# CHAPTER 8: STAGE 2 - MATANYONE PROCESSING
 # ==============================================================================
     def _stage2_matanyone_processing(self, video_path: str, reference_mask_path: str, trim_seconds: Optional[int]) -> str:
@@ -815,14 +1011,15 @@ def _stage2_matanyone_processing(self, video_path: str, reference_mask_path: str
             raise
     def _trim_video(self, input_path: str, output_path: str, seconds: int):
-        """Trim video to specified duration."""
         try:
             with VideoFileClip(input_path) as clip:
                 trimmed = clip.subclip(0, min(seconds, clip.duration))
                 trimmed.write_videofile(
                     output_path,
                     codec=self.config.VIDEO_CODEC,
-                    audio_codec=self.config.AUDIO_CODEC,
                     verbose=False,
                     logger=None
                 )
@@ -832,149 +1029,163 @@ def _trim_video(self, input_path: str, output_path: str, seconds: int):
             raise
 # ==============================================================================
-# CHAPTER 9: STAGE 3 - DIRECT ALPHA COMPOSITING (NO GREEN SCREEN!)
 # ==============================================================================
-    def _stage3_alpha_composite(self, original_video_path: str, alpha_video_path: str,
-                                background_path: str, output_path: str, quality: str) -> str:
         """
-        Stage 3: Direct alpha compositing without any green screen.
-        Uses the alpha matte from MatAnyone to composite the person
-        directly onto the new background.
         """
         try:
-            # Get quality profile
-            profile = QualityManager.get_profile(quality)
-            # Open videos
-            original_cap = cv2.VideoCapture(original_video_path)
-            alpha_cap = cv2.VideoCapture(alpha_video_path)
-            if not original_cap.isOpened():
-                raise RuntimeError(f"Cannot open original video: {original_video_path}")
-            if not alpha_cap.isOpened():
-                raise RuntimeError(f"Cannot open alpha video: {alpha_video_path}")
-            # Get video properties
-            fps = original_cap.get(cv2.CAP_PROP_FPS)
-            width = int(original_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
-            height = int(original_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
-            total_frames = int(original_cap.get(cv2.CAP_PROP_FRAME_COUNT))
-            # Get alpha video dimensions
-            alpha_width = int(alpha_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
-            alpha_height = int(alpha_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
-            logger.info(f"Original video: {width}x{height} @ {fps} FPS, {total_frames} frames")
-            logger.info(f"Alpha video dimensions: {alpha_width}x{alpha_height}")
-            # Load and prepare background
             if background_path.lower().endswith(('.mp4', '.avi', '.mov', '.mkv')):
-                # Video background - process frame by frame
-                bg_cap = cv2.VideoCapture(background_path)
-                bg_is_video = True
             else:
-                # Image background
-                bg_image = cv2.imread(background_path)
-                if bg_image is None:
-                    raise RuntimeError(f"Cannot load background image: {background_path}")
-                # Resize to match video
-                bg_image = cv2.resize(bg_image, (width, height))
-                bg_is_video = False
-            # Setup video writer
-            fourcc = cv2.VideoWriter_fourcc(*'mp4v')
-            out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
-            frame_count = 0
-            while True:
-                # Read frames
-                ret_orig, orig_frame = original_cap.read()
-                ret_alpha, alpha_frame = alpha_cap.read()
-                if not ret_orig or not ret_alpha:
-                    break
-                # Get background frame
-                if bg_is_video:
-                    ret_bg, bg_frame = bg_cap.read()
-                    if not ret_bg:
-                        # Loop background if it's shorter
-                        bg_cap.set(cv2.CAP_PROP_POS_FRAMES, 0)
-                        ret_bg, bg_frame = bg_cap.read()
-                    bg_frame = cv2.resize(bg_frame, (width, height))
-                else:
-                    bg_frame = bg_image.copy()
-                # Convert alpha frame to grayscale mask
-                if len(alpha_frame.shape) == 3:
-                    alpha_mask = cv2.cvtColor(alpha_frame, cv2.COLOR_BGR2GRAY)
-                else:
-                    alpha_mask = alpha_frame
-                # Handle dimension mismatch - resize alpha to match original if needed
-                if alpha_mask.shape[:2] != orig_frame.shape[:2]:
-                    if frame_count == 0:  # Log only once
-                        logger.info(f"Resizing alpha from {alpha_mask.shape[:2]} to {orig_frame.shape[:2]}")
-                    alpha_mask = cv2.resize(alpha_mask, (width, height), interpolation=cv2.INTER_LINEAR)
-                # Normalize alpha to 0-1 range
-                alpha_normalized = alpha_mask.astype(np.float32) / 255.0
-                # Create 3-channel alpha for compositing
-                alpha_3d = np.stack([alpha_normalized] * 3, axis=2)
-                # Composite: background where alpha=0, person where alpha=1
-                orig_frame_float = orig_frame.astype(np.float32)
-                bg_frame_float = bg_frame.astype(np.float32)
-                # Direct alpha compositing
-                composite = alpha_3d * orig_frame_float + (1 - alpha_3d) * bg_frame_float
-                composite = np.clip(composite, 0, 255).astype(np.uint8)
-                # Write frame
-                out.write(composite)
-                frame_count += 1
-                # Progress logging
-                if frame_count % self.config.MEMORY_CHECK_INTERVAL == 0:
-                    logger.info(f"Compositing progress: {frame_count}/{total_frames}")
-                    if self.memory_manager.should_clear_memory():
-                        logger.info("Memory high during compositing, clearing...")
-                        self.memory_manager.clear_gpu_cache()
-                # Save debug frame occasionally
-                if self.config.SAVE_DEBUG_FILES and frame_count % self.config.DEBUG_FRAME_INTERVAL == 0:
-                    debug_path = os.path.join(self.temp_dir, f"debug_composite_{frame_count:04d}.png")
-                    cv2.imwrite(debug_path, composite)
-            # Cleanup
-            original_cap.release()
-            alpha_cap.release()
-            out.release()
-            if bg_is_video:
-                bg_cap.release()
-            if frame_count == 0:
-                raise RuntimeError("No frames processed for output video")
             # Verify output
             if not os.path.exists(output_path):
                 raise RuntimeError("Output file was not created")
             file_size = os.path.getsize(output_path) / (1024 * 1024)  # MB
-            logger.info(f"✅ Alpha compositing completed: {output_path} ({file_size:.1f} MB)")
-            logger.info(f"   Processed {frame_count} frames")
             return output_path
         except Exception as e:
-            logger.error(f"Stage 3 alpha compositing failed: {e}")
             raise
 # ==============================================================================
-# CHAPTER 10: DEBUG AND UTILITY FUNCTIONS
 # ==============================================================================
     def _debug_video_info(self, video_path: str, label: str = "Video"):
@@ -1007,7 +1218,13 @@ def get_processing_stats(self) -> Dict[str, Any]:
             'temp_size_mb': 0,
             'debug_files': 0,
             'memory_info': self.memory_manager.get_gpu_memory_info(),
-            'memory_report': self.memory_manager.get_memory_report()
         }
         try:
@@ -1031,7 +1248,7 @@ def get_processing_stats(self) -> Dict[str, Any]:
         return stats
 # ==============================================================================
-# CHAPTER 11: EXPORT INTERFACE AND COMPATIBILITY
 # ==============================================================================
 def create_two_stage_processor(sam2_handler, matanyone_handler, **kwargs):
@@ -1045,13 +1262,22 @@ def create_two_stage_processor(sam2_handler, matanyone_handler, **kwargs):
 # Main execution example
 if __name__ == "__main__":
     # Example usage - replace with your actual handlers
-    logger.info("TwoStageProcessor (Alpha Channel Version) loaded successfully")
     logger.info("Use create_two_stage_processor(sam2_handler, matanyone_handler) to initialize")
     # Print configuration
     config = ProcessingConfig()
-    logger.info(f"Pipeline: SAM2 → MatAnyone → Direct Alpha Compositing")
-    logger.info(f"No green screen or chroma key needed!")
-    logger.info(f"Default config: {config.REFERENCE_FRAMES} reference frames")
-    logger.info(f"Chunked processing: {config.MAX_CHUNK_DURATION}s chunks with {config.CHUNK_OVERLAP_FRAMES} frame overlap")
-    logger.info(f"Memory management: Cache clearing={'ON' if config.CLEAR_CACHE_AFTER_STAGE else 'OFF'}")

 #!/usr/bin/env python3
 """
+ENHANCED Two-Stage Alpha Channel Processing System ✅ 2025-09-09
 VIDEO-TO-VIDEO PIPELINE: Direct alpha compositing without green screen
 Stage 1: SAM2 creates reference mask from first 3 frames
 Stage 2: MatAnyone processes entire video → pha.mp4 (alpha matte)
 Stage 3: Direct alpha compositing with background → final.mp4
 FEATURES:
 - No green screen or chroma key needed
 - Direct alpha channel compositing
+- Audio preservation throughout pipeline
+- Full video duration processing (no truncation)
+- Alpha matte refinement for cleaner edges
 - Chunked processing for long videos with memory management
 - Dimension mismatch handling
 - Memory optimization with light/deep cleanup modes
 import subprocess
 from pathlib import Path
 from typing import Optional, Tuple, Dict, Any, List
+from moviepy.editor import VideoFileClip, AudioFileClip, CompositeVideoClip, ImageClip, concatenate_videoclips, VideoClip
 import moviepy.video.fx.all as vfx
 # Setup logging first
 # ==============================================================================
 class ProcessingConfig:
+    """Configuration settings for two-stage processing with audio support."""
     # Reference mask creation
     REFERENCE_FRAMES = 3  # Number of frames to use for reference mask
     VIDEO_CODEC = 'libx264'
     VIDEO_BITRATE = '8000k'
     AUDIO_CODEC = 'aac'
+    AUDIO_BITRATE = '192k'  # Good quality audio
+    # Alpha refinement settings
+    ALPHA_SMOOTHING = 3  # Median blur kernel size (spatial, per-frame smoothing of the alpha matte)
+    ALPHA_EDGE_BLUR = 1  # Gaussian blur radius for edge feathering
+    ALPHA_CONTRAST = 1.2  # Power curve for sharper alpha transitions
+    ENABLE_DEFRINGING = True  # Remove color fringing at edges
+    DEFRINGE_RADIUS = 1  # Radius for defringing operation
     # Processing limits
     MAX_FRAMES_FOR_REFERENCE = 10  # Safety limit
+    MIN_FINAL_CHUNK_DURATION = 10  # Don't create chunks smaller than this
     # Memory management
     CLEAR_CACHE_AFTER_STAGE = True  # Clear GPU cache between stages
     SAVE_DEBUG_FILES = True
     DEBUG_FRAME_INTERVAL = 30  # Save every Nth frame for debugging
     ENABLE_MEMORY_TESTING = False  # DISABLED: Memory testing causes predictor deletion
+    # Audio settings
+    PRESERVE_ORIGINAL_AUDIO = True  # Always try to preserve original audio
+    AUDIO_TEMP_FORMAT = 'm4a'  # Temporary audio format
 # ==============================================================================
 # CHAPTER 3: MEMORY MANAGEMENT UTILITIES
         return cls.PROFILES.get(quality, cls.PROFILES['medium'])
 # ==============================================================================
+# CHAPTER 5: ALPHA REFINEMENT UTILITIES
+# ==============================================================================
+class AlphaRefiner:
+    """Utilities for refining alpha mattes for better compositing."""
+    @staticmethod
+    def refine_alpha_matte(alpha: np.ndarray, config: ProcessingConfig) -> np.ndarray:
+        """
+        Refine alpha matte for cleaner compositing.
+        Args:
+            alpha: Input alpha matte
+            config: Processing configuration
+        Returns:
+            Refined alpha matte
+        """
+        # Ensure single channel
+        if len(alpha.shape) == 3:
+            alpha = cv2.cvtColor(alpha, cv2.COLOR_BGR2GRAY)
+        # Normalize to 0-1 range if needed
+        if alpha.dtype == np.uint8:
+            alpha = alpha.astype(np.float32) / 255.0
+        # Spatial median smoothing of this frame's matte (per-frame only; not a temporal filter)
+        if config.ALPHA_SMOOTHING > 0:
+            kernel_size = config.ALPHA_SMOOTHING
+            # Ensure odd kernel size
+            if kernel_size % 2 == 0:
+                kernel_size += 1
+            alpha = cv2.medianBlur((alpha * 255).astype(np.uint8), kernel_size).astype(np.float32) / 255.0
+        # Edge feathering for softer transitions
+        if config.ALPHA_EDGE_BLUR > 0:
+            kernel_size = config.ALPHA_EDGE_BLUR * 2 + 1
+            alpha = cv2.GaussianBlur(alpha, (kernel_size, kernel_size), 0)
+        # Apply contrast adjustment for sharper boundaries
+        if config.ALPHA_CONTRAST != 1.0:
+            alpha = np.power(alpha, config.ALPHA_CONTRAST)
+        # Ensure valid range
+        alpha = np.clip(alpha, 0, 1)
+        return alpha
+    @staticmethod
+    def defringe_edges(composite: np.ndarray, alpha: np.ndarray, radius: int = 1) -> np.ndarray:
+        """
+        Remove color fringing at alpha edges.
+        Args:
+            composite: Composited image
+            alpha: Alpha matte used for compositing
+            radius: Radius for defringing operation
+        Returns:
+            Defringed composite
+        """
+        # Ensure alpha is single channel
+        if len(alpha.shape) == 3:
+            alpha = cv2.cvtColor(alpha, cv2.COLOR_BGR2GRAY)
+        # Normalize alpha if needed
+        if alpha.dtype == np.uint8:
+            alpha = alpha.astype(np.float32) / 255.0
+        # Create edge mask using morphological operations
+        kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (radius*2+1, radius*2+1))
+        dilated = cv2.dilate(alpha, kernel)
+        eroded = cv2.erode(alpha, kernel)
+        edge_mask = dilated - eroded
+        # Apply slight blur to edges only
+        blurred = cv2.GaussianBlur(composite, (3, 3), 0)
+        # Blend based on edge mask
+        edge_mask_3d = np.stack([edge_mask] * 3, axis=2)
+        result = composite * (1 - edge_mask_3d) + blurred * edge_mask_3d
+        return result.astype(np.uint8)
+# ==============================================================================
+# CHAPTER 6: ENHANCED CHUNKED VIDEO PROCESSOR
 # ==============================================================================
 class ChunkedVideoProcessor:
+    """Handles splitting and reassembling videos for chunked processing with audio support."""
     def __init__(self, temp_dir: str, config: ProcessingConfig):
         self.temp_dir = temp_dir
             return False
     def split_video_into_chunks(self, video_path: str) -> List[str]:
+        """Split video into chunks for processing, preserving audio."""
         try:
             with VideoFileClip(video_path) as clip:
                 duration = clip.duration
                     # Calculate chunk end time
                     end_time = min(current_time + chunk_duration, duration)
+                    # Check if remaining duration after this chunk is too small
+                    remaining_after = duration - end_time
+                    if remaining_after > 0 and remaining_after < self.config.MIN_FINAL_CHUNK_DURATION:
+                        # Include the remaining portion in this chunk
+                        end_time = duration
+                        logger.info(f"Including final {remaining_after:.1f}s in chunk {chunk_index}")
+                    # Create chunk with audio
                     chunk_clip = clip.subclip(current_time, end_time)
                     chunk_path = os.path.join(self.chunks_dir, f"chunk_{chunk_index:03d}.mp4")
+                    # Write chunk with audio preservation
                     chunk_clip.write_videofile(
                         chunk_path,
                         codec=self.config.VIDEO_CODEC,
+                        audio_codec=self.config.AUDIO_CODEC if chunk_clip.audio else None,
+                        audio_bitrate=self.config.AUDIO_BITRATE if chunk_clip.audio else None,
+                        temp_audiofile=os.path.join(self.temp_dir, f"temp_audio_chunk_{chunk_index}.{self.config.AUDIO_TEMP_FORMAT}"),
+                        remove_temp=True,
                         verbose=False,
                         logger=None
                     )
                     chunk_clip.close()
                     chunk_paths.append(chunk_path)
+                    logger.info(f"Created chunk {chunk_index}: {current_time:.1f}s - {end_time:.1f}s (duration: {end_time - current_time:.1f}s)")
                     # Move to next chunk with slight overlap for smooth transitions
+                    if end_time < duration:  # Not the last chunk
+                        overlap_seconds = self.config.CHUNK_OVERLAP_FRAMES / clip.fps
+                        current_time = end_time - overlap_seconds
+                    else:
+                        current_time = duration  # We're done
                     chunk_index += 1
                 logger.info(f"Split video into {len(chunk_paths)} chunks")
             logger.error(f"Video chunking failed: {e}")
             raise
+    def reassemble_chunks(self, processed_chunk_paths: List[str], output_path: str,
+                         original_audio_path: Optional[str] = None) -> str:
+        """Reassemble processed chunks into final video with original audio."""
         try:
             if len(processed_chunk_paths) == 1:
+                # Single chunk, just copy or add audio
+                if original_audio_path and os.path.exists(original_audio_path):
+                    # Add audio to single chunk
+                    with VideoFileClip(processed_chunk_paths[0]) as video:
+                        audio = AudioFileClip(original_audio_path)
+                        final = video.set_audio(audio)
+                        final.write_videofile(
+                            output_path,
+                            codec=self.config.VIDEO_CODEC,
+                            audio_codec=self.config.AUDIO_CODEC,
+                            audio_bitrate=self.config.AUDIO_BITRATE,
+                            temp_audiofile=os.path.join(self.temp_dir, "temp_single_audio.m4a"),
+                            remove_temp=True,
+                            verbose=False,
+                            logger=None
+                        )
+                        final.close()
+                else:
+                    shutil.copy2(processed_chunk_paths[0], output_path)
+                logger.info("Single chunk processed")
                 return output_path
             # Load all chunks
             logger.info(f"Concatenating {len(chunk_clips)} chunks...")
             final_clip = concatenate_videoclips(chunk_clips, method="compose")
+            # Add original audio if provided
+            if original_audio_path and os.path.exists(original_audio_path):
+                logger.info("Adding original audio track...")
+                audio_clip = AudioFileClip(original_audio_path)
+                # Ensure audio matches video duration
+                if audio_clip.duration > final_clip.duration:
+                    audio_clip = audio_clip.subclip(0, final_clip.duration)
+                final_clip = final_clip.set_audio(audio_clip)
+            # Write final video with audio
             final_clip.write_videofile(
                 output_path,
                 codec=self.config.VIDEO_CODEC,
+                audio_codec=self.config.AUDIO_CODEC if final_clip.audio else None,
+                audio_bitrate=self.config.AUDIO_BITRATE if final_clip.audio else None,
+                temp_audiofile=os.path.join(self.temp_dir, "temp_final_audio.m4a"),
+                remove_temp=True,
                 verbose=False,
                 logger=None
             )
             raise
 # ==============================================================================
+# CHAPTER 7: TWOSTAGEPROCESSOR CLASS DEFINITION
 # ==============================================================================
 class TwoStageProcessor:
     """
+    Enhanced two-stage alpha channel processor with video-to-video pipeline.
     This processor avoids green screen entirely by using alpha channels:
     1. SAM2 creates reference mask from first few frames
     2. MatAnyone processes entire video using reference mask → pha.mp4 (alpha matte)
     3. Direct alpha compositing with background → final.mp4
+    Features:
+    - No chroma key or green screen needed
+    - Audio preservation throughout pipeline
+    - Full video duration processing
+    - Alpha matte refinement for cleaner edges
+    - Chunked processing for long videos
     """
     def __init__(self, sam2_handler, matanyone_handler, temp_dir: Optional[str] = None):
         self.config = ProcessingConfig()
         self.memory_manager = MemoryManager(self.config)
         self.chunked_processor = ChunkedVideoProcessor(self.temp_dir, self.config)
+        self.alpha_refiner = AlphaRefiner()
         # Ensure temp directory exists
         os.makedirs(self.temp_dir, exist_ok=True)
                      callback: Optional[callable] = None,
                      **kwargs) -> Tuple[str, str]:
         """
+        Main processing pipeline - video to video with alpha compositing and audio.
         Returns:
             Tuple[str, str]: (final_output_path, status_message)
         """
         try:
+            logger.info(f"🎬 Enhanced Two-Stage Alpha Pipeline: {video_path}")
             logger.info(f"🎯 Background: {background_path}")
             logger.info(f"📁 Temp: {self.temp_dir}")
             logger.info(f"🧠 Initial {self.memory_manager.get_gpu_memory_info()}")
+            # Extract and save original audio first
+            original_audio_path = None
+            if self.config.PRESERVE_ORIGINAL_AUDIO:
+                original_audio_path = self._extract_audio(video_path)
             # Determine if chunking is needed
             needs_chunking = self.chunked_processor.should_chunk_video(video_path)
             if needs_chunking:
                 logger.info("Video exceeds chunk duration, using chunked processing...")
+                result = self._process_chunked_video(
+                    video_path, background_path, output_path,
+                    quality, original_audio_path, callback
+                )
             else:
                 logger.info("Video within chunk duration, using single-pass processing...")
+                result = self._process_single_video(
+                    video_path, background_path, output_path,
+                    quality, trim_seconds, original_audio_path, callback
+                )
             # Final memory report
             logger.info(self.memory_manager.get_memory_report())
             logger.error(f"Traceback: {traceback.format_exc()}")
             return None, error_msg
+    def _extract_audio(self, video_path: str) -> Optional[str]:
+        """Extract audio from original video for later reattachment."""
+        try:
+            audio_path = os.path.join(self.temp_dir, f"original_audio.{self.config.AUDIO_TEMP_FORMAT}")
+            with VideoFileClip(video_path) as clip:
+                if clip.audio is not None:
+                    clip.audio.write_audiofile(
+                        audio_path,
+                        codec=self.config.AUDIO_CODEC,
+                        bitrate=self.config.AUDIO_BITRATE,
+                        verbose=False,
+                        logger=None
+                    )
+                    logger.info(f"✅ Extracted audio: {audio_path}")
+                    return audio_path
+                else:
+                    logger.warning("Video has no audio track")
+                    return None
+        except Exception as e:
+            logger.warning(f"Could not extract audio: {e}")
+            return None
     def _process_chunked_video(self,
                               video_path: str,
                               background_path: str,
                               output_path: str,
                               quality: str,
+                              original_audio_path: Optional[str],
                               callback: Optional[callable] = None) -> Tuple[str, str]:
         """Process long video using chunked approach."""
         try:
                 logger.info(f"Processing chunk {i+1}/{total_chunks}: {chunk_path}")
                 logger.info(f"Pre-chunk memory: {self.memory_manager.get_gpu_memory_info()}")
+                # Process this chunk at full resolution without audio (add at the end)
                 chunk_output = os.path.join(self.temp_dir, f"processed_chunk_{i:03d}.mp4")
                 result_path, status = self._process_single_video(
                     chunk_path, background_path, chunk_output, quality,
+                    trim_seconds=None, original_audio_path=None, callback=None
                 )
                 if result_path and os.path.exists(result_path):
                 logger.info(f"Post-chunk memory: {self.memory_manager.get_gpu_memory_info()}")
             if callback:
+                callback("Reassembling chunks with audio...", 85)
+            # Reassemble chunks with original audio
+            final_path = self.chunked_processor.reassemble_chunks(
+                processed_chunks, output_path, original_audio_path
+            )
             if callback:
                 callback("Chunked processing completed!", 100)
             logger.info(f"✅ Chunked processing completed: {final_path}")
+            return final_path, f"Success - Processed {total_chunks} chunks with audio"
         except Exception as e:
             logger.error(f"Chunked processing failed: {e}")
                              output_path: str,
                              quality: str,
                              trim_seconds: Optional[int] = None,
+                             original_audio_path: Optional[str] = None,
                              callback: Optional[callable] = None) -> Tuple[str, str]:
         """Process a single video (or chunk) through the pipeline."""
         try:
             self.memory_manager.cleanup_model(self.matanyone_handler, "MatAnyone", deep_cleanup=False)
             self.memory_manager.cleanup_stage("MatAnyone", force=True)
+            # Stage 3: Enhanced alpha compositing with audio
             if callback:
                 callback("Alpha compositing with background...", 70)
+            logger.info("STAGE 3: Enhanced alpha compositing with background...")
+            final_path = self._stage3_enhanced_alpha_composite(
+                video_path, alpha_video_path, background_path,
+                output_path, quality, original_audio_path
+            )
             # Final memory cleanup
             self.memory_manager.cleanup_stage("Final")
             logger.warning(f"Failed to cleanup: {e}")
 # ==============================================================================
+# CHAPTER 8: STAGE 1 - REFERENCE MASK CREATION (SAM2)
 # ==============================================================================
     def _stage1_create_reference_mask(self, video_path: str) -> str:
         return result
 # ==============================================================================
+# CHAPTER 9: STAGE 2 - MATANYONE PROCESSING
 # ==============================================================================
     def _stage2_matanyone_processing(self, video_path: str, reference_mask_path: str, trim_seconds: Optional[int]) -> str:
             raise
     def _trim_video(self, input_path: str, output_path: str, seconds: int):
+        """Trim video to specified duration with audio preservation."""
         try:
             with VideoFileClip(input_path) as clip:
                 trimmed = clip.subclip(0, min(seconds, clip.duration))
                 trimmed.write_videofile(
                     output_path,
                     codec=self.config.VIDEO_CODEC,
+                    audio_codec=self.config.AUDIO_CODEC if clip.audio else None,
+                    audio_bitrate=self.config.AUDIO_BITRATE if clip.audio else None,
                     verbose=False,
                     logger=None
                 )
             raise
 # ==============================================================================
+# CHAPTER 10: STAGE 3 - ENHANCED ALPHA COMPOSITING WITH AUDIO
 # ==============================================================================
+    def _stage3_enhanced_alpha_composite(self, original_video_path: str,
+                                         alpha_video_path: str,
+                                         background_path: str,
+                                         output_path: str,
+                                         quality: str,
+                                         audio_path: Optional[str]) -> str:
         """
+        Stage 3: Enhanced alpha compositing with refinement and audio support.
+        Uses MoviePy for better audio handling and applies alpha refinement
+        for cleaner edges and better compositing quality.
         """
         try:
+            # Load videos using MoviePy for better audio support
+            logger.info("Loading video clips...")
+            original_clip = VideoFileClip(original_video_path)
+            alpha_clip = VideoFileClip(alpha_video_path)
+            # Load background
             if background_path.lower().endswith(('.mp4', '.avi', '.mov', '.mkv')):
+                logger.info("Loading video background...")
+                background_clip = VideoFileClip(background_path)
+                background_clip = background_clip.resize(original_clip.size)
+                # Loop background if it's shorter than the foreground
+                if background_clip.duration < original_clip.duration:
+                    logger.info("Looping background video to match duration...")
+                    background_clip = background_clip.loop(duration=original_clip.duration)
             else:
+                logger.info("Loading image background...")
+                background_clip = ImageClip(background_path).set_duration(original_clip.duration)
+                background_clip = background_clip.resize(original_clip.size)
+            # Get video properties for logging
+            fps = original_clip.fps
+            width, height = original_clip.size
+            duration = original_clip.duration
+            total_frames = int(fps * duration)
+            logger.info(f"Video properties: {width}x{height} @ {fps:.1f} FPS")
+            logger.info(f"Duration: {duration:.1f}s ({total_frames} frames)")
+            # Create enhanced composite function with alpha refinement
+            def make_frame_enhanced(t):
+                """Create composite frame at time t with alpha refinement."""
+                try:
+                    # Get frames at time t
+                    orig_frame = original_clip.get_frame(t)
+                    alpha_frame = alpha_clip.get_frame(t)
+                    bg_frame = background_clip.get_frame(t)
+                    # Convert alpha to grayscale if needed
+                    if len(alpha_frame.shape) == 3:
+                        alpha_mask = cv2.cvtColor(alpha_frame.astype(np.uint8), cv2.COLOR_RGB2GRAY)
+                    else:
+                        alpha_mask = alpha_frame
+                    # Apply alpha refinement for cleaner edges
+                    alpha_refined = self.alpha_refiner.refine_alpha_matte(alpha_mask, self.config)
+                    # Handle dimension mismatch
+                    if alpha_refined.shape[:2] != orig_frame.shape[:2]:
+                        alpha_refined = cv2.resize(alpha_refined,
+                                                  (orig_frame.shape[1], orig_frame.shape[0]),
+                                                  interpolation=cv2.INTER_LINEAR)
+                    # Create 3-channel alpha for compositing
+                    alpha_3d = np.stack([alpha_refined] * 3, axis=2)
+                    # Direct alpha compositing
+                    composite = alpha_3d * orig_frame + (1 - alpha_3d) * bg_frame
+                    # Optional: remove color fringing at edges
+                    if self.config.ENABLE_DEFRINGING:
+                        composite = self.alpha_refiner.defringe_edges(
+                            composite.astype(np.uint8),
+                            alpha_refined,
+                            radius=self.config.DEFRINGE_RADIUS
+                        )
+                    return np.clip(composite, 0, 255).astype(np.uint8)
+                except Exception as e:
+                    logger.error(f"Frame processing error at t={t}: {e}")
+                    # Return original frame as fallback
+                    return original_clip.get_frame(t)
+            # Create composite video with enhanced processing
+            logger.info("Creating composite video with alpha refinement...")
+            composite_clip = VideoClip(make_frame_enhanced, duration=original_clip.duration)
+            composite_clip = composite_clip.set_fps(original_clip.fps)
+            # Add audio (prioritize provided audio path, then original audio)
+            if audio_path and os.path.exists(audio_path):
+                logger.info(f"Adding audio from: {audio_path}")
+                audio_clip = AudioFileClip(audio_path)
+                # Ensure audio matches video duration
+                if audio_clip.duration > composite_clip.duration:
+                    audio_clip = audio_clip.subclip(0, composite_clip.duration)
+                composite_clip = composite_clip.set_audio(audio_clip)
+            elif original_clip.audio is not None:
+                logger.info("Using original video's audio...")
+                composite_clip = composite_clip.set_audio(original_clip.audio)
+            else:
+                logger.warning("No audio available for final output")
+            # Get quality profile
+            profile = QualityManager.get_profile(quality)
+            # Write final video with audio
+            logger.info(f"Writing final video with quality profile: {quality}")
+            composite_clip.write_videofile(
+                output_path,
+                codec=self.config.VIDEO_CODEC,
+                bitrate=profile['bitrate'],
+                audio_codec=self.config.AUDIO_CODEC if composite_clip.audio else None,
+                audio_bitrate=self.config.AUDIO_BITRATE if composite_clip.audio else None,
+                temp_audiofile=os.path.join(self.temp_dir, f"temp_composite_audio.{self.config.AUDIO_TEMP_FORMAT}"),
+                remove_temp=True,
+                verbose=False,
+                logger=None
+            )
+            # Cleanup clips
+            logger.info("Cleaning up video clips...")
+            original_clip.close()
+            alpha_clip.close()
+            background_clip.close()
+            composite_clip.close()
             # Verify output
             if not os.path.exists(output_path):
                 raise RuntimeError("Output file was not created")
+            # Verify the output has proper duration
+            with VideoFileClip(output_path) as verify_clip:
+                output_duration = verify_clip.duration
+                if abs(output_duration - duration) > 1.0:  # Allow 1 second tolerance
+                    logger.warning(f"Duration mismatch: expected {duration:.1f}s, got {output_duration:.1f}s")
             file_size = os.path.getsize(output_path) / (1024 * 1024)  # MB
+            logger.info(f"✅ Enhanced compositing completed: {output_path}")
+            logger.info(f"   File size: {file_size:.1f} MB")
+            logger.info(f"   Duration: {output_duration:.1f}s")
+            logger.info(f"   Audio: {'Yes' if composite_clip.audio else 'No'}")
             return output_path
         except Exception as e:
+            logger.error(f"Stage 3 enhanced compositing failed: {e}")
             raise
 # ==============================================================================
+# CHAPTER 11: DEBUG AND UTILITY FUNCTIONS
 # ==============================================================================
     def _debug_video_info(self, video_path: str, label: str = "Video"):
             'temp_size_mb': 0,
             'debug_files': 0,
             'memory_info': self.memory_manager.get_gpu_memory_info(),
+            'memory_report': self.memory_manager.get_memory_report(),
+            'config': {
+                'audio_enabled': self.config.PRESERVE_ORIGINAL_AUDIO,
+                'alpha_refinement': self.config.ALPHA_SMOOTHING > 0,
+                'defringing': self.config.ENABLE_DEFRINGING,
+                'chunk_duration': self.config.MAX_CHUNK_DURATION
+            }
         }
         try:
         return stats
 # ==============================================================================
+# CHAPTER 12: EXPORT INTERFACE AND COMPATIBILITY
 # ==============================================================================
 def create_two_stage_processor(sam2_handler, matanyone_handler, **kwargs):
 # Main execution example
 if __name__ == "__main__":
     # Example usage - replace with your actual handlers
     # The banner is constant text, so it is logged line by line from a tuple.
     banner_lines = (
         "Enhanced TwoStageProcessor (Alpha Channel Version) loaded successfully",
         "Features:",
         "  ✅ No green screen or chroma key needed",
         "  ✅ Audio preservation throughout pipeline",
         "  ✅ Full video duration processing",
         "  ✅ Alpha matte refinement for cleaner edges",
         "  ✅ Chunked processing for long videos",
         "",
         "Use create_two_stage_processor(sam2_handler, matanyone_handler) to initialize",
     )
     for banner_line in banner_lines:
         logger.info(banner_line)
     # Print configuration
     config = ProcessingConfig()
     logger.info("Pipeline: SAM2 → MatAnyone → Enhanced Alpha Compositing")
     logger.info("Configuration:")
     logger.info(f"  Reference frames: {config.REFERENCE_FRAMES}")
     logger.info(f"  Chunk duration: {config.MAX_CHUNK_DURATION}s with {config.CHUNK_OVERLAP_FRAMES} frame overlap")
     logger.info(f"  Audio: {config.AUDIO_CODEC} @ {config.AUDIO_BITRATE}")
     logger.info(f"  Alpha refinement: Smoothing={config.ALPHA_SMOOTHING}, Edge blur={config.ALPHA_EDGE_BLUR}")
     cache_state = 'ON' if config.CLEAR_CACHE_AFTER_STAGE else 'OFF'
     logger.info(f"  Memory management: Cache clearing={cache_state}")