MogensR committed on
Commit
82a2981
·
1 Parent(s): 5353980

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +330 -536
app.py CHANGED
@@ -1,622 +1,416 @@
1
  #!/usr/bin/env python3
2
  """
3
- VideoBackgroundFX - SAM2 GPU-Optimized Video Background Replacement
4
- HuggingFace Space Deployment with L4 GPU Support
5
- Updated: 2025-08-13 - SAM2 Integration
6
  """
7
 
8
- import streamlit as st
9
- import cv2
10
- import numpy as np
11
- import tempfile
12
  import os
13
- from PIL import Image
14
- import requests
15
- from io import BytesIO
16
- import logging
17
- import base64
18
- import gc
19
  import torch
20
- import psutil
 
 
 
 
 
 
21
 
22
- # Configure logging
23
- logging.basicConfig(level=logging.INFO)
24
  logger = logging.getLogger(__name__)
25
 
26
- # GPU Environment Setup
27
- def setup_gpu_environment():
28
- """Setup GPU environment for L4 optimization"""
29
- os.environ['OMP_NUM_THREADS'] = '8'
30
- os.environ['ORT_PROVIDERS'] = 'CUDAExecutionProvider,CPUExecutionProvider'
31
- os.environ['CUDA_VISIBLE_DEVICES'] = '0'
32
- os.environ['TORCH_CUDA_ARCH_LIST'] = '8.9' # L4 architecture
33
- os.environ['CUDA_LAUNCH_BLOCKING'] = '0'
34
- os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:512'
35
-
36
  try:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
  if torch.cuda.is_available():
38
- device_count = torch.cuda.device_count()
39
- gpu_name = torch.cuda.get_device_name(0)
40
  gpu_memory = torch.cuda.get_device_properties(0).total_memory / 1024**3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
 
42
- logger.info(f"🚀 GPU: {gpu_name} ({gpu_memory:.1f}GB)")
43
-
44
- # Initialize CUDA context
45
- torch.cuda.init()
46
- torch.cuda.set_device(0)
47
 
48
- # Enable optimizations for L4
49
- torch.backends.cuda.matmul.allow_tf32 = True
50
- torch.backends.cudnn.allow_tf32 = True
51
- torch.backends.cudnn.benchmark = True
52
- torch.backends.cudnn.deterministic = False
53
 
54
- # Set memory fraction
55
- torch.cuda.set_per_process_memory_fraction(0.8)
 
 
 
 
 
 
 
 
56
 
57
- # Warm up GPU
58
- dummy = torch.randn(1024, 1024, device='cuda')
59
- dummy = dummy @ dummy.T
60
- del dummy
61
- torch.cuda.empty_cache()
62
 
63
- return True, gpu_name, gpu_memory
64
  else:
65
- logger.warning("⚠️ CUDA not available")
66
- return False, None, 0
67
- except Exception as e:
68
- logger.error(f"GPU setup failed: {e}")
69
- return False, None, 0
70
-
71
- # Initialize GPU
72
- CUDA_AVAILABLE, GPU_NAME, GPU_MEMORY = setup_gpu_environment()
73
-
74
- # SAM2 Integration
75
- try:
76
- from segment_anything import sam_model_registry, SamPredictor
77
- SAM_AVAILABLE = True
78
- logger.info("✅ SAM loaded successfully")
79
-
80
- # Initialize SAM with downloaded checkpoint
81
- sam_checkpoint = "sam_vit_h_4b8939.pth"
82
- model_type = "vit_h"
83
-
84
- if os.path.exists(sam_checkpoint) and CUDA_AVAILABLE:
85
- sam = sam_model_registry[model_type](checkpoint=sam_checkpoint)
86
- sam.to(device='cuda')
87
- sam_predictor = SamPredictor(sam)
88
- logger.info("✅ SAM2 GPU predictor initialized")
89
- else:
90
- sam_predictor = None
91
- if not os.path.exists(sam_checkpoint):
92
- logger.warning(f"⚠️ SAM checkpoint not found: {sam_checkpoint}")
93
 
94
- except ImportError as e:
95
- SAM_AVAILABLE = False
96
- sam_predictor = None
97
- logger.warning(f"⚠️ SAM not available: {e}")
98
-
99
- # Rembg with GPU optimization
100
- try:
101
- from rembg import remove, new_session
102
- import onnxruntime as ort
103
-
104
- REMBG_AVAILABLE = True
105
- logger.info("✅ Rembg loaded")
106
-
107
- if CUDA_AVAILABLE:
108
- providers = [
109
- ('CUDAExecutionProvider', {
110
- 'device_id': 0,
111
- 'arena_extend_strategy': 'kSameAsRequested',
112
- 'gpu_mem_limit': 20 * 1024 * 1024 * 1024, # 20GB for L4
113
- 'cudnn_conv_algo_search': 'HEURISTIC',
114
- }),
115
- 'CPUExecutionProvider'
116
- ]
117
 
118
- rembg_session = new_session('u2net_human_seg', providers=providers)
 
 
 
 
 
119
 
120
- # Warm up
121
- dummy_img = Image.new('RGB', (512, 512), color='white')
122
- with torch.cuda.amp.autocast():
123
- _ = remove(dummy_img, session=rembg_session)
124
 
125
- logger.info("✅ Rembg GPU session initialized")
126
- else:
127
- rembg_session = new_session('u2net_human_seg')
128
- logger.info("✅ Rembg CPU session initialized")
129
-
130
- except ImportError as e:
131
- REMBG_AVAILABLE = False
132
- rembg_session = None
133
- logger.warning(f"⚠️ Rembg not available: {e}")
134
-
135
- # OpenCV GPU check
136
- try:
137
- if cv2.cuda.getCudaEnabledDeviceCount() > 0:
138
- logger.info(f"✅ OpenCV CUDA devices: {cv2.cuda.getCudaEnabledDeviceCount()}")
139
- OPENCV_GPU = True
140
- else:
141
- OPENCV_GPU = False
142
- logger.warning("⚠️ OpenCV CUDA not available")
143
- except:
144
- OPENCV_GPU = False
145
- logger.warning("⚠️ OpenCV CUDA not available")
146
-
147
- # Memory management
148
- def optimize_memory():
149
- """Optimize memory usage"""
150
- if CUDA_AVAILABLE:
151
- torch.cuda.empty_cache()
152
- torch.cuda.synchronize()
153
- gc.collect()
154
-
155
- def get_memory_usage():
156
- """Get current memory usage"""
157
- stats = {}
158
- if CUDA_AVAILABLE:
159
- stats['gpu_allocated'] = torch.cuda.memory_allocated() / 1024**3
160
- stats['gpu_reserved'] = torch.cuda.memory_reserved() / 1024**3
161
- stats['gpu_free'] = GPU_MEMORY - stats['gpu_reserved']
162
- else:
163
- stats['gpu_allocated'] = 0
164
- stats['gpu_reserved'] = 0
165
- stats['gpu_free'] = 0
166
-
167
- # System RAM
168
- ram = psutil.virtual_memory()
169
- stats['ram_used'] = ram.used / 1024**3
170
- stats['ram_total'] = ram.total / 1024**3
171
- stats['ram_percent'] = ram.percent
172
-
173
- return stats
174
-
175
- # Background loading
176
- def load_background_image(background_url):
177
- """Load background image from URL"""
178
- try:
179
- if background_url == "default_brick":
180
- return create_default_background()
181
 
182
- response = requests.get(background_url)
183
- response.raise_for_status()
184
- image = Image.open(BytesIO(response.content))
185
- return np.array(image.convert('RGB'))
186
  except Exception as e:
187
- logger.error(f"Failed to load background image: {e}")
188
- return create_default_background()
 
 
 
 
189
 
190
- def create_default_background():
191
- """Create a default brick wall background"""
192
- background = np.zeros((720, 1280, 3), dtype=np.uint8)
193
- background[:, :] = [139, 69, 19] # Brown color
 
194
 
195
- # Add brick pattern
196
- for y in range(0, 720, 60):
197
- for x in range(0, 1280, 120):
198
- cv2.rectangle(background, (x, y), (x+115, y+55), (160, 82, 45), -1)
199
- cv2.rectangle(background, (x, y), (x+115, y+55), (101, 67, 33), 2)
 
 
 
 
 
 
 
 
 
 
 
200
 
201
- return background
202
-
203
- def get_professional_backgrounds():
204
- """Get professional background collection"""
205
- return {
206
- "🏢 Modern Office": "https://images.unsplash.com/photo-1497366216548-37526070297c?w=1920&h=1080&fit=crop",
207
- "🌆 City Skyline": "https://images.unsplash.com/photo-1449824913935-59a10b8d2000?w=1920&h=1080&fit=crop",
208
- "🏖️ Tropical Beach": "https://images.unsplash.com/photo-1507525428034-b723cf961d3e?w=1920&h=1080&fit=crop",
209
- "🌲 Forest Path": "https://images.unsplash.com/photo-1441974231531-c6227db76b6e?w=1920&h=1080&fit=crop",
210
- "🎨 Abstract Blue": "https://images.unsplash.com/photo-1557683316-973673baf926?w=1920&h=1080&fit=crop",
211
- "🏔️ Mountain View": "https://images.unsplash.com/photo-1506905925346-21bda4d32df4?w=1920&h=1080&fit=crop",
212
- "🌅 Sunset Gradient": "https://images.unsplash.com/photo-1495616811223-4d98c6e9c869?w=1920&h=1080&fit=crop",
213
- "💼 Executive Suite": "https://images.unsplash.com/photo-1497366811353-6870744d04b2?w=1920&h=1080&fit=crop"
214
- }
215
-
216
- # SAM2 Segmentation
217
- def segment_person_sam2(frame):
218
- """SAM2 GPU-accelerated segmentation"""
219
- try:
220
- if SAM_AVAILABLE and sam_predictor and CUDA_AVAILABLE:
221
- # Set image for SAM
222
- sam_predictor.set_image(frame)
223
-
224
- # Get image center as prompt (simple heuristic)
225
- h, w = frame.shape[:2]
226
- input_point = np.array([[w//2, h//2]])
227
- input_label = np.array([1])
228
-
229
- # Predict mask
230
- with torch.no_grad():
231
- masks, scores, logits = sam_predictor.predict(
232
- point_coords=input_point,
233
- point_labels=input_label,
234
- multimask_output=True,
235
- )
236
-
237
- # Use best mask
238
- best_mask = masks[np.argmax(scores)]
239
- return best_mask.astype(np.float32)
240
 
241
- return None
242
- except Exception as e:
243
- logger.error(f"SAM2 segmentation failed: {e}")
244
- return None
245
-
246
- # Rembg Segmentation
247
- def segment_person_rembg(frame):
248
- """Rembg GPU-optimized segmentation"""
249
- try:
250
- if REMBG_AVAILABLE and rembg_session:
251
- pil_image = Image.fromarray(frame)
 
 
252
 
253
- if CUDA_AVAILABLE:
254
- with torch.cuda.amp.autocast():
255
- output = remove(
256
- pil_image,
257
- session=rembg_session,
258
- alpha_matting=True,
259
- alpha_matting_foreground_threshold=240,
260
- alpha_matting_background_threshold=10,
261
- alpha_matting_erode_size=10
262
- )
263
- else:
264
- output = remove(pil_image, session=rembg_session, alpha_matting=True)
265
 
266
- output_array = np.array(output)
267
- if output_array.shape[2] == 4:
268
- mask = output_array[:, :, 3].astype(np.float32) / 255.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
269
  else:
270
- mask = np.ones((frame.shape[0], frame.shape[1]), dtype=np.float32)
 
 
 
 
 
 
 
271
 
272
- return mask
273
- return None
274
- except Exception as e:
275
- logger.error(f"Rembg segmentation failed: {e}")
276
- return None
277
-
278
- # OpenCV GPU Segmentation
279
- def segment_person_opencv_gpu(frame):
280
- """OpenCV GPU segmentation"""
281
- try:
282
- if OPENCV_GPU:
283
- gpu_frame = cv2.cuda_GpuMat()
284
- gpu_frame.upload(frame)
285
 
286
- gpu_hsv = cv2.cuda.cvtColor(gpu_frame, cv2.COLOR_RGB2HSV)
 
 
 
 
 
 
287
 
288
- lower_skin = np.array([0, 20, 70])
289
- upper_skin = np.array([20, 255, 255])
 
290
 
291
- gpu_mask = cv2.cuda.inRange(gpu_hsv, lower_skin, upper_skin)
 
 
 
 
 
292
 
 
 
293
  kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))
294
- gpu_mask = cv2.cuda.morphologyEx(gpu_mask, cv2.MORPH_CLOSE, kernel)
295
- gpu_mask = cv2.cuda.morphologyEx(gpu_mask, cv2.MORPH_OPEN, kernel)
296
 
297
- mask = gpu_mask.download()
 
298
 
299
- del gpu_frame, gpu_hsv, gpu_mask
 
 
300
 
301
- return mask.astype(float) / 255
302
- else:
303
- return segment_person_fallback_cpu(frame)
304
- except Exception as e:
305
- logger.error(f"OpenCV GPU segmentation failed: {e}")
306
- return segment_person_fallback_cpu(frame)
 
 
 
 
 
307
 
308
- def segment_person_fallback_cpu(frame):
309
- """CPU fallback segmentation"""
310
- try:
311
- hsv = cv2.cvtColor(frame, cv2.COLOR_RGB2HSV)
312
- lower_skin = np.array([0, 20, 70])
313
- upper_skin = np.array([20, 255, 255])
314
- mask = cv2.inRange(hsv, lower_skin, upper_skin)
315
-
316
- kernel = np.ones((5, 5), np.uint8)
317
- mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel)
318
- mask = cv2.morphologyEx(mask, cv2.MORPH_OPEN, kernel)
319
-
320
- return mask.astype(float) / 255
321
- except Exception as e:
322
- logger.error(f"CPU fallback segmentation failed: {e}")
323
- return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
324
 
325
- # Video Processing
326
- def process_video_gpu_optimized(video_path, background_url, progress_callback=None):
327
- """GPU-optimized video processing"""
 
 
 
 
328
  try:
329
- background_image = load_background_image(background_url)
330
-
331
  cap = cv2.VideoCapture(video_path)
332
 
 
333
  fps = int(cap.get(cv2.CAP_PROP_FPS))
334
  width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
335
  height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
336
  total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
337
 
338
- logger.info(f"Processing video: {width}x{height}, {total_frames} frames, {fps} FPS")
339
-
340
- output_path = tempfile.mktemp(suffix='.mp4')
341
  fourcc = cv2.VideoWriter_fourcc(*'mp4v')
342
  out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
343
 
 
344
  background_resized = cv2.resize(background_image, (width, height))
345
 
 
 
 
346
  frame_count = 0
347
- batch_size = 4 if CUDA_AVAILABLE else 1
348
- frame_batch = []
349
 
350
  while True:
351
  ret, frame = cap.read()
352
  if not ret:
353
- if frame_batch:
354
- processed_batch = process_frame_batch(frame_batch, background_resized)
355
- for processed_frame in processed_batch:
356
- out.write(processed_frame)
357
  break
358
 
 
359
  frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
360
- frame_batch.append(frame_rgb)
361
 
362
- if len(frame_batch) >= batch_size:
363
- processed_batch = process_frame_batch(frame_batch, background_resized)
 
 
 
 
 
364
 
365
- for processed_frame in processed_batch:
366
- out.write(processed_frame)
367
- frame_count += 1
368
-
369
- if progress_callback:
370
- progress = frame_count / total_frames
371
- memory_stats = get_memory_usage()
372
- progress_callback(
373
- progress,
374
- f"GPU Processing: {frame_count}/{total_frames} | "
375
- f"GPU: {memory_stats['gpu_allocated']:.1f}GB | "
376
- f"RAM: {memory_stats['ram_percent']:.1f}%"
377
- )
378
 
379
- frame_batch = []
380
- optimize_memory()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
381
 
382
  cap.release()
383
  out.release()
384
- optimize_memory()
385
 
386
- logger.info(f"Video processing complete: {output_path}")
387
  return output_path
388
 
389
  except Exception as e:
390
- logger.error(f"GPU video processing failed: {e}")
391
  return None
392
 
393
- def process_frame_batch(frame_batch, background_resized):
394
- """Process batch of frames"""
395
- processed_frames = []
396
-
397
- for frame in frame_batch:
398
- person_mask = None
399
- method_used = "None"
400
-
401
- # Try SAM2 first
402
- if SAM_AVAILABLE and CUDA_AVAILABLE:
403
- person_mask = segment_person_sam2(frame)
404
- if person_mask is not None:
405
- method_used = "SAM2-GPU"
406
-
407
- # Try Rembg
408
- if person_mask is None and REMBG_AVAILABLE:
409
- person_mask = segment_person_rembg(frame)
410
- if person_mask is not None:
411
- method_used = "Rembg-GPU"
412
-
413
- # Try OpenCV GPU
414
- if person_mask is None and OPENCV_GPU:
415
- person_mask = segment_person_opencv_gpu(frame)
416
- if person_mask is not None:
417
- method_used = "OpenCV-GPU"
418
-
419
- # CPU fallback
420
- if person_mask is None:
421
- person_mask = segment_person_fallback_cpu(frame)
422
- method_used = "CPU-Fallback"
423
-
424
- if person_mask is not None:
425
- if person_mask.ndim == 2:
426
- person_mask = np.expand_dims(person_mask, axis=2)
427
-
428
- final_frame = frame * person_mask + background_resized * (1 - person_mask)
429
- final_frame = final_frame.astype(np.uint8)
430
- else:
431
- final_frame = frame
432
-
433
- final_frame_bgr = cv2.cvtColor(final_frame, cv2.COLOR_RGB2BGR)
434
- processed_frames.append(final_frame_bgr)
435
-
436
- return processed_frames
437
 
438
- # Streamlit UI
439
  def main():
440
- st.set_page_config(
441
- page_title="VideoBackgroundFX - SAM2 GPU",
442
- page_icon="🚀",
443
- layout="wide",
444
- initial_sidebar_state="expanded"
445
- )
446
 
447
- st.title("🚀 VideoBackgroundFX - SAM2 GPU-Optimized")
448
- st.markdown("**High-performance video background replacement with SAM2 & GPU acceleration**")
449
-
450
- # GPU Status Dashboard
451
- col1, col2, col3, col4 = st.columns(4)
452
 
453
  with col1:
454
- if CUDA_AVAILABLE:
455
- st.success(f"🚀 GPU: {GPU_NAME}")
456
- st.caption(f"{GPU_MEMORY:.1f}GB VRAM")
457
- else:
458
- st.warning("⚠️ CPU Mode")
459
-
460
- with col2:
461
- if SAM_AVAILABLE and CUDA_AVAILABLE:
462
- st.success("✅ SAM2-GPU")
463
- elif REMBG_AVAILABLE:
464
- st.success("✅ Rembg-GPU")
465
- else:
466
- st.warning("⚠️ Basic Mode")
467
-
468
- with col3:
469
- if OPENCV_GPU:
470
- st.success("✅ OpenCV-GPU")
471
- else:
472
- st.info("ℹ️ OpenCV-CPU")
473
-
474
- with col4:
475
- memory_stats = get_memory_usage()
476
- if CUDA_AVAILABLE:
477
- st.metric("GPU Memory", f"{memory_stats['gpu_allocated']:.1f}GB")
478
  else:
479
- st.info("CPU Processing")
480
 
481
- # Sidebar monitoring
482
- with st.sidebar:
483
- st.markdown("### 🚀 System Performance")
484
-
485
- memory_stats = get_memory_usage()
486
-
487
- if CUDA_AVAILABLE:
488
- st.metric("GPU Allocated", f"{memory_stats['gpu_allocated']:.2f}GB")
489
- st.metric("GPU Reserved", f"{memory_stats['gpu_reserved']:.2f}GB")
490
- st.metric("GPU Free", f"{memory_stats['gpu_free']:.2f}GB")
491
-
492
- usage_percent = (memory_stats['gpu_reserved'] / GPU_MEMORY) * 100
493
- st.progress(usage_percent / 100)
494
- st.caption(f"{usage_percent:.1f}% GPU Memory Used")
495
-
496
- st.metric("RAM Used", f"{memory_stats['ram_used']:.1f}GB")
497
- st.metric("RAM Total", f"{memory_stats['ram_total']:.1f}GB")
498
- st.progress(memory_stats['ram_percent'] / 100)
499
- st.caption(f"{memory_stats['ram_percent']:.1f}% RAM Used")
500
-
501
- st.markdown("---")
502
- st.markdown("### 🛠️ Processing Methods")
503
- methods = []
504
-
505
- if SAM_AVAILABLE and CUDA_AVAILABLE:
506
- methods.append("🚀 SAM2-GPU (Ultra Precise)")
507
- if REMBG_AVAILABLE:
508
- methods.append("✅ Rembg-GPU (High Quality)")
509
- if OPENCV_GPU:
510
- methods.append("⚡ OpenCV-GPU (Fast)")
511
- methods.append("💻 CPU Fallback")
512
-
513
- for method in methods:
514
- st.markdown(method)
515
-
516
- # Main interface
517
- col1, col2 = st.columns(2)
518
-
519
- # Initialize session state
520
- if 'video_path' not in st.session_state:
521
- st.session_state.video_path = None
522
- if 'video_bytes' not in st.session_state:
523
- st.session_state.video_bytes = None
524
- if 'video_name' not in st.session_state:
525
- st.session_state.video_name = None
526
-
527
- with col1:
528
- st.markdown("### 📹 Upload Video")
529
- uploaded_video = st.file_uploader(
530
- "Choose a video file",
531
- type=['mp4', 'avi', 'mov', 'mkv'],
532
- help="Upload video for SAM2 GPU processing"
533
- )
534
-
535
  if uploaded_video:
536
- if st.session_state.video_name != uploaded_video.name:
537
- st.success(f"✅ Video uploaded: {uploaded_video.name}")
538
-
539
- video_bytes = uploaded_video.read()
540
-
541
- with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as tmp_file:
542
- tmp_file.write(video_bytes)
543
- video_path = tmp_file.name
544
-
545
- st.session_state.video_path = video_path
546
- st.session_state.video_bytes = video_bytes
547
- st.session_state.video_name = uploaded_video.name
548
-
549
- if st.session_state.video_bytes is not None:
550
- st.video(st.session_state.video_bytes)
551
-
552
- elif st.session_state.video_path:
553
- st.success(f"✅ Video ready: {st.session_state.video_name}")
554
- st.video(st.session_state.video_bytes)
555
-
556
- with col2:
557
- st.markdown("### 🖼️ Background Selection")
558
-
559
- background_options = get_professional_backgrounds()
560
- selected_background = st.selectbox(
561
- "Choose background",
562
- options=list(background_options.keys()),
563
- index=0
564
- )
565
-
566
- background_url = background_options[selected_background]
567
-
568
- try:
569
- background_image = load_background_image(background_url)
570
- st.image(background_image, caption=f"Background: {selected_background}", use_container_width=True)
571
- except:
572
- st.error("Failed to load background image")
573
-
574
- # Processing button
575
- if (uploaded_video or st.session_state.video_path) and st.button("🚀 Process with SAM2", type="primary"):
576
- video_path = st.session_state.video_path
577
-
578
- if video_path and os.path.exists(video_path):
579
- progress_bar = st.progress(0)
580
- status_text = st.empty()
581
-
582
- def update_progress(progress, message):
583
- progress_bar.progress(progress)
584
- status_text.text(message)
585
 
586
- try:
587
- result_path = process_video_gpu_optimized(
588
- video_path,
589
- background_url,
590
- update_progress
591
- )
592
-
593
- if result_path and os.path.exists(result_path):
594
- status_text.text("✅ SAM2 processing complete!")
595
-
596
- with open(result_path, 'rb') as f:
597
- result_video = f.read()
598
-
599
- st.video(result_video)
600
-
601
- st.download_button(
602
- "💾 Download SAM2 Processed Video",
603
- data=result_video,
604
- file_name="sam2_backgroundfx_result.mp4",
605
- mime="video/mp4"
606
- )
607
-
608
- final_stats = get_memory_usage()
609
- st.success(f"🚀 SAM2 processing complete! GPU: {final_stats['gpu_allocated']:.2f}GB, RAM: {final_stats['ram_percent']:.1f}%")
610
-
611
- os.unlink(result_path)
612
- else:
613
- st.error("❌ SAM2 processing failed!")
614
-
615
- except Exception as e:
616
- st.error(f"❌ Error during SAM2 processing: {str(e)}")
617
- logger.error(f"SAM2 processing error: {e}")
618
- else:
619
- st.error("Video file not found. Please upload again.")
620
 
621
  if __name__ == "__main__":
622
- main()
 
1
  #!/usr/bin/env python3
2
  """
3
+ SAM2 (Segment Anything Model 2) for Video
4
+ Correct implementation with dynamic model loading
5
+ Optimized for video processing
6
  """
7
 
 
 
 
 
8
  import os
 
 
 
 
 
 
9
  import torch
10
+ import numpy as np
11
+ import streamlit as st
12
+ from pathlib import Path
13
+ import logging
14
+ import requests
15
+ from tqdm import tqdm
16
+ import cv2
17
 
 
 
18
  logger = logging.getLogger(__name__)
19
 
20
+ # ============================================
21
+ # SAM2 DYNAMIC LOADER FOR VIDEO
22
+ # ============================================
23
+
24
+ @st.cache_resource(show_spinner=False)
25
+ def load_sam2_model_dynamic():
26
+ """
27
+ Download and load SAM2 model dynamically
28
+ SAM2 is specifically designed for video segmentation
29
+ """
30
  try:
31
+ # Import SAM2 (not SAM1!)
32
+ from sam2.build_sam import build_sam2
33
+ from sam2.sam2_image_predictor import SAM2ImagePredictor
34
+ from sam2.automatic_mask_generator import SAM2AutomaticMaskGenerator
35
+
36
+ # SAM2 Model URLs (these are the NEW video-optimized models)
37
+ MODEL_URLS = {
38
+ 'sam2_hiera_large': {
39
+ 'config': 'sam2_hiera_l.yaml',
40
+ 'checkpoint': 'https://dl.fbaipublicfiles.com/segment_anything_2/072824/sam2_hiera_large.pt',
41
+ 'size': '897MB',
42
+ 'quality': 'Best for video'
43
+ },
44
+ 'sam2_hiera_base_plus': {
45
+ 'config': 'sam2_hiera_b+.yaml',
46
+ 'checkpoint': 'https://dl.fbaipublicfiles.com/segment_anything_2/072824/sam2_hiera_base_plus.pt',
47
+ 'size': '323MB',
48
+ 'quality': 'Balanced'
49
+ },
50
+ 'sam2_hiera_small': {
51
+ 'config': 'sam2_hiera_s.yaml',
52
+ 'checkpoint': 'https://dl.fbaipublicfiles.com/segment_anything_2/072824/sam2_hiera_small.pt',
53
+ 'size': '155MB',
54
+ 'quality': 'Fast'
55
+ },
56
+ 'sam2_hiera_tiny': {
57
+ 'config': 'sam2_hiera_t.yaml',
58
+ 'checkpoint': 'https://dl.fbaipublicfiles.com/segment_anything_2/072824/sam2_hiera_tiny.pt',
59
+ 'size': '77MB',
60
+ 'quality': 'Fastest'
61
+ }
62
+ }
63
+
64
+ # Choose model based on GPU
65
  if torch.cuda.is_available():
 
 
66
  gpu_memory = torch.cuda.get_device_properties(0).total_memory / 1024**3
67
+ if gpu_memory > 20: # L4 has 24GB
68
+ model_name = 'sam2_hiera_large'
69
+ elif gpu_memory > 10:
70
+ model_name = 'sam2_hiera_base_plus'
71
+ elif gpu_memory > 6:
72
+ model_name = 'sam2_hiera_small'
73
+ else:
74
+ model_name = 'sam2_hiera_tiny'
75
+ else:
76
+ model_name = 'sam2_hiera_tiny' # CPU = smallest
77
+
78
+ logger.info(f"Selected SAM2 model: {model_name} ({MODEL_URLS[model_name]['quality']})")
79
+
80
+ # Setup cache directory
81
+ cache_dir = Path("/tmp/sam2_models")
82
+ cache_dir.mkdir(exist_ok=True)
83
+
84
+ model_path = cache_dir / f"{model_name}.pt"
85
+ config_name = MODEL_URLS[model_name]['config']
86
+
87
+ # Download if not cached
88
+ if not model_path.exists():
89
+ logger.info(f"Downloading SAM2 {model_name} ({MODEL_URLS[model_name]['size']})...")
90
 
91
+ # Show progress in Streamlit
92
+ progress_text = st.empty()
93
+ progress_bar = st.progress(0)
 
 
94
 
95
+ # Download with progress
96
+ response = requests.get(MODEL_URLS[model_name]['checkpoint'], stream=True)
97
+ total_size = int(response.headers.get('content-length', 0))
 
 
98
 
99
+ with open(model_path, 'wb') as f:
100
+ downloaded = 0
101
+ for chunk in response.iter_content(chunk_size=8192):
102
+ f.write(chunk)
103
+ downloaded += len(chunk)
104
+
105
+ if total_size > 0:
106
+ progress = downloaded / total_size
107
+ progress_bar.progress(progress)
108
+ progress_text.text(f"Downloading SAM2: {downloaded/(1024**2):.1f}MB / {total_size/(1024**2):.1f}MB")
109
 
110
+ progress_text.empty()
111
+ progress_bar.empty()
 
 
 
112
 
113
+ logger.info(f"✅ SAM2 model downloaded to {model_path}")
114
  else:
115
+ logger.info(f" Using cached SAM2 model from {model_path}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
116
 
117
+ # Build SAM2 model
118
+ device = 'cuda' if torch.cuda.is_available() else 'cpu'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
119
 
120
+ sam2_model = build_sam2(
121
+ config_file=config_name,
122
+ ckpt_path=str(model_path),
123
+ device=device,
124
+ apply_postprocessing=True
125
+ )
126
 
127
+ # Create predictor for frame-by-frame processing
128
+ predictor = SAM2ImagePredictor(sam2_model)
 
 
129
 
130
+ logger.info(f"✅ SAM2 loaded successfully on {device}")
131
+ return predictor, model_name
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
132
 
133
+ except ImportError as e:
134
+ logger.error(f"SAM2 not installed. Install with: pip install sam-2")
135
+ return None, None
 
136
  except Exception as e:
137
+ logger.error(f"Failed to load SAM2 model: {e}")
138
+ return None, None
139
+
140
+ # ============================================
141
+ # SAM2 VIDEO PROCESSOR
142
+ # ============================================
143
 
144
+ class SAM2VideoProcessor:
145
+ """
146
+ SAM2 optimized for video processing
147
+ Handles temporal consistency across frames
148
+ """
149
 
150
+ def __init__(self):
151
+ self.predictor = None
152
+ self.model_name = None
153
+ self.loaded = False
154
+ self.previous_mask = None
155
+ self.frame_count = 0
156
+
157
+ def load_model(self):
158
+ """Load SAM2 model if not already loaded"""
159
+ if not self.loaded:
160
+ with st.spinner("🎬 Loading SAM2 Video Model..."):
161
+ self.predictor, self.model_name = load_sam2_model_dynamic()
162
+ self.loaded = True
163
+ if self.predictor:
164
+ logger.info(f"SAM2 Video Processor ready with {self.model_name}")
165
+ return self.predictor is not None
166
 
167
+ def segment_frame(self, frame, use_previous=True):
168
+ """
169
+ Segment a single frame with temporal consistency
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
170
 
171
+ Args:
172
+ frame: Input frame (H, W, 3) numpy array
173
+ use_previous: Use previous frame's mask for consistency
174
+
175
+ Returns:
176
+ mask: Segmentation mask (H, W) float32
177
+ """
178
+ if not self.load_model():
179
+ return None
180
+
181
+ try:
182
+ # Set the image
183
+ self.predictor.set_image(frame)
184
 
185
+ h, w = frame.shape[:2]
 
 
 
 
 
 
 
 
 
 
 
186
 
187
+ # Generate point prompts
188
+ if use_previous and self.previous_mask is not None:
189
+ # Use previous mask to guide current segmentation
190
+ # Find center of mass of previous mask
191
+ y_coords, x_coords = np.where(self.previous_mask > 0.5)
192
+ if len(y_coords) > 0:
193
+ center_y = int(np.mean(y_coords))
194
+ center_x = int(np.mean(x_coords))
195
+
196
+ # Add points around previous center
197
+ point_coords = np.array([
198
+ [center_x, center_y],
199
+ [center_x, center_y - h//8], # Above
200
+ [center_x, center_y + h//8], # Below
201
+ ])
202
+ else:
203
+ # Fallback to center points
204
+ point_coords = np.array([
205
+ [w//2, h//2],
206
+ [w//2, h//3],
207
+ [w//2, 2*h//3]
208
+ ])
209
  else:
210
+ # Initial frame - use center points
211
+ point_coords = np.array([
212
+ [w//2, h//2], # Center
213
+ [w//2, h//3], # Upper (head)
214
+ [w//2, 2*h//3], # Lower (body)
215
+ [w//3, h//2], # Left
216
+ [2*w//3, h//2], # Right
217
+ ])
218
 
219
+ point_labels = np.ones(len(point_coords)) # All foreground
 
 
 
 
 
 
 
 
 
 
 
 
220
 
221
+ # Generate masks with SAM2
222
+ masks, scores, logits = self.predictor.predict(
223
+ point_coords=point_coords,
224
+ point_labels=point_labels,
225
+ multimask_output=True,
226
+ return_logits=True
227
+ )
228
 
229
+ # Select best mask
230
+ best_idx = np.argmax(scores)
231
+ mask = masks[best_idx].astype(np.float32)
232
 
233
+ # Apply temporal smoothing if we have previous mask
234
+ if use_previous and self.previous_mask is not None:
235
+ # Blend with previous mask for temporal consistency
236
+ alpha = 0.3 # Smoothing factor
237
+ mask = (1 - alpha) * mask + alpha * self.previous_mask
238
+ mask = np.clip(mask, 0, 1)
239
 
240
+ # Post-processing for better quality
241
+ # Morphological operations
242
  kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))
243
+ mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel)
244
+ mask = cv2.morphologyEx(mask, cv2.MORPH_OPEN, kernel)
245
 
246
+ # Gaussian blur for smooth edges
247
+ mask = cv2.GaussianBlur(mask, (7, 7), 0)
248
 
249
+ # Store for next frame
250
+ self.previous_mask = mask.copy()
251
+ self.frame_count += 1
252
 
253
+ return mask
254
+
255
+ except Exception as e:
256
+ logger.error(f"SAM2 segmentation failed: {e}")
257
+ return None
258
+
259
+ def reset(self):
260
+ """Reset temporal state for new video"""
261
+ self.previous_mask = None
262
+ self.frame_count = 0
263
+ logger.info("SAM2 Video Processor reset for new video")
264
 
265
+ # ============================================
266
+ # LAZY LOADER FOR SAM2
267
+ # ============================================
268
+
269
+ class SAM2LazyLoader:
270
+ """
271
+ Lazy loading for SAM2 - only loads when needed
272
+ """
273
+ def __init__(self):
274
+ self.processor = SAM2VideoProcessor()
275
+
276
+ def segment_frame(self, frame, use_temporal=True):
277
+ """
278
+ Segment frame with lazy loading
279
+ Model loads on first call
280
+ """
281
+ return self.processor.segment_frame(frame, use_previous=use_temporal)
282
+
283
+ def reset(self):
284
+ """Reset for new video"""
285
+ self.processor.reset()
286
+
287
+ @property
288
+ def is_available(self):
289
+ """Check if SAM2 can be loaded"""
290
+ try:
291
+ import sam2
292
+ return True
293
+ except ImportError:
294
+ return False
295
+
296
+ @property
297
+ def is_loaded(self):
298
+ """Check if model is already loaded"""
299
+ return self.processor.loaded
300
+
301
+ # ============================================
302
+ # INTEGRATION WITH VIDEO PROCESSING
303
+ # ============================================
304
 
305
+ # Global SAM2 instance
306
+ SAM2_VIDEO = SAM2LazyLoader()
307
+
308
+ def process_video_with_sam2(video_path, background_image, progress_callback=None):
309
+ """
310
+ Process video using SAM2 with temporal consistency
311
+ """
312
  try:
313
+ # Open video
 
314
  cap = cv2.VideoCapture(video_path)
315
 
316
+ # Get video properties
317
  fps = int(cap.get(cv2.CAP_PROP_FPS))
318
  width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
319
  height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
320
  total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
321
 
322
+ # Create output writer
323
+ output_path = '/tmp/output_sam2.mp4'
 
324
  fourcc = cv2.VideoWriter_fourcc(*'mp4v')
325
  out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
326
 
327
+ # Resize background
328
  background_resized = cv2.resize(background_image, (width, height))
329
 
330
+ # Reset SAM2 for new video
331
+ SAM2_VIDEO.reset()
332
+
333
  frame_count = 0
 
 
334
 
335
  while True:
336
  ret, frame = cap.read()
337
  if not ret:
 
 
 
 
338
  break
339
 
340
+ # Convert BGR to RGB
341
  frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
 
342
 
343
+ # Segment with SAM2 (with temporal consistency)
344
+ mask = SAM2_VIDEO.segment_frame(frame_rgb, use_temporal=(frame_count > 0))
345
+
346
+ if mask is not None:
347
+ # Apply mask
348
+ if mask.ndim == 2:
349
+ mask = np.expand_dims(mask, axis=2)
350
 
351
+ # Composite
352
+ composite = frame_rgb * mask + background_resized * (1 - mask)
353
+ composite = composite.astype(np.uint8)
 
 
 
 
 
 
 
 
 
 
354
 
355
+ # Convert back to BGR
356
+ composite_bgr = cv2.cvtColor(composite, cv2.COLOR_RGB2BGR)
357
+ else:
358
+ composite_bgr = frame
359
+
360
+ out.write(composite_bgr)
361
+ frame_count += 1
362
+
363
+ # Progress callback
364
+ if progress_callback:
365
+ progress = frame_count / total_frames
366
+ progress_callback(progress, f"SAM2 Processing: {frame_count}/{total_frames}")
367
+
368
+ # Memory cleanup every 50 frames
369
+ if frame_count % 50 == 0 and torch.cuda.is_available():
370
+ torch.cuda.empty_cache()
371
 
372
  cap.release()
373
  out.release()
 
374
 
375
+ logger.info(f" SAM2 video processing complete: {frame_count} frames")
376
  return output_path
377
 
378
  except Exception as e:
379
+ logger.error(f"SAM2 video processing failed: {e}")
380
  return None
381
 
382
+ # ============================================
383
+ # EXAMPLE USAGE
384
+ # ============================================
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
385
 
 
386
  def main():
387
+ st.title("🎬 Video Background Replacer with SAM2")
 
 
 
 
 
388
 
389
+ # Status display
390
+ col1, col2, col3 = st.columns(3)
 
 
 
391
 
392
  with col1:
393
+ if SAM2_VIDEO.is_available:
394
+ if SAM2_VIDEO.is_loaded:
395
+ st.success(" SAM2 Loaded")
396
+ else:
397
+ st.info("🎯 SAM2 Ready (loads on demand)")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
398
  else:
399
+ st.error(" SAM2 not installed")
400
 
401
+ # Process button
402
+ if st.button("Process with SAM2"):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
403
  if uploaded_video:
404
+ # This triggers model download on first use
405
+ result = process_video_with_sam2(
406
+ video_path,
407
+ background_image,
408
+ progress_callback=update_progress
409
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
410
 
411
+ if result:
412
+ st.success("✅ Video processed with SAM2!")
413
+ st.video(result)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
414
 
415
  if __name__ == "__main__":
416
+ main()