MogensR committed on
Commit
a70dcf0
·
1 Parent(s): 05b645d

Update utils/system/memory_manager.py

Browse files
Files changed (1) hide show
  1. utils/system/memory_manager.py +377 -399
utils/system/memory_manager.py CHANGED
@@ -1,473 +1,451 @@
 
1
  """
2
- Memory Management Module
3
- Handles memory cleanup, monitoring, and GPU resource management
 
 
 
 
 
 
 
 
 
4
  """
5
 
 
6
  import gc
7
  import os
8
- import psutil
9
- import torch
10
  import time
11
  import logging
12
  import threading
13
  from typing import Dict, Any, Optional, Callable
14
- from core.exceptions import MemoryError, ResourceExhaustionError # Updated import path
 
 
 
 
 
 
 
 
 
 
15
 
16
  logger = logging.getLogger(__name__)
17
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  class MemoryManager:
19
  """
20
- Comprehensive memory management for video processing applications
21
  """
22
-
23
  def __init__(self, device, memory_limit_gb: Optional[float] = None):
24
- if isinstance(device, str):
25
- device = torch.device(device)
26
- self.device = device
27
- self.gpu_available = device.type in ['cuda', 'mps']
 
 
 
 
 
 
 
 
28
  self.memory_limit_gb = memory_limit_gb
29
- self.cleanup_callbacks = []
30
  self.monitoring_active = False
31
- self.monitoring_thread = None
32
  self.stats = {
33
- 'cleanup_count': 0,
34
- 'peak_memory_usage': 0.0,
35
- 'total_allocated': 0.0,
36
- 'total_freed': 0.0
37
  }
38
-
39
- # Initialize memory monitoring
40
  self._initialize_memory_limits()
41
- logger.info(f"MemoryManager initialized for device: {device}")
42
-
 
 
 
 
43
  def _initialize_memory_limits(self):
44
- """Initialize memory limits based on device and system"""
45
- if self.device.type == 'cuda':
46
- try:
47
- device_idx = self.device.index or 0
48
- device_props = torch.cuda.get_device_properties(device_idx)
49
- total_memory_gb = device_props.total_memory / (1024**3)
50
-
51
- # Use 80% of GPU memory as default limit if not specified
52
  if self.memory_limit_gb is None:
53
- self.memory_limit_gb = total_memory_gb * 0.8
54
-
55
- logger.info(f"CUDA memory limit set to {self.memory_limit_gb:.1f}GB "
56
- f"(total: {total_memory_gb:.1f}GB)")
57
-
58
- except Exception as e:
59
- logger.warning(f"Could not get CUDA memory info: {e}")
60
- self.memory_limit_gb = 4.0 # Conservative fallback
61
-
62
- elif self.device.type == 'mps':
63
- # MPS uses unified memory, so check system memory
64
- system_memory_gb = psutil.virtual_memory().total / (1024**3)
65
- if self.memory_limit_gb is None:
66
- # Use 50% of system memory for MPS as it shares with system
67
- self.memory_limit_gb = system_memory_gb * 0.5
68
-
69
- logger.info(f"MPS memory limit set to {self.memory_limit_gb:.1f}GB "
70
- f"(system: {system_memory_gb:.1f}GB)")
71
-
72
- else: # CPU
73
- system_memory_gb = psutil.virtual_memory().total / (1024**3)
74
  if self.memory_limit_gb is None:
75
- # Use 60% of system memory for CPU processing
76
- self.memory_limit_gb = system_memory_gb * 0.6
77
-
78
- logger.info(f"CPU memory limit set to {self.memory_limit_gb:.1f}GB "
79
- f"(system: {system_memory_gb:.1f}GB)")
80
-
 
 
 
 
 
 
 
 
 
 
 
 
 
81
  def get_memory_usage(self) -> Dict[str, Any]:
82
- """Get comprehensive memory usage statistics"""
83
- usage = {
84
- 'device_type': self.device.type,
85
- 'memory_limit_gb': self.memory_limit_gb,
86
- 'timestamp': time.time()
87
  }
88
-
89
- try:
90
- if self.device.type == 'cuda':
91
- device_idx = self.device.index or 0
92
-
93
- # GPU memory
94
- allocated = torch.cuda.memory_allocated(device_idx)
95
- reserved = torch.cuda.memory_reserved(device_idx)
96
- total = torch.cuda.get_device_properties(device_idx).total_memory
97
-
98
- usage.update({
99
- 'gpu_allocated_gb': allocated / (1024**3),
100
- 'gpu_reserved_gb': reserved / (1024**3),
101
- 'gpu_total_gb': total / (1024**3),
102
- 'gpu_utilization_percent': (allocated / total) * 100,
103
- 'gpu_reserved_percent': (reserved / total) * 100,
104
- 'gpu_free_gb': (total - reserved) / (1024**3)
105
- })
106
-
107
- # Peak memory tracking
108
- max_allocated = torch.cuda.max_memory_allocated(device_idx)
109
- max_reserved = torch.cuda.max_memory_reserved(device_idx)
110
- usage.update({
111
- 'gpu_max_allocated_gb': max_allocated / (1024**3),
112
- 'gpu_max_reserved_gb': max_reserved / (1024**3)
113
- })
114
-
115
- elif self.device.type == 'mps':
116
- # MPS doesn't have explicit memory tracking like CUDA
117
- # Fall back to system memory monitoring
118
  vm = psutil.virtual_memory()
119
- usage.update({
120
- 'system_memory_gb': vm.total / (1024**3),
121
- 'system_available_gb': vm.available / (1024**3),
122
- 'system_used_gb': vm.used / (1024**3),
123
- 'system_utilization_percent': vm.percent
124
- })
125
-
126
- except Exception as e:
127
- logger.warning(f"Error getting GPU memory usage: {e}")
128
-
129
- # Always include system memory info
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
130
  try:
131
- vm = psutil.virtual_memory()
132
- swap = psutil.swap_memory()
133
-
134
- usage.update({
135
- 'system_total_gb': vm.total / (1024**3),
136
- 'system_available_gb': vm.available / (1024**3),
137
- 'system_used_gb': vm.used / (1024**3),
138
- 'system_percent': vm.percent,
139
- 'swap_total_gb': swap.total / (1024**3),
140
- 'swap_used_gb': swap.used / (1024**3),
141
- 'swap_percent': swap.percent
142
- })
143
-
144
- except Exception as e:
145
- logger.warning(f"Error getting system memory usage: {e}")
146
-
147
- # Process-specific memory
 
 
 
 
 
 
 
 
 
 
148
  try:
149
- process = psutil.Process()
150
- memory_info = process.memory_info()
151
- usage.update({
152
- 'process_rss_gb': memory_info.rss / (1024**3), # Physical memory
153
- 'process_vms_gb': memory_info.vms / (1024**3), # Virtual memory
154
- })
155
-
156
  except Exception as e:
157
- logger.warning(f"Error getting process memory usage: {e}")
158
-
159
- # Update peak tracking
160
- current_usage = usage.get('gpu_allocated_gb', usage.get('system_used_gb', 0))
161
- if current_usage > self.stats['peak_memory_usage']:
162
- self.stats['peak_memory_usage'] = current_usage
163
-
164
- return usage
165
-
166
- def cleanup_basic(self):
167
- """Basic memory cleanup - lightweight operation"""
168
  try:
169
  gc.collect()
170
-
171
- if self.device.type == 'cuda':
 
 
172
  torch.cuda.empty_cache()
173
-
174
- self.stats['cleanup_count'] += 1
175
- logger.debug("Basic memory cleanup completed")
176
-
177
- except Exception as e:
178
- logger.warning(f"Basic memory cleanup failed: {e}")
179
-
180
- def cleanup_aggressive(self):
181
- """Aggressive memory cleanup - more thorough but slower"""
182
- try:
183
- start_time = time.time()
184
-
185
- # Run all registered cleanup callbacks first
186
- for callback in self.cleanup_callbacks:
187
- try:
188
- callback()
189
- except Exception as e:
190
- logger.warning(f"Cleanup callback failed: {e}")
191
-
192
- # Multiple garbage collection passes
193
- for _ in range(3):
194
- gc.collect()
195
-
196
- if self.device.type == 'cuda':
197
- # CUDA-specific aggressive cleanup
198
  torch.cuda.empty_cache()
199
- torch.cuda.synchronize()
200
-
201
- # Reset peak memory statistics
202
- device_idx = self.device.index or 0
203
- torch.cuda.reset_peak_memory_stats(device_idx)
204
-
205
- elif self.device.type == 'mps':
206
- # MPS cleanup - mainly garbage collection
207
- # Could add MPS-specific operations if available
208
  pass
209
-
210
- cleanup_time = time.time() - start_time
211
- self.stats['cleanup_count'] += 1
212
-
213
- logger.debug(f"Aggressive memory cleanup completed in {cleanup_time:.2f}s")
214
-
215
- except Exception as e:
216
- logger.error(f"Aggressive memory cleanup failed: {e}")
217
- raise MemoryError("aggressive_cleanup", str(e))
218
-
219
- def check_memory_pressure(self, threshold_percent: float = 85.0) -> Dict[str, Any]:
220
- """Check if system is under memory pressure"""
221
- usage = self.get_memory_usage()
222
-
223
- pressure_info = {
224
- 'under_pressure': False,
225
- 'pressure_level': 'normal', # normal, warning, critical
226
- 'recommendations': [],
227
- 'usage_percent': 0.0
228
- }
229
-
230
- # Determine usage percentage based on device type
231
- if self.device.type == 'cuda':
232
- usage_percent = usage.get('gpu_utilization_percent', 0)
233
- pressure_info['usage_percent'] = usage_percent
234
-
235
- if usage_percent >= threshold_percent:
236
- pressure_info['under_pressure'] = True
237
-
238
- if usage_percent >= 95:
239
- pressure_info['pressure_level'] = 'critical'
240
- pressure_info['recommendations'].extend([
241
- 'Reduce batch size immediately',
242
- 'Enable gradient checkpointing',
243
- 'Consider switching to CPU processing'
244
- ])
245
- elif usage_percent >= threshold_percent:
246
- pressure_info['pressure_level'] = 'warning'
247
- pressure_info['recommendations'].extend([
248
- 'Run aggressive memory cleanup',
249
- 'Reduce keyframe interval',
250
- 'Monitor memory usage closely'
251
- ])
252
-
253
- else: # CPU or MPS - use system memory
254
- usage_percent = usage.get('system_percent', 0)
255
- pressure_info['usage_percent'] = usage_percent
256
-
257
- if usage_percent >= threshold_percent:
258
- pressure_info['under_pressure'] = True
259
-
260
- if usage_percent >= 95:
261
- pressure_info['pressure_level'] = 'critical'
262
- pressure_info['recommendations'].extend([
263
- 'Free system memory immediately',
264
- 'Close unnecessary applications',
265
- 'Reduce video processing quality'
266
- ])
267
- elif usage_percent >= threshold_percent:
268
- pressure_info['pressure_level'] = 'warning'
269
- pressure_info['recommendations'].extend([
270
- 'Run memory cleanup',
271
- 'Monitor system memory',
272
- 'Consider processing in smaller chunks'
273
- ])
274
-
275
- return pressure_info
276
-
277
- def auto_cleanup_if_needed(self, pressure_threshold: float = 80.0) -> bool:
278
- """Automatically run cleanup if memory pressure is detected"""
279
- pressure = self.check_memory_pressure(pressure_threshold)
280
-
281
- if pressure['under_pressure']:
282
- cleanup_method = (
283
- self.cleanup_aggressive
284
- if pressure['pressure_level'] == 'critical'
285
- else self.cleanup_basic
286
- )
287
-
288
- logger.info(f"Auto-cleanup triggered due to {pressure['pressure_level']} "
289
- f"memory pressure ({pressure['usage_percent']:.1f}%)")
290
-
291
- cleanup_method()
292
- return True
293
-
294
- return False
295
-
296
  def register_cleanup_callback(self, callback: Callable):
297
- """Register a callback to run during cleanup operations"""
298
  self.cleanup_callbacks.append(callback)
299
- logger.debug("Cleanup callback registered")
300
-
301
- def start_monitoring(self, interval_seconds: float = 30.0,
302
- pressure_callback: Optional[Callable] = None):
303
- """Start background memory monitoring"""
304
  if self.monitoring_active:
305
  logger.warning("Memory monitoring already active")
306
  return
307
-
308
  self.monitoring_active = True
309
-
310
- def monitor_loop():
311
  while self.monitoring_active:
312
  try:
313
  pressure = self.check_memory_pressure()
314
-
315
- if pressure['under_pressure']:
316
- logger.warning(f"Memory pressure detected: {pressure['pressure_level']} "
317
- f"({pressure['usage_percent']:.1f}%)")
318
-
319
  if pressure_callback:
320
  try:
321
  pressure_callback(pressure)
322
  except Exception as e:
323
  logger.error(f"Pressure callback failed: {e}")
324
-
325
- # Auto-cleanup on critical pressure
326
- if pressure['pressure_level'] == 'critical':
327
  self.cleanup_aggressive()
328
-
329
- time.sleep(interval_seconds)
330
-
331
  except Exception as e:
332
  logger.error(f"Memory monitoring error: {e}")
333
- time.sleep(interval_seconds)
334
-
335
- self.monitoring_thread = threading.Thread(target=monitor_loop, daemon=True)
336
  self.monitoring_thread.start()
337
-
338
  logger.info(f"Memory monitoring started (interval: {interval_seconds}s)")
339
-
340
  def stop_monitoring(self):
341
- """Stop background memory monitoring"""
342
  if self.monitoring_active:
343
  self.monitoring_active = False
344
  if self.monitoring_thread and self.monitoring_thread.is_alive():
345
  self.monitoring_thread.join(timeout=5.0)
346
  logger.info("Memory monitoring stopped")
347
-
348
- def estimate_memory_requirement(self, video_width: int, video_height: int,
349
- frames_in_memory: int = 5) -> Dict[str, float]:
350
- """Estimate memory requirements for video processing"""
351
-
352
- # Base memory per frame (RGB image)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
353
  bytes_per_frame = video_width * video_height * 3
354
-
355
- # Additional overhead for processing
356
- overhead_multiplier = 3.0 # For masks, intermediate results, etc.
357
-
358
- estimated_memory = {
359
- 'frames_memory_gb': (bytes_per_frame * frames_in_memory * overhead_multiplier) / (1024**3),
360
- 'model_memory_gb': 4.0, # Rough estimate for SAM2 + MatAnyone
361
- 'system_overhead_gb': 2.0,
362
- 'total_estimated_gb': 0.0
363
  }
364
-
365
- estimated_memory['total_estimated_gb'] = sum([
366
- estimated_memory['frames_memory_gb'],
367
- estimated_memory['model_memory_gb'],
368
- estimated_memory['system_overhead_gb']
369
- ])
370
-
371
- return estimated_memory
372
-
373
- def can_process_video(self, video_width: int, video_height: int,
374
- frames_in_memory: int = 5) -> Dict[str, Any]:
375
- """Check if video can be processed with current memory"""
376
-
377
  estimate = self.estimate_memory_requirement(video_width, video_height, frames_in_memory)
378
- current_usage = self.get_memory_usage()
379
-
380
- # Available memory calculation
381
- if self.device.type == 'cuda':
382
- available_memory = current_usage.get('gpu_free_gb', 0)
383
  else:
384
- available_memory = current_usage.get('system_available_gb', 0)
385
-
386
- can_process = estimate['total_estimated_gb'] <= available_memory
387
-
388
- result = {
389
- 'can_process': can_process,
390
- 'estimated_memory_gb': estimate['total_estimated_gb'],
391
- 'available_memory_gb': available_memory,
392
- 'memory_margin_gb': available_memory - estimate['total_estimated_gb'],
393
- 'recommendations': []
394
- }
395
-
396
- if not can_process:
397
- deficit = estimate['total_estimated_gb'] - available_memory
398
- result['recommendations'] = [
399
- f"Free {deficit:.1f}GB of memory",
400
- "Reduce video resolution",
401
  "Process in smaller chunks",
402
- "Use lower quality settings"
403
- ]
404
- elif result['memory_margin_gb'] < 1.0:
405
- result['recommendations'] = [
406
- "Memory margin is low",
407
- "Monitor memory usage during processing",
408
- "Consider reducing batch size"
409
- ]
410
-
411
- return result
412
-
413
- def get_optimization_suggestions(self) -> Dict[str, Any]:
414
- """Get memory optimization suggestions based on current state"""
415
- usage = self.get_memory_usage()
416
-
417
- suggestions = {
418
- 'current_usage_percent': usage.get('gpu_utilization_percent', usage.get('system_percent', 0)),
419
- 'suggestions': [],
420
- 'priority': 'low' # low, medium, high
421
  }
422
-
423
- usage_percent = suggestions['current_usage_percent']
424
-
425
- if usage_percent >= 90:
426
- suggestions['priority'] = 'high'
427
- suggestions['suggestions'].extend([
428
- 'Run aggressive memory cleanup immediately',
429
- 'Reduce batch size to 1',
430
- 'Enable gradient checkpointing if available',
431
- 'Consider switching to CPU processing'
432
- ])
433
- elif usage_percent >= 75:
434
- suggestions['priority'] = 'medium'
435
- suggestions['suggestions'].extend([
436
- 'Run memory cleanup regularly',
437
- 'Monitor memory usage closely',
438
- 'Reduce keyframe interval',
439
- 'Use mixed precision if supported'
440
- ])
441
- elif usage_percent >= 50:
442
- suggestions['priority'] = 'low'
443
- suggestions['suggestions'].extend([
444
- 'Current usage is acceptable',
445
- 'Regular cleanup should be sufficient',
446
- 'Monitor for memory leaks during long operations'
447
- ])
448
- else:
449
- suggestions['suggestions'] = [
450
- 'Memory usage is optimal',
451
- 'No immediate action required'
452
- ]
453
-
454
- return suggestions
455
-
456
  def get_stats(self) -> Dict[str, Any]:
457
- """Get memory management statistics"""
458
  return {
459
- 'cleanup_count': self.stats['cleanup_count'],
460
- 'peak_memory_usage_gb': self.stats['peak_memory_usage'],
461
- 'monitoring_active': self.monitoring_active,
462
- 'device_type': self.device.type,
463
- 'memory_limit_gb': self.memory_limit_gb,
464
- 'registered_callbacks': len(self.cleanup_callbacks)
 
465
  }
466
-
467
  def __del__(self):
468
- """Cleanup when MemoryManager is destroyed"""
469
  try:
470
  self.stop_monitoring()
471
  self.cleanup_aggressive()
472
  except Exception:
473
- pass # Ignore errors during cleanup
 
1
+ #!/usr/bin/env python3
2
  """
3
+ Memory Manager for BackgroundFX Pro
4
+ - Safe on CPU/CUDA/MPS (mostly CUDA/T4 on Spaces)
5
+ - Accepts `device` as str or torch.device
6
+ - Optional per-process VRAM cap (env or method)
7
+ - Detailed usage reporting (CPU/RAM + VRAM + torch allocator)
8
+ - Light and aggressive cleanup paths
9
+ - Background monitor (optional)
10
+
11
+ Env switches:
12
+ BFX_DISABLE_LIMIT=1 -> do not set VRAM fraction automatically
13
+ BFX_CUDA_FRACTION=0.80 -> fraction to cap per-process VRAM (0.10..0.95)
14
  """
15
 
16
+ from __future__ import annotations
17
  import gc
18
  import os
 
 
19
  import time
20
  import logging
21
  import threading
22
  from typing import Dict, Any, Optional, Callable
23
+
24
+ # Optional deps
25
+ try:
26
+ import psutil
27
+ except Exception: # pragma: no cover
28
+ psutil = None
29
+
30
+ try:
31
+ import torch
32
+ except Exception: # pragma: no cover
33
+ torch = None
34
 
35
  logger = logging.getLogger(__name__)
36
 
37
# ---- local exception to avoid shadowing built-in MemoryError ----
class MemoryManagerError(Exception):
    """Raised for unrecoverable memory-management failures."""
40
+
41
+
42
+ def _bytes_to_gb(x: int | float) -> float:
43
+ try:
44
+ return float(x) / (1024**3)
45
+ except Exception:
46
+ return 0.0
47
+
48
+
49
def _normalize_device(dev) -> "torch.device":
    """Coerce *dev* (str, torch.device, or device-like) to a device object.

    When torch is not installed, returns a minimal CPU stand-in that
    exposes the same ``.type`` / ``.index`` attributes.
    """
    if torch is None:
        class _CpuStub:  # fake CPU device
            type = "cpu"
            index = None
        return _CpuStub()  # type: ignore[return-value]

    if isinstance(dev, str):
        return torch.device(dev)
    # Anything already device-like is passed through; otherwise default CPU.
    return dev if hasattr(dev, "type") else torch.device("cpu")
63
+
64
+
65
def _cuda_index(device) -> Optional[int]:
    """Return the CUDA ordinal for *device*, or None for non-CUDA devices.

    A bare "cuda" device (index missing) is normalized to ordinal 0.
    """
    if getattr(device, "type", "cpu") != "cuda":
        return None
    ordinal = getattr(device, "index", None)
    return 0 if ordinal is None else int(ordinal)
73
+
74
+
75
  class MemoryManager:
76
  """
77
+ Comprehensive memory management with VRAM cap + cleanup utilities.
78
  """
79
+
80
def __init__(self, device, memory_limit_gb: Optional[float] = None):
    """Set up device bookkeeping, counters, and (optionally) a VRAM cap.

    Args:
        device: str or torch.device whose memory this manager oversees.
        memory_limit_gb: explicit soft limit; derived from hardware when None.
    """
    self.device = _normalize_device(device)
    self.device_type = getattr(self.device, "type", "cpu")
    self.cuda_idx = _cuda_index(self.device)

    # Resolve availability flags once so later code paths stay cheap.
    cuda_ok = bool(torch and self.device_type == "cuda" and torch.cuda.is_available())
    mps_ok = bool(
        torch
        and self.device_type == "mps"
        and getattr(torch.backends, "mps", None)
        and torch.backends.mps.is_available()
    )
    self.gpu_available = cuda_ok
    self.mps_available = mps_ok

    self.memory_limit_gb = memory_limit_gb
    self.cleanup_callbacks: list[Callable] = []
    self.monitoring_active = False
    self.monitoring_thread: Optional[threading.Thread] = None
    self.stats = dict(
        cleanup_count=0,
        peak_memory_usage=0.0,
        total_allocated=0.0,
        total_freed=0.0,
    )
    self.applied_fraction: Optional[float] = None

    # Baseline limit first, then the optional env-driven CUDA cap.
    self._initialize_memory_limits()
    self._maybe_apply_vram_fraction()
    logger.info(f"MemoryManager initialized (device={self.device}, cuda={self.gpu_available})")
108
+
109
+ # -------------------------------
110
+ # init helpers
111
+ # -------------------------------
112
def _initialize_memory_limits(self):
    """Derive a baseline soft memory limit for the active device.

    CUDA: 80% of device VRAM; MPS: 50% of system RAM (unified memory);
    CPU: 60% of system RAM. An explicit limit passed to __init__ wins.
    A 4GB conservative fallback is used when probing fails entirely.
    """
    try:
        if self.gpu_available:
            props = torch.cuda.get_device_properties(self.cuda_idx or 0)
            total_gb = _bytes_to_gb(props.total_memory)
            if self.memory_limit_gb is None:
                self.memory_limit_gb = max(0.5, total_gb * 0.80)  # default 80%
            logger.info(
                f"CUDA memory limit baseline ~{self.memory_limit_gb:.1f}GB "
                f"(device total {total_gb:.1f}GB)"
            )
        else:
            # MPS and CPU both budget off system RAM (psutil is optional).
            vm = psutil.virtual_memory() if psutil else None
            total_gb = _bytes_to_gb(vm.total) if vm else 0.0
            if self.mps_available:
                if self.memory_limit_gb is None:
                    self.memory_limit_gb = max(0.5, total_gb * 0.50)
                logger.info(f"MPS memory baseline ~{self.memory_limit_gb:.1f}GB (system {total_gb:.1f}GB)")
            else:
                if self.memory_limit_gb is None:
                    self.memory_limit_gb = max(0.5, total_gb * 0.60)
                logger.info(f"CPU memory baseline ~{self.memory_limit_gb:.1f}GB (system {total_gb:.1f}GB)")
    except Exception as e:
        logger.warning(f"Memory limit init failed: {e}")

    if self.memory_limit_gb is None:
        self.memory_limit_gb = 4.0  # conservative fallback
139
+
140
def _maybe_apply_vram_fraction(self):
    """Apply the per-process CUDA VRAM cap unless disabled via env.

    Honors BFX_DISABLE_LIMIT (skip entirely) and BFX_CUDA_FRACTION
    (override the default 0.80 fraction; bad values fall back to 0.80).
    """
    if not self.gpu_available or torch is None:
        return
    if os.environ.get("BFX_DISABLE_LIMIT", ""):
        return

    raw = os.environ.get("BFX_CUDA_FRACTION", "").strip()
    try:
        fraction = float(raw) if raw else 0.80
    except Exception:
        fraction = 0.80

    applied = self.limit_cuda_memory(fraction=fraction)
    if applied:
        logger.info(f"Per-process CUDA memory fraction set to {applied:.2f} on device {self.cuda_idx or 0}")
153
+
154
+ # -------------------------------
155
+ # public API
156
+ # -------------------------------
157
def get_memory_usage(self) -> Dict[str, Any]:
    """Snapshot system RAM, swap, process, VRAM, and torch allocator stats.

    Returns a flat dict; keys present depend on what could be probed
    (psutil for system/process stats, CUDA for VRAM/allocator stats).
    Also updates the running peak-usage tracker as a side effect.
    """
    usage: Dict[str, Any] = {
        "device_type": self.device_type,
        "memory_limit_gb": self.memory_limit_gb,
        "timestamp": time.time(),
    }

    # CPU / system
    if psutil:
        try:
            vm = psutil.virtual_memory()
            usage["system_total_gb"] = round(_bytes_to_gb(vm.total), 3)
            usage["system_available_gb"] = round(_bytes_to_gb(vm.available), 3)
            usage["system_used_gb"] = round(_bytes_to_gb(vm.used), 3)
            usage["system_percent"] = float(vm.percent)

            swap = psutil.swap_memory()
            usage["swap_total_gb"] = round(_bytes_to_gb(swap.total), 3)
            usage["swap_used_gb"] = round(_bytes_to_gb(swap.used), 3)
            usage["swap_percent"] = float(swap.percent)

            mi = psutil.Process().memory_info()
            usage["process_rss_gb"] = round(_bytes_to_gb(mi.rss), 3)
            usage["process_vms_gb"] = round(_bytes_to_gb(mi.vms), 3)
        except Exception as e:
            logger.debug(f"psutil stats error: {e}")

    # GPU
    if self.gpu_available and torch is not None:
        try:
            # mem_get_info returns (free, total) in bytes
            free_b, total_b = torch.cuda.mem_get_info(self.cuda_idx or 0)
            used_b = total_b - free_b
            usage["vram_total_gb"] = round(_bytes_to_gb(total_b), 3)
            usage["vram_used_gb"] = round(_bytes_to_gb(used_b), 3)
            usage["vram_free_gb"] = round(_bytes_to_gb(free_b), 3)
            usage["vram_used_percent"] = float(used_b / total_b * 100.0) if total_b else 0.0
        except Exception as e:
            logger.debug(f"mem_get_info failed: {e}")

        # torch allocator stats
        try:
            idx = self.cuda_idx or 0
            usage["torch_allocated_gb"] = round(_bytes_to_gb(torch.cuda.memory_allocated(idx)), 3)
            usage["torch_reserved_gb"] = round(_bytes_to_gb(torch.cuda.memory_reserved(idx)), 3)
            # inactive split (torch 2.x allocator detail)
            try:
                inactive = torch.cuda.memory_stats(idx).get("inactive_split_bytes.all.current", 0)
                usage["torch_inactive_split_gb"] = round(_bytes_to_gb(inactive), 3)
            except Exception:
                pass
        except Exception as e:
            logger.debug(f"allocator stats failed: {e}")

    usage["applied_fraction"] = self.applied_fraction

    # Update peak tracker: prefer VRAM when reported, else system usage.
    current = usage.get("vram_used_gb", usage.get("system_used_gb", 0.0))
    try:
        if float(current) > float(self.stats["peak_memory_usage"]):
            self.stats["peak_memory_usage"] = float(current)
    except Exception:
        pass

    return usage
239
+
240
def limit_cuda_memory(self, fraction: Optional[float] = None, max_gb: Optional[float] = None) -> Optional[float]:
    """Cap this process's CUDA allocator to a fraction of total VRAM.

    Pass either *fraction* directly or an absolute *max_gb* budget (which
    is converted using the device's total VRAM). The fraction is clamped
    to [0.10, 0.95]. Returns the applied fraction, or None when CUDA is
    unavailable or the cap could not be applied.
    """
    if not self.gpu_available or torch is None:
        return None

    if max_gb is not None:
        # Translate an absolute GB budget into a fraction of total VRAM.
        try:
            _, total_b = torch.cuda.mem_get_info(self.cuda_idx or 0)
            total_gb = _bytes_to_gb(total_b)
            if total_gb <= 0:
                return None
            fraction = min(max(0.10, max_gb / total_gb), 0.95)
        except Exception as e:
            logger.debug(f"fraction from max_gb failed: {e}")
            return None

    fraction = 0.80 if fraction is None else fraction
    fraction = float(max(0.10, min(0.95, fraction)))

    try:
        torch.cuda.set_per_process_memory_fraction(fraction, device=self.cuda_idx or 0)
        self.applied_fraction = fraction
        return fraction
    except Exception as e:
        logger.debug(f"set_per_process_memory_fraction failed: {e}")
        return None
267
+
268
def cleanup(self) -> None:
    """Light, never-raising cleanup for use frequently between steps."""
    # Python-level garbage first.
    try:
        gc.collect()
    except Exception:
        pass
    # Then return cached CUDA blocks to the driver, if applicable.
    if self.gpu_available and torch is not None:
        try:
            torch.cuda.empty_cache()
        except Exception:
            pass
    self.stats["cleanup_count"] += 1
280
+
281
def cleanup_basic(self) -> None:
    """Backward-compatible alias: delegates to cleanup()."""
    self.cleanup()
284
+
285
def cleanup_aggressive(self) -> None:
    """Aggressive cleanup for OOM recovery or big scene switches.

    Runs registered cleanup callbacks, then a sequence of independent
    best-effort CUDA steps (sync, cache flush, peak-stat reset, IPC
    collect), then a double GC pass. Never raises.
    """
    # Invoke registered callbacks first — the previous rewrite registered
    # callbacks via register_cleanup_callback but never called them.
    for cb in list(getattr(self, "cleanup_callbacks", []) or []):
        try:
            cb()
        except Exception as e:
            logger.warning(f"Cleanup callback failed: {e}")

    if self.gpu_available and torch is not None:
        idx = self.cuda_idx or 0
        # Each step is isolated so one failing CUDA call cannot stop the rest.
        try:
            torch.cuda.synchronize(idx)
        except Exception:
            pass
        try:
            torch.cuda.empty_cache()
        except Exception:
            pass
        try:
            torch.cuda.reset_peak_memory_stats(idx)
        except Exception:
            pass
        try:
            if hasattr(torch.cuda, "ipc_collect"):
                torch.cuda.ipc_collect()
        except Exception:
            pass

    try:
        gc.collect(); gc.collect()
    except Exception:
        pass
    self.stats["cleanup_count"] += 1
310
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
311
def register_cleanup_callback(self, callback: Callable) -> None:
    """Register *callback* to be invoked during cleanup passes.

    Raises:
        TypeError: if *callback* is not callable — fail fast here rather
            than storing a value that silently breaks cleanup later.
    """
    if not callable(callback):
        raise TypeError(f"cleanup callback must be callable, got {type(callback).__name__}")
    self.cleanup_callbacks.append(callback)
313
+
314
def start_monitoring(self, interval_seconds: float = 30.0, pressure_callback: Optional[Callable] = None):
    """Start a daemon thread that polls memory pressure periodically.

    Each tick: logs a warning when under pressure, passes the pressure
    dict to *pressure_callback* (if any), and runs aggressive cleanup on
    critical pressure.

    NOTE(review): in this reconstruction the callback fires every tick,
    not only under pressure — confirm against the original layout.
    """
    if self.monitoring_active:
        logger.warning("Memory monitoring already active")
        return

    self.monitoring_active = True

    def _watch():
        while self.monitoring_active:
            try:
                pressure = self.check_memory_pressure()
                if pressure["under_pressure"]:
                    logger.warning(
                        f"Memory pressure: {pressure['pressure_level']} "
                        f"({pressure['usage_percent']:.1f}%)"
                    )
                if pressure_callback:
                    try:
                        pressure_callback(pressure)
                    except Exception as e:
                        logger.error(f"Pressure callback failed: {e}")
                if pressure["pressure_level"] == "critical":
                    self.cleanup_aggressive()
            except Exception as e:
                logger.error(f"Memory monitoring error: {e}")
            time.sleep(interval_seconds)

    self.monitoring_thread = threading.Thread(target=_watch, daemon=True)
    self.monitoring_thread.start()
    logger.info(f"Memory monitoring started (interval: {interval_seconds}s)")
343
+
344
def stop_monitoring(self):
    """Stop the background monitor and join its thread (5s timeout)."""
    if not self.monitoring_active:
        return
    self.monitoring_active = False
    worker = self.monitoring_thread
    if worker and worker.is_alive():
        worker.join(timeout=5.0)
    logger.info("Memory monitoring stopped")
350
+
351
def check_memory_pressure(self, threshold_percent: float = 85.0) -> Dict[str, Any]:
    """Classify current memory pressure as normal/warning/critical.

    Uses VRAM utilisation on CUDA devices, otherwise system RAM
    utilisation. Returns under_pressure, pressure_level, usage_percent,
    and actionable recommendations.
    """
    usage = self.get_memory_usage()
    info = {
        "under_pressure": False,
        "pressure_level": "normal",
        "usage_percent": 0.0,
        "recommendations": [],
    }

    # Pick the utilisation source and the matching advice up front.
    if self.gpu_available:
        percent = usage.get("vram_used_percent", 0.0)
        critical_recs = [
            "Run aggressive memory cleanup",
            "Reduce frame cache / chunk size",
            "Lower resolution or disable previews",
        ]
        warning_recs = [
            "Run cleanup",
            "Monitor memory usage",
            "Reduce keyframe interval",
        ]
    else:
        percent = usage.get("system_percent", 0.0)
        critical_recs = [
            "Close other processes",
            "Reduce resolution",
            "Split video into chunks",
        ]
        warning_recs = [
            "Run cleanup",
            "Monitor usage",
            "Reduce processing footprint",
        ]

    info["usage_percent"] = percent
    if percent >= threshold_percent:
        info["under_pressure"] = True
        if percent >= 95:
            info["pressure_level"] = "critical"
            info["recommendations"] += critical_recs
        else:
            info["pressure_level"] = "warning"
            info["recommendations"] += warning_recs
    return info
399
+
400
def estimate_memory_requirement(self, video_width: int, video_height: int, frames_in_memory: int = 5) -> Dict[str, float]:
    """Rough memory budget (GB) for processing a video of the given size.

    Assumes 3 bytes/pixel RGB frames, a 3x overhead for masks and
    intermediates, a fixed ~4GB model footprint, and ~2GB system slack.
    """
    frame_bytes = video_width * video_height * 3
    working_set = frame_bytes * frames_in_memory * 3.0  # masks/intermediates
    estimate = {
        "frames_memory_gb": round(_bytes_to_gb(working_set), 3),
        "model_memory_gb": 4.0,
        "system_overhead_gb": 2.0,
    }
    estimate["total_estimated_gb"] = round(
        estimate["frames_memory_gb"] + estimate["model_memory_gb"] + estimate["system_overhead_gb"], 3
    )
    return estimate
413
+
414
def can_process_video(self, video_width: int, video_height: int, frames_in_memory: int = 5) -> Dict[str, Any]:
    """Check whether the estimated processing budget fits free memory.

    Compares estimate_memory_requirement() against free VRAM (CUDA) or
    available system RAM. Returns the verdict plus margin and, when it
    does not fit, mitigation recommendations.
    """
    estimate = self.estimate_memory_requirement(video_width, video_height, frames_in_memory)
    usage = self.get_memory_usage()

    key = "vram_free_gb" if self.gpu_available else "system_available_gb"
    available = usage.get(key, 0.0)

    fits = estimate["total_estimated_gb"] <= available
    return {
        "can_process": fits,
        "estimated_memory_gb": estimate["total_estimated_gb"],
        "available_memory_gb": available,
        "memory_margin_gb": round(available - estimate["total_estimated_gb"], 3),
        "recommendations": [] if fits else [
            "Reduce resolution or duration",
            "Process in smaller chunks",
            "Run aggressive cleanup before start",
        ],
    }
434
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
435
def get_stats(self) -> Dict[str, Any]:
    """Summarize manager state and counters for logging/diagnostics."""
    return dict(
        cleanup_count=self.stats["cleanup_count"],
        peak_memory_usage_gb=self.stats["peak_memory_usage"],
        device_type=self.device_type,
        memory_limit_gb=self.memory_limit_gb,
        applied_fraction=self.applied_fraction,
        monitoring_active=self.monitoring_active,
        callbacks_registered=len(self.cleanup_callbacks),
    )
445
+
446
def __del__(self):
    """Best-effort teardown: stop the monitor, then free what we can.

    All exceptions are swallowed because __del__ may run during
    interpreter shutdown when module globals are already torn down.
    """
    try:
        self.stop_monitoring()
        self.cleanup_aggressive()
    except Exception:
        pass