MogensR committed on
Commit
a70dcf0
·
1 Parent(s): 05b645d

Update utils/system/memory_manager.py

Browse files
Files changed (1) hide show
  1. utils/system/memory_manager.py +377 -399
utils/system/memory_manager.py CHANGED
@@ -1,473 +1,451 @@
 
1
  """
2
- Memory Management Module
3
- Handles memory cleanup, monitoring, and GPU resource management
 
 
 
 
 
 
 
 
 
4
  """
5
 
 
6
  import gc
7
  import os
8
- import psutil
9
- import torch
10
  import time
11
  import logging
12
  import threading
13
  from typing import Dict, Any, Optional, Callable
14
- from core.exceptions import MemoryError, ResourceExhaustionError # Updated import path
 
 
 
 
 
 
 
 
 
 
15
 
16
  logger = logging.getLogger(__name__)
17
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  class MemoryManager:
19
  """
20
- Comprehensive memory management for video processing applications
21
  """
22
-
23
  def __init__(self, device, memory_limit_gb: Optional[float] = None):
24
- if isinstance(device, str):
25
- device = torch.device(device)
26
- self.device = device
27
- self.gpu_available = device.type in ['cuda', 'mps']
 
 
 
 
 
 
 
 
28
  self.memory_limit_gb = memory_limit_gb
29
- self.cleanup_callbacks = []
30
  self.monitoring_active = False
31
- self.monitoring_thread = None
32
  self.stats = {
33
- 'cleanup_count': 0,
34
- 'peak_memory_usage': 0.0,
35
- 'total_allocated': 0.0,
36
- 'total_freed': 0.0
37
  }
38
-
39
- # Initialize memory monitoring
40
  self._initialize_memory_limits()
41
- logger.info(f"MemoryManager initialized for device: {device}")
42
-
 
 
 
 
43
  def _initialize_memory_limits(self):
44
- """Initialize memory limits based on device and system"""
45
- if self.device.type == 'cuda':
46
- try:
47
- device_idx = self.device.index or 0
48
- device_props = torch.cuda.get_device_properties(device_idx)
49
- total_memory_gb = device_props.total_memory / (1024**3)
50
-
51
- # Use 80% of GPU memory as default limit if not specified
52
  if self.memory_limit_gb is None:
53
- self.memory_limit_gb = total_memory_gb * 0.8
54
-
55
- logger.info(f"CUDA memory limit set to {self.memory_limit_gb:.1f}GB "
56
- f"(total: {total_memory_gb:.1f}GB)")
57
-
58
- except Exception as e:
59
- logger.warning(f"Could not get CUDA memory info: {e}")
60
- self.memory_limit_gb = 4.0 # Conservative fallback
61
-
62
- elif self.device.type == 'mps':
63
- # MPS uses unified memory, so check system memory
64
- system_memory_gb = psutil.virtual_memory().total / (1024**3)
65
- if self.memory_limit_gb is None:
66
- # Use 50% of system memory for MPS as it shares with system
67
- self.memory_limit_gb = system_memory_gb * 0.5
68
-
69
- logger.info(f"MPS memory limit set to {self.memory_limit_gb:.1f}GB "
70
- f"(system: {system_memory_gb:.1f}GB)")
71
-
72
- else: # CPU
73
- system_memory_gb = psutil.virtual_memory().total / (1024**3)
74
  if self.memory_limit_gb is None:
75
- # Use 60% of system memory for CPU processing
76
- self.memory_limit_gb = system_memory_gb * 0.6
77
-
78
- logger.info(f"CPU memory limit set to {self.memory_limit_gb:.1f}GB "
79
- f"(system: {system_memory_gb:.1f}GB)")
80
-
 
 
 
 
 
 
 
 
 
 
 
 
 
81
  def get_memory_usage(self) -> Dict[str, Any]:
82
- """Get comprehensive memory usage statistics"""
83
- usage = {
84
- 'device_type': self.device.type,
85
- 'memory_limit_gb': self.memory_limit_gb,
86
- 'timestamp': time.time()
87
  }
88
-
89
- try:
90
- if self.device.type == 'cuda':
91
- device_idx = self.device.index or 0
92
-
93
- # GPU memory
94
- allocated = torch.cuda.memory_allocated(device_idx)
95
- reserved = torch.cuda.memory_reserved(device_idx)
96
- total = torch.cuda.get_device_properties(device_idx).total_memory
97
-
98
- usage.update({
99
- 'gpu_allocated_gb': allocated / (1024**3),
100
- 'gpu_reserved_gb': reserved / (1024**3),
101
- 'gpu_total_gb': total / (1024**3),
102
- 'gpu_utilization_percent': (allocated / total) * 100,
103
- 'gpu_reserved_percent': (reserved / total) * 100,
104
- 'gpu_free_gb': (total - reserved) / (1024**3)
105
- })
106
-
107
- # Peak memory tracking
108
- max_allocated = torch.cuda.max_memory_allocated(device_idx)
109
- max_reserved = torch.cuda.max_memory_reserved(device_idx)
110
- usage.update({
111
- 'gpu_max_allocated_gb': max_allocated / (1024**3),
112
- 'gpu_max_reserved_gb': max_reserved / (1024**3)
113
- })
114
-
115
- elif self.device.type == 'mps':
116
- # MPS doesn't have explicit memory tracking like CUDA
117
- # Fall back to system memory monitoring
118
  vm = psutil.virtual_memory()
119
- usage.update({
120
- 'system_memory_gb': vm.total / (1024**3),
121
- 'system_available_gb': vm.available / (1024**3),
122
- 'system_used_gb': vm.used / (1024**3),
123
- 'system_utilization_percent': vm.percent
124
- })
125
-
126
- except Exception as e:
127
- logger.warning(f"Error getting GPU memory usage: {e}")
128
-
129
- # Always include system memory info
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
130
  try:
131
- vm = psutil.virtual_memory()
132
- swap = psutil.swap_memory()
133
-
134
- usage.update({
135
- 'system_total_gb': vm.total / (1024**3),
136
- 'system_available_gb': vm.available / (1024**3),
137
- 'system_used_gb': vm.used / (1024**3),
138
- 'system_percent': vm.percent,
139
- 'swap_total_gb': swap.total / (1024**3),
140
- 'swap_used_gb': swap.used / (1024**3),
141
- 'swap_percent': swap.percent
142
- })
143
-
144
- except Exception as e:
145
- logger.warning(f"Error getting system memory usage: {e}")
146
-
147
- # Process-specific memory
 
 
 
 
 
 
 
 
 
 
148
  try:
149
- process = psutil.Process()
150
- memory_info = process.memory_info()
151
- usage.update({
152
- 'process_rss_gb': memory_info.rss / (1024**3), # Physical memory
153
- 'process_vms_gb': memory_info.vms / (1024**3), # Virtual memory
154
- })
155
-
156
  except Exception as e:
157
- logger.warning(f"Error getting process memory usage: {e}")
158
-
159
- # Update peak tracking
160
- current_usage = usage.get('gpu_allocated_gb', usage.get('system_used_gb', 0))
161
- if current_usage > self.stats['peak_memory_usage']:
162
- self.stats['peak_memory_usage'] = current_usage
163
-
164
- return usage
165
-
166
- def cleanup_basic(self):
167
- """Basic memory cleanup - lightweight operation"""
168
  try:
169
  gc.collect()
170
-
171
- if self.device.type == 'cuda':
 
 
172
  torch.cuda.empty_cache()
173
-
174
- self.stats['cleanup_count'] += 1
175
- logger.debug("Basic memory cleanup completed")
176
-
177
- except Exception as e:
178
- logger.warning(f"Basic memory cleanup failed: {e}")
179
-
180
- def cleanup_aggressive(self):
181
- """Aggressive memory cleanup - more thorough but slower"""
182
- try:
183
- start_time = time.time()
184
-
185
- # Run all registered cleanup callbacks first
186
- for callback in self.cleanup_callbacks:
187
- try:
188
- callback()
189
- except Exception as e:
190
- logger.warning(f"Cleanup callback failed: {e}")
191
-
192
- # Multiple garbage collection passes
193
- for _ in range(3):
194
- gc.collect()
195
-
196
- if self.device.type == 'cuda':
197
- # CUDA-specific aggressive cleanup
198
  torch.cuda.empty_cache()
199
- torch.cuda.synchronize()
200
-
201
- # Reset peak memory statistics
202
- device_idx = self.device.index or 0
203
- torch.cuda.reset_peak_memory_stats(device_idx)
204
-
205
- elif self.device.type == 'mps':
206
- # MPS cleanup - mainly garbage collection
207
- # Could add MPS-specific operations if available
208
  pass
209
-
210
- cleanup_time = time.time() - start_time
211
- self.stats['cleanup_count'] += 1
212
-
213
- logger.debug(f"Aggressive memory cleanup completed in {cleanup_time:.2f}s")
214
-
215
- except Exception as e:
216
- logger.error(f"Aggressive memory cleanup failed: {e}")
217
- raise MemoryError("aggressive_cleanup", str(e))
218
-
219
- def check_memory_pressure(self, threshold_percent: float = 85.0) -> Dict[str, Any]:
220
- """Check if system is under memory pressure"""
221
- usage = self.get_memory_usage()
222
-
223
- pressure_info = {
224
- 'under_pressure': False,
225
- 'pressure_level': 'normal', # normal, warning, critical
226
- 'recommendations': [],
227
- 'usage_percent': 0.0
228
- }
229
-
230
- # Determine usage percentage based on device type
231
- if self.device.type == 'cuda':
232
- usage_percent = usage.get('gpu_utilization_percent', 0)
233
- pressure_info['usage_percent'] = usage_percent
234
-
235
- if usage_percent >= threshold_percent:
236
- pressure_info['under_pressure'] = True
237
-
238
- if usage_percent >= 95:
239
- pressure_info['pressure_level'] = 'critical'
240
- pressure_info['recommendations'].extend([
241
- 'Reduce batch size immediately',
242
- 'Enable gradient checkpointing',
243
- 'Consider switching to CPU processing'
244
- ])
245
- elif usage_percent >= threshold_percent:
246
- pressure_info['pressure_level'] = 'warning'
247
- pressure_info['recommendations'].extend([
248
- 'Run aggressive memory cleanup',
249
- 'Reduce keyframe interval',
250
- 'Monitor memory usage closely'
251
- ])
252
-
253
- else: # CPU or MPS - use system memory
254
- usage_percent = usage.get('system_percent', 0)
255
- pressure_info['usage_percent'] = usage_percent
256
-
257
- if usage_percent >= threshold_percent:
258
- pressure_info['under_pressure'] = True
259
-
260
- if usage_percent >= 95:
261
- pressure_info['pressure_level'] = 'critical'
262
- pressure_info['recommendations'].extend([
263
- 'Free system memory immediately',
264
- 'Close unnecessary applications',
265
- 'Reduce video processing quality'
266
- ])
267
- elif usage_percent >= threshold_percent:
268
- pressure_info['pressure_level'] = 'warning'
269
- pressure_info['recommendations'].extend([
270
- 'Run memory cleanup',
271
- 'Monitor system memory',
272
- 'Consider processing in smaller chunks'
273
- ])
274
-
275
- return pressure_info
276
-
277
- def auto_cleanup_if_needed(self, pressure_threshold: float = 80.0) -> bool:
278
- """Automatically run cleanup if memory pressure is detected"""
279
- pressure = self.check_memory_pressure(pressure_threshold)
280
-
281
- if pressure['under_pressure']:
282
- cleanup_method = (
283
- self.cleanup_aggressive
284
- if pressure['pressure_level'] == 'critical'
285
- else self.cleanup_basic
286
- )
287
-
288
- logger.info(f"Auto-cleanup triggered due to {pressure['pressure_level']} "
289
- f"memory pressure ({pressure['usage_percent']:.1f}%)")
290
-
291
- cleanup_method()
292
- return True
293
-
294
- return False
295
-
296
  def register_cleanup_callback(self, callback: Callable):
297
- """Register a callback to run during cleanup operations"""
298
  self.cleanup_callbacks.append(callback)
299
- logger.debug("Cleanup callback registered")
300
-
301
- def start_monitoring(self, interval_seconds: float = 30.0,
302
- pressure_callback: Optional[Callable] = None):
303
- """Start background memory monitoring"""
304
  if self.monitoring_active:
305
  logger.warning("Memory monitoring already active")
306
  return
307
-
308
  self.monitoring_active = True
309
-
310
- def monitor_loop():
311
  while self.monitoring_active:
312
  try:
313
  pressure = self.check_memory_pressure()
314
-
315
- if pressure['under_pressure']:
316
- logger.warning(f"Memory pressure detected: {pressure['pressure_level']} "
317
- f"({pressure['usage_percent']:.1f}%)")
318
-
319
  if pressure_callback:
320
  try:
321
  pressure_callback(pressure)
322
  except Exception as e:
323
  logger.error(f"Pressure callback failed: {e}")
324
-
325
- # Auto-cleanup on critical pressure
326
- if pressure['pressure_level'] == 'critical':
327
  self.cleanup_aggressive()
328
-
329
- time.sleep(interval_seconds)
330
-
331
  except Exception as e:
332
  logger.error(f"Memory monitoring error: {e}")
333
- time.sleep(interval_seconds)
334
-
335
- self.monitoring_thread = threading.Thread(target=monitor_loop, daemon=True)
336
  self.monitoring_thread.start()
337
-
338
  logger.info(f"Memory monitoring started (interval: {interval_seconds}s)")
339
-
340
  def stop_monitoring(self):
341
- """Stop background memory monitoring"""
342
  if self.monitoring_active:
343
  self.monitoring_active = False
344
  if self.monitoring_thread and self.monitoring_thread.is_alive():
345
  self.monitoring_thread.join(timeout=5.0)
346
  logger.info("Memory monitoring stopped")
347
-
348
- def estimate_memory_requirement(self, video_width: int, video_height: int,
349
- frames_in_memory: int = 5) -> Dict[str, float]:
350
- """Estimate memory requirements for video processing"""
351
-
352
- # Base memory per frame (RGB image)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
353
  bytes_per_frame = video_width * video_height * 3
354
-
355
- # Additional overhead for processing
356
- overhead_multiplier = 3.0 # For masks, intermediate results, etc.
357
-
358
- estimated_memory = {
359
- 'frames_memory_gb': (bytes_per_frame * frames_in_memory * overhead_multiplier) / (1024**3),
360
- 'model_memory_gb': 4.0, # Rough estimate for SAM2 + MatAnyone
361
- 'system_overhead_gb': 2.0,
362
- 'total_estimated_gb': 0.0
363
  }
364
-
365
- estimated_memory['total_estimated_gb'] = sum([
366
- estimated_memory['frames_memory_gb'],
367
- estimated_memory['model_memory_gb'],
368
- estimated_memory['system_overhead_gb']
369
- ])
370
-
371
- return estimated_memory
372
-
373
- def can_process_video(self, video_width: int, video_height: int,
374
- frames_in_memory: int = 5) -> Dict[str, Any]:
375
- """Check if video can be processed with current memory"""
376
-
377
  estimate = self.estimate_memory_requirement(video_width, video_height, frames_in_memory)
378
- current_usage = self.get_memory_usage()
379
-
380
- # Available memory calculation
381
- if self.device.type == 'cuda':
382
- available_memory = current_usage.get('gpu_free_gb', 0)
383
  else:
384
- available_memory = current_usage.get('system_available_gb', 0)
385
-
386
- can_process = estimate['total_estimated_gb'] <= available_memory
387
-
388
- result = {
389
- 'can_process': can_process,
390
- 'estimated_memory_gb': estimate['total_estimated_gb'],
391
- 'available_memory_gb': available_memory,
392
- 'memory_margin_gb': available_memory - estimate['total_estimated_gb'],
393
- 'recommendations': []
394
- }
395
-
396
- if not can_process:
397
- deficit = estimate['total_estimated_gb'] - available_memory
398
- result['recommendations'] = [
399
- f"Free {deficit:.1f}GB of memory",
400
- "Reduce video resolution",
401
  "Process in smaller chunks",
402
- "Use lower quality settings"
403
- ]
404
- elif result['memory_margin_gb'] < 1.0:
405
- result['recommendations'] = [
406
- "Memory margin is low",
407
- "Monitor memory usage during processing",
408
- "Consider reducing batch size"
409
- ]
410
-
411
- return result
412
-
413
- def get_optimization_suggestions(self) -> Dict[str, Any]:
414
- """Get memory optimization suggestions based on current state"""
415
- usage = self.get_memory_usage()
416
-
417
- suggestions = {
418
- 'current_usage_percent': usage.get('gpu_utilization_percent', usage.get('system_percent', 0)),
419
- 'suggestions': [],
420
- 'priority': 'low' # low, medium, high
421
  }
422
-
423
- usage_percent = suggestions['current_usage_percent']
424
-
425
- if usage_percent >= 90:
426
- suggestions['priority'] = 'high'
427
- suggestions['suggestions'].extend([
428
- 'Run aggressive memory cleanup immediately',
429
- 'Reduce batch size to 1',
430
- 'Enable gradient checkpointing if available',
431
- 'Consider switching to CPU processing'
432
- ])
433
- elif usage_percent >= 75:
434
- suggestions['priority'] = 'medium'
435
- suggestions['suggestions'].extend([
436
- 'Run memory cleanup regularly',
437
- 'Monitor memory usage closely',
438
- 'Reduce keyframe interval',
439
- 'Use mixed precision if supported'
440
- ])
441
- elif usage_percent >= 50:
442
- suggestions['priority'] = 'low'
443
- suggestions['suggestions'].extend([
444
- 'Current usage is acceptable',
445
- 'Regular cleanup should be sufficient',
446
- 'Monitor for memory leaks during long operations'
447
- ])
448
- else:
449
- suggestions['suggestions'] = [
450
- 'Memory usage is optimal',
451
- 'No immediate action required'
452
- ]
453
-
454
- return suggestions
455
-
456
  def get_stats(self) -> Dict[str, Any]:
457
- """Get memory management statistics"""
458
  return {
459
- 'cleanup_count': self.stats['cleanup_count'],
460
- 'peak_memory_usage_gb': self.stats['peak_memory_usage'],
461
- 'monitoring_active': self.monitoring_active,
462
- 'device_type': self.device.type,
463
- 'memory_limit_gb': self.memory_limit_gb,
464
- 'registered_callbacks': len(self.cleanup_callbacks)
 
465
  }
466
-
467
  def __del__(self):
468
- """Cleanup when MemoryManager is destroyed"""
469
  try:
470
  self.stop_monitoring()
471
  self.cleanup_aggressive()
472
  except Exception:
473
- pass # Ignore errors during cleanup
 
1
+ #!/usr/bin/env python3
2
  """
3
+ Memory Manager for BackgroundFX Pro
4
+ - Safe on CPU/CUDA/MPS (mostly CUDA/T4 on Spaces)
5
+ - Accepts `device` as str or torch.device
6
+ - Optional per-process VRAM cap (env or method)
7
+ - Detailed usage reporting (CPU/RAM + VRAM + torch allocator)
8
+ - Light and aggressive cleanup paths
9
+ - Background monitor (optional)
10
+
11
+ Env switches:
12
+ BFX_DISABLE_LIMIT=1 -> do not set VRAM fraction automatically
13
+ BFX_CUDA_FRACTION=0.80 -> fraction to cap per-process VRAM (0.10..0.95)
14
  """
15
 
16
+ from __future__ import annotations
17
  import gc
18
  import os
 
 
19
  import time
20
  import logging
21
  import threading
22
  from typing import Dict, Any, Optional, Callable
23
+
24
+ # Optional deps
25
+ try:
26
+ import psutil
27
+ except Exception: # pragma: no cover
28
+ psutil = None
29
+
30
+ try:
31
+ import torch
32
+ except Exception: # pragma: no cover
33
+ torch = None
34
 
35
  logger = logging.getLogger(__name__)
36
 
37
# ---- local exception to avoid shadowing built-in MemoryError ----
class MemoryManagerError(Exception):
    """Raised for unrecoverable memory-management failures."""
40
+
41
+
42
+ def _bytes_to_gb(x: int | float) -> float:
43
+ try:
44
+ return float(x) / (1024**3)
45
+ except Exception:
46
+ return 0.0
47
+
48
+
49
def _normalize_device(dev) -> "torch.device":
    """Coerce *dev* (str, torch.device, or device-like) to a device object.

    When torch is not installed, returns a minimal CPU stand-in that
    exposes the same ``.type`` / ``.index`` attributes.
    """
    if torch is None:
        class _CpuStub:  # fake CPU device
            type = "cpu"
            index = None
        return _CpuStub()  # type: ignore[return-value]

    if isinstance(dev, str):
        return torch.device(dev)
    # Anything already device-like is passed through; otherwise default CPU.
    return dev if hasattr(dev, "type") else torch.device("cpu")
63
+
64
+
65
def _cuda_index(device) -> Optional[int]:
    """Return the CUDA ordinal for *device*, or None for non-CUDA devices.

    A bare "cuda" device (index missing) is normalized to ordinal 0.
    """
    if getattr(device, "type", "cpu") != "cuda":
        return None
    ordinal = getattr(device, "index", None)
    return 0 if ordinal is None else int(ordinal)
73
+
74
+
75
  class MemoryManager:
76
  """
77
+ Comprehensive memory management with VRAM cap + cleanup utilities.
78
  """
79
+
80
def __init__(self, device, memory_limit_gb: Optional[float] = None):
    """Set up device bookkeeping, counters, and (optionally) a VRAM cap.

    Args:
        device: str or torch.device whose memory this manager oversees.
        memory_limit_gb: explicit soft limit; derived from hardware when None.
    """
    self.device = _normalize_device(device)
    self.device_type = getattr(self.device, "type", "cpu")
    self.cuda_idx = _cuda_index(self.device)

    # Resolve availability flags once so later code paths stay cheap.
    cuda_ok = bool(torch and self.device_type == "cuda" and torch.cuda.is_available())
    mps_ok = bool(
        torch
        and self.device_type == "mps"
        and getattr(torch.backends, "mps", None)
        and torch.backends.mps.is_available()
    )
    self.gpu_available = cuda_ok
    self.mps_available = mps_ok

    self.memory_limit_gb = memory_limit_gb
    self.cleanup_callbacks: list[Callable] = []
    self.monitoring_active = False
    self.monitoring_thread: Optional[threading.Thread] = None
    self.stats = dict(
        cleanup_count=0,
        peak_memory_usage=0.0,
        total_allocated=0.0,
        total_freed=0.0,
    )
    self.applied_fraction: Optional[float] = None

    # Baseline limit first, then the optional env-driven CUDA cap.
    self._initialize_memory_limits()
    self._maybe_apply_vram_fraction()
    logger.info(f"MemoryManager initialized (device={self.device}, cuda={self.gpu_available})")
108
+
109
+ # -------------------------------
110
+ # init helpers
111
+ # -------------------------------
112
def _initialize_memory_limits(self):
    """Derive a baseline soft memory limit for the active device.

    CUDA: 80% of device VRAM; MPS: 50% of system RAM (unified memory);
    CPU: 60% of system RAM. An explicit limit passed to __init__ wins.
    A 4GB conservative fallback is used when probing fails entirely.
    """
    try:
        if self.gpu_available:
            props = torch.cuda.get_device_properties(self.cuda_idx or 0)
            total_gb = _bytes_to_gb(props.total_memory)
            if self.memory_limit_gb is None:
                self.memory_limit_gb = max(0.5, total_gb * 0.80)  # default 80%
            logger.info(
                f"CUDA memory limit baseline ~{self.memory_limit_gb:.1f}GB "
                f"(device total {total_gb:.1f}GB)"
            )
        else:
            # MPS and CPU both budget off system RAM (psutil is optional).
            vm = psutil.virtual_memory() if psutil else None
            total_gb = _bytes_to_gb(vm.total) if vm else 0.0
            if self.mps_available:
                if self.memory_limit_gb is None:
                    self.memory_limit_gb = max(0.5, total_gb * 0.50)
                logger.info(f"MPS memory baseline ~{self.memory_limit_gb:.1f}GB (system {total_gb:.1f}GB)")
            else:
                if self.memory_limit_gb is None:
                    self.memory_limit_gb = max(0.5, total_gb * 0.60)
                logger.info(f"CPU memory baseline ~{self.memory_limit_gb:.1f}GB (system {total_gb:.1f}GB)")
    except Exception as e:
        logger.warning(f"Memory limit init failed: {e}")

    if self.memory_limit_gb is None:
        self.memory_limit_gb = 4.0  # conservative fallback
139
+
140
def _maybe_apply_vram_fraction(self):
    """Apply the per-process CUDA VRAM cap unless disabled via env.

    Honors BFX_DISABLE_LIMIT (skip entirely) and BFX_CUDA_FRACTION
    (override the default 0.80 fraction; bad values fall back to 0.80).
    """
    if not self.gpu_available or torch is None:
        return
    if os.environ.get("BFX_DISABLE_LIMIT", ""):
        return

    raw = os.environ.get("BFX_CUDA_FRACTION", "").strip()
    try:
        fraction = float(raw) if raw else 0.80
    except Exception:
        fraction = 0.80

    applied = self.limit_cuda_memory(fraction=fraction)
    if applied:
        logger.info(f"Per-process CUDA memory fraction set to {applied:.2f} on device {self.cuda_idx or 0}")
153
+
154
+ # -------------------------------
155
+ # public API
156
+ # -------------------------------
157
def get_memory_usage(self) -> Dict[str, Any]:
    """Snapshot system RAM, swap, process, VRAM, and torch allocator stats.

    Returns a flat dict; keys present depend on what could be probed
    (psutil for system/process stats, CUDA for VRAM/allocator stats).
    Also updates the running peak-usage tracker as a side effect.
    """
    usage: Dict[str, Any] = {
        "device_type": self.device_type,
        "memory_limit_gb": self.memory_limit_gb,
        "timestamp": time.time(),
    }

    # CPU / system
    if psutil:
        try:
            vm = psutil.virtual_memory()
            usage["system_total_gb"] = round(_bytes_to_gb(vm.total), 3)
            usage["system_available_gb"] = round(_bytes_to_gb(vm.available), 3)
            usage["system_used_gb"] = round(_bytes_to_gb(vm.used), 3)
            usage["system_percent"] = float(vm.percent)

            swap = psutil.swap_memory()
            usage["swap_total_gb"] = round(_bytes_to_gb(swap.total), 3)
            usage["swap_used_gb"] = round(_bytes_to_gb(swap.used), 3)
            usage["swap_percent"] = float(swap.percent)

            mi = psutil.Process().memory_info()
            usage["process_rss_gb"] = round(_bytes_to_gb(mi.rss), 3)
            usage["process_vms_gb"] = round(_bytes_to_gb(mi.vms), 3)
        except Exception as e:
            logger.debug(f"psutil stats error: {e}")

    # GPU
    if self.gpu_available and torch is not None:
        try:
            # mem_get_info returns (free, total) in bytes
            free_b, total_b = torch.cuda.mem_get_info(self.cuda_idx or 0)
            used_b = total_b - free_b
            usage["vram_total_gb"] = round(_bytes_to_gb(total_b), 3)
            usage["vram_used_gb"] = round(_bytes_to_gb(used_b), 3)
            usage["vram_free_gb"] = round(_bytes_to_gb(free_b), 3)
            usage["vram_used_percent"] = float(used_b / total_b * 100.0) if total_b else 0.0
        except Exception as e:
            logger.debug(f"mem_get_info failed: {e}")

        # torch allocator stats
        try:
            idx = self.cuda_idx or 0
            usage["torch_allocated_gb"] = round(_bytes_to_gb(torch.cuda.memory_allocated(idx)), 3)
            usage["torch_reserved_gb"] = round(_bytes_to_gb(torch.cuda.memory_reserved(idx)), 3)
            # inactive split (torch 2.x allocator detail)
            try:
                inactive = torch.cuda.memory_stats(idx).get("inactive_split_bytes.all.current", 0)
                usage["torch_inactive_split_gb"] = round(_bytes_to_gb(inactive), 3)
            except Exception:
                pass
        except Exception as e:
            logger.debug(f"allocator stats failed: {e}")

    usage["applied_fraction"] = self.applied_fraction

    # Update peak tracker: prefer VRAM when reported, else system usage.
    current = usage.get("vram_used_gb", usage.get("system_used_gb", 0.0))
    try:
        if float(current) > float(self.stats["peak_memory_usage"]):
            self.stats["peak_memory_usage"] = float(current)
    except Exception:
        pass

    return usage
239
+
240
def limit_cuda_memory(self, fraction: Optional[float] = None, max_gb: Optional[float] = None) -> Optional[float]:
    """Cap this process's CUDA allocator to a fraction of total VRAM.

    Pass either *fraction* directly or an absolute *max_gb* budget (which
    is converted using the device's total VRAM). The fraction is clamped
    to [0.10, 0.95]. Returns the applied fraction, or None when CUDA is
    unavailable or the cap could not be applied.
    """
    if not self.gpu_available or torch is None:
        return None

    if max_gb is not None:
        # Translate an absolute GB budget into a fraction of total VRAM.
        try:
            _, total_b = torch.cuda.mem_get_info(self.cuda_idx or 0)
            total_gb = _bytes_to_gb(total_b)
            if total_gb <= 0:
                return None
            fraction = min(max(0.10, max_gb / total_gb), 0.95)
        except Exception as e:
            logger.debug(f"fraction from max_gb failed: {e}")
            return None

    fraction = 0.80 if fraction is None else fraction
    fraction = float(max(0.10, min(0.95, fraction)))

    try:
        torch.cuda.set_per_process_memory_fraction(fraction, device=self.cuda_idx or 0)
        self.applied_fraction = fraction
        return fraction
    except Exception as e:
        logger.debug(f"set_per_process_memory_fraction failed: {e}")
        return None
267
+
268
def cleanup(self) -> None:
    """Light, never-raising cleanup for use frequently between steps."""
    # Python-level garbage first.
    try:
        gc.collect()
    except Exception:
        pass
    # Then return cached CUDA blocks to the driver, if applicable.
    if self.gpu_available and torch is not None:
        try:
            torch.cuda.empty_cache()
        except Exception:
            pass
    self.stats["cleanup_count"] += 1
280
+
281
def cleanup_basic(self) -> None:
    """Backward-compatible alias: delegates to cleanup()."""
    self.cleanup()
284
+
285
def cleanup_aggressive(self) -> None:
    """Aggressive cleanup for OOM recovery or big scene switches.

    Runs registered cleanup callbacks, then a sequence of independent
    best-effort CUDA steps (sync, cache flush, peak-stat reset, IPC
    collect), then a double GC pass. Never raises.
    """
    # Invoke registered callbacks first — the previous rewrite registered
    # callbacks via register_cleanup_callback but never called them.
    for cb in list(getattr(self, "cleanup_callbacks", []) or []):
        try:
            cb()
        except Exception as e:
            logger.warning(f"Cleanup callback failed: {e}")

    if self.gpu_available and torch is not None:
        idx = self.cuda_idx or 0
        # Each step is isolated so one failing CUDA call cannot stop the rest.
        try:
            torch.cuda.synchronize(idx)
        except Exception:
            pass
        try:
            torch.cuda.empty_cache()
        except Exception:
            pass
        try:
            torch.cuda.reset_peak_memory_stats(idx)
        except Exception:
            pass
        try:
            if hasattr(torch.cuda, "ipc_collect"):
                torch.cuda.ipc_collect()
        except Exception:
            pass

    try:
        gc.collect(); gc.collect()
    except Exception:
        pass
    self.stats["cleanup_count"] += 1
310
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
311
def register_cleanup_callback(self, callback: Callable) -> None:
    """Register *callback* to be invoked during cleanup passes.

    Raises:
        TypeError: if *callback* is not callable — fail fast here rather
            than storing a value that silently breaks cleanup later.
    """
    if not callable(callback):
        raise TypeError(f"cleanup callback must be callable, got {type(callback).__name__}")
    self.cleanup_callbacks.append(callback)
313
+
314
def start_monitoring(self, interval_seconds: float = 30.0, pressure_callback: Optional[Callable] = None):
    """Start a daemon thread that polls memory pressure periodically.

    Each tick: logs a warning when under pressure, passes the pressure
    dict to *pressure_callback* (if any), and runs aggressive cleanup on
    critical pressure.

    NOTE(review): in this reconstruction the callback fires every tick,
    not only under pressure — confirm against the original layout.
    """
    if self.monitoring_active:
        logger.warning("Memory monitoring already active")
        return

    self.monitoring_active = True

    def _watch():
        while self.monitoring_active:
            try:
                pressure = self.check_memory_pressure()
                if pressure["under_pressure"]:
                    logger.warning(
                        f"Memory pressure: {pressure['pressure_level']} "
                        f"({pressure['usage_percent']:.1f}%)"
                    )
                if pressure_callback:
                    try:
                        pressure_callback(pressure)
                    except Exception as e:
                        logger.error(f"Pressure callback failed: {e}")
                if pressure["pressure_level"] == "critical":
                    self.cleanup_aggressive()
            except Exception as e:
                logger.error(f"Memory monitoring error: {e}")
            time.sleep(interval_seconds)

    self.monitoring_thread = threading.Thread(target=_watch, daemon=True)
    self.monitoring_thread.start()
    logger.info(f"Memory monitoring started (interval: {interval_seconds}s)")
343
+
344
def stop_monitoring(self):
    """Stop the background monitor and join its thread (5s timeout)."""
    if not self.monitoring_active:
        return
    self.monitoring_active = False
    worker = self.monitoring_thread
    if worker and worker.is_alive():
        worker.join(timeout=5.0)
    logger.info("Memory monitoring stopped")
350
+
351
def check_memory_pressure(self, threshold_percent: float = 85.0) -> Dict[str, Any]:
    """Classify current memory pressure as normal/warning/critical.

    Uses VRAM utilisation on CUDA devices, otherwise system RAM
    utilisation. Returns under_pressure, pressure_level, usage_percent,
    and actionable recommendations.
    """
    usage = self.get_memory_usage()
    info = {
        "under_pressure": False,
        "pressure_level": "normal",
        "usage_percent": 0.0,
        "recommendations": [],
    }

    # Pick the utilisation source and the matching advice up front.
    if self.gpu_available:
        percent = usage.get("vram_used_percent", 0.0)
        critical_recs = [
            "Run aggressive memory cleanup",
            "Reduce frame cache / chunk size",
            "Lower resolution or disable previews",
        ]
        warning_recs = [
            "Run cleanup",
            "Monitor memory usage",
            "Reduce keyframe interval",
        ]
    else:
        percent = usage.get("system_percent", 0.0)
        critical_recs = [
            "Close other processes",
            "Reduce resolution",
            "Split video into chunks",
        ]
        warning_recs = [
            "Run cleanup",
            "Monitor usage",
            "Reduce processing footprint",
        ]

    info["usage_percent"] = percent
    if percent >= threshold_percent:
        info["under_pressure"] = True
        if percent >= 95:
            info["pressure_level"] = "critical"
            info["recommendations"] += critical_recs
        else:
            info["pressure_level"] = "warning"
            info["recommendations"] += warning_recs
    return info
399
+
400
def estimate_memory_requirement(self, video_width: int, video_height: int, frames_in_memory: int = 5) -> Dict[str, float]:
    """Rough memory budget (GB) for processing a video of the given size.

    Assumes 3 bytes/pixel RGB frames, a 3x overhead for masks and
    intermediates, a fixed ~4GB model footprint, and ~2GB system slack.
    """
    frame_bytes = video_width * video_height * 3
    working_set = frame_bytes * frames_in_memory * 3.0  # masks/intermediates
    estimate = {
        "frames_memory_gb": round(_bytes_to_gb(working_set), 3),
        "model_memory_gb": 4.0,
        "system_overhead_gb": 2.0,
    }
    estimate["total_estimated_gb"] = round(
        estimate["frames_memory_gb"] + estimate["model_memory_gb"] + estimate["system_overhead_gb"], 3
    )
    return estimate
413
+
414
def can_process_video(self, video_width: int, video_height: int, frames_in_memory: int = 5) -> Dict[str, Any]:
    """Check whether the estimated processing budget fits free memory.

    Compares estimate_memory_requirement() against free VRAM (CUDA) or
    available system RAM. Returns the verdict plus margin and, when it
    does not fit, mitigation recommendations.
    """
    estimate = self.estimate_memory_requirement(video_width, video_height, frames_in_memory)
    usage = self.get_memory_usage()

    key = "vram_free_gb" if self.gpu_available else "system_available_gb"
    available = usage.get(key, 0.0)

    fits = estimate["total_estimated_gb"] <= available
    return {
        "can_process": fits,
        "estimated_memory_gb": estimate["total_estimated_gb"],
        "available_memory_gb": available,
        "memory_margin_gb": round(available - estimate["total_estimated_gb"], 3),
        "recommendations": [] if fits else [
            "Reduce resolution or duration",
            "Process in smaller chunks",
            "Run aggressive cleanup before start",
        ],
    }
434
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
435
def get_stats(self) -> Dict[str, Any]:
    """Summarize manager state and counters for logging/diagnostics."""
    return dict(
        cleanup_count=self.stats["cleanup_count"],
        peak_memory_usage_gb=self.stats["peak_memory_usage"],
        device_type=self.device_type,
        memory_limit_gb=self.memory_limit_gb,
        applied_fraction=self.applied_fraction,
        monitoring_active=self.monitoring_active,
        callbacks_registered=len(self.cleanup_callbacks),
    )
445
+
446
def __del__(self):
    """Best-effort teardown: stop the monitor, then free what we can.

    All exceptions are swallowed because __del__ may run during
    interpreter shutdown when module globals are already torn down.
    """
    try:
        self.stop_monitoring()
        self.cleanup_aggressive()
    except Exception:
        pass