primerz committed
Commit 6de0612 · verified · 1 Parent(s): 345b083

Create memory_utils

Files changed (1)
  1. memory_utils +160 -0
memory_utils ADDED
@@ -0,0 +1,160 @@
"""
Memory management utilities for Pixagram AI Pixel Art Generator
Provides efficient GPU memory management and model offloading
"""
import torch
import gc
import psutil
import os


class MemoryManager:
    """Manages GPU and CPU memory efficiently for model offloading"""

    def __init__(self, device='cuda', dtype=torch.float16, verbose=True):
        self.device = device
        self.dtype = dtype
        self.verbose = verbose
        self.models_on_gpu = set()

    def offload_to_cpu(self, model, model_name="model"):
        """Move model to CPU and free GPU memory"""
        if model is None:
            return model

        try:
            model = model.to("cpu")
            self.models_on_gpu.discard(model_name)

            if torch.cuda.is_available():
                torch.cuda.empty_cache()
                torch.cuda.synchronize()

            if self.verbose:
                print(f"[MEMORY] Offloaded {model_name} to CPU")
                self.print_memory_status()

            return model
        except Exception as e:
            print(f"[MEMORY] Error offloading {model_name}: {e}")
            return model

    def load_to_gpu(self, model, model_name="model"):
        """Move model to GPU temporarily"""
        if model is None:
            return model

        try:
            model = model.to(self.device)
            self.models_on_gpu.add(model_name)

            if self.verbose:
                print(f"[MEMORY] Loaded {model_name} to GPU")
                self.print_memory_status()

            return model
        except Exception as e:
            print(f"[MEMORY] Error loading {model_name} to GPU: {e}")
            return model

    def cleanup_memory(self, aggressive=True):
        """Perform memory cleanup"""
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
            torch.cuda.synchronize()

        if aggressive:
            # Multiple GC passes for thorough cleanup
            for _ in range(3):
                gc.collect()
        else:
            gc.collect()

        if self.verbose:
            self.print_memory_status()

    def print_memory_status(self):
        """Print current memory usage"""
        if torch.cuda.is_available():
            allocated_gb = torch.cuda.memory_allocated() / 1024**3
            reserved_gb = torch.cuda.memory_reserved() / 1024**3
            print(f" GPU: {allocated_gb:.2f}GB allocated, {reserved_gb:.2f}GB reserved")

        # CPU memory status
        process = psutil.Process(os.getpid())
        cpu_mb = process.memory_info().rss / 1024**2
        print(f" CPU: {cpu_mb:.0f}MB used")

    def get_available_gpu_memory(self):
        """Get available GPU memory in GB"""
        if not torch.cuda.is_available():
            return 0

        return (torch.cuda.get_device_properties(0).total_memory -
                torch.cuda.memory_reserved()) / 1024**3

    def can_fit_on_gpu(self, estimated_gb):
        """Check if a model of the estimated size can fit on the GPU"""
        available = self.get_available_gpu_memory()
        # Leave a 1GB buffer for safety
        return available > (estimated_gb + 1.0)


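# Illustrative usage sketch, not part of the original API surface: `unet` is a
# hypothetical stand-in for any module loaded elsewhere in the app.
#
#     manager = MemoryManager(device='cuda', dtype=torch.float16)
#     unet = manager.load_to_gpu(unet, "unet")      # bring weights into VRAM
#     ...                                           # run inference here
#     unet = manager.offload_to_cpu(unet, "unet")   # free VRAM for the next model
#     manager.cleanup_memory(aggressive=True)       # reclaim caches between stages
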
class ModelOffloader:
    """Context manager for temporary GPU loading"""

    def __init__(self, model, memory_manager, model_name="model"):
        self.model = model
        self.memory_manager = memory_manager
        self.model_name = model_name
        self.was_on_gpu = False

    def __enter__(self):
        """Move model to GPU"""
        if self.model is not None and hasattr(self.model, 'device'):
            self.was_on_gpu = (self.model.device.type == 'cuda')
            if not self.was_on_gpu:
                self.model = self.memory_manager.load_to_gpu(self.model, self.model_name)
        return self.model

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Move model back to CPU if it wasn't on GPU before"""
        if self.model is not None and not self.was_on_gpu:
            self.model = self.memory_manager.offload_to_cpu(self.model, self.model_name)


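# Illustrative usage sketch (hypothetical `controlnet` module; note that
# __enter__ checks hasattr(model, 'device'), which diffusers models expose
# but a plain torch.nn.Module does not):
#
#     with ModelOffloader(controlnet, manager, "controlnet") as model:
#         hint = model(latents)    # model is on the GPU inside the block
#     # on exit it is offloaded again, unless it was already on the GPU
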
def optimize_for_zero_gpu(pipe):
    """
    Optimize a pipeline for Hugging Face Spaces Zero GPU.
    Ensures models stay on CPU until the @spaces.GPU decorator activates.
    """
    if hasattr(pipe, 'enable_model_cpu_offload'):
        pipe.enable_model_cpu_offload()
        print("[MEMORY] Enabled model CPU offloading for Zero GPU")

    if hasattr(pipe, 'enable_vae_slicing'):
        pipe.enable_vae_slicing()
        print("[MEMORY] Enabled VAE slicing for memory efficiency")

    if hasattr(pipe, 'enable_vae_tiling'):
        pipe.enable_vae_tiling()
        print("[MEMORY] Enabled VAE tiling for memory efficiency")

    return pipe


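# Illustrative Zero GPU wiring, assuming a diffusers pipeline `pipe` and the
# Hugging Face `spaces` package (both assumptions, not defined in this module):
#
#     import spaces
#
#     pipe = optimize_for_zero_gpu(pipe)   # at startup, while weights sit on CPU
#
#     @spaces.GPU
#     def generate(prompt):
#         return pipe(prompt).images[0]    # offload hooks page weights in as needed
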
def estimate_model_size(model):
    """Estimate model size in GB from its parameters"""
    if model is None:
        return 0

    total_bytes = 0
    for param in model.parameters():
        # Use the parameter's actual element size (2 bytes for float16,
        # 4 bytes for float32) rather than assuming a fixed dtype
        total_bytes += param.numel() * param.element_size()

    size_gb = total_bytes / 1024**3
    return size_gb


print("[OK] Memory management utilities loaded")