MogensR committed on
Commit
f72657c
·
verified ·
1 Parent(s): 57682af

Update model_loaders.py

Browse files
Files changed (1) hide show
  1. model_loaders.py +44 -24
model_loaders.py CHANGED
@@ -61,7 +61,7 @@ def clear_model_cache():
61
  def load_sam2_predictor():
62
  """
63
  Lazy load SAM2 image predictor with fallback strategies.
64
- Returns None if loading fails.
65
  """
66
  try:
67
  print("Loading SAM2 image predictor...", flush=True)
@@ -70,7 +70,7 @@ def load_sam2_predictor():
70
 
71
  # Determine device
72
  device = "cuda" if torch.cuda.is_available() else "cpu"
73
- print(f"Using device: {device}", flush=True)
74
 
75
  # Try local checkpoints first
76
  checkpoint_path = "/home/user/app/checkpoints/sam2.1_hiera_large.pt"
@@ -104,24 +104,26 @@ def load_sam2_predictor():
104
  sam2_model = build_sam2(model_cfg, checkpoint_path, device=device)
105
  predictor = SAM2ImagePredictor(sam2_model)
106
 
107
- # Verify model is on correct device
108
  if hasattr(predictor, 'model'):
109
  predictor.model.to(device)
110
- print(f"Model moved to {device}", flush=True)
 
111
 
112
- print("✅ SAM2 image predictor loaded successfully!", flush=True)
113
- return predictor
114
 
115
  except Exception as e:
116
- print(f"Failed to load SAM2 predictor: {e}", flush=True)
117
  import traceback
118
  traceback.print_exc()
119
- return None
120
 
121
- # Alias for new app.py
122
  def load_sam2():
123
- """Alias for load_sam2_predictor() - for compatibility with app.py"""
124
- return load_sam2_predictor()
 
125
 
126
  # ============================================================================
127
  # MatAnyone Model Loading
@@ -130,8 +132,8 @@ def load_sam2():
130
  @st.cache_resource(show_spinner=False)
131
  def load_matanyone_processor():
132
  """
133
- Lazy load MatAnyone processor.
134
- Returns None if loading fails.
135
  """
136
  try:
137
  print("Loading MatAnyone processor...", flush=True)
@@ -141,21 +143,34 @@ def load_matanyone_processor():
141
  device = "cuda" if torch.cuda.is_available() else "cpu"
142
  print(f"MatAnyone using device: {device}", flush=True)
143
 
 
144
  processor = InferenceCore("PeiqingYang/MatAnyone", device=device)
145
 
146
- print("✅ MatAnyone processor loaded successfully!", flush=True)
147
- return processor
 
 
 
 
 
 
 
 
 
 
 
148
 
149
  except Exception as e:
150
- print(f"Failed to load MatAnyone: {e}", flush=True)
151
  import traceback
152
  traceback.print_exc()
153
- return None
154
 
155
- # Alias for new app.py
156
  def load_matanyone():
157
- """Alias for load_matanyone_processor() - for compatibility with app.py"""
158
- return load_matanyone_processor()
 
159
 
160
  # ============================================================================
161
  # MediaPipe Pose
@@ -181,15 +196,16 @@ def test_models():
181
  Returns dict with test results.
182
  """
183
  results = {
184
- 'sam2': {'loaded': False, 'error': None},
185
- 'matanyone': {'loaded': False, 'error': None}
186
  }
187
 
188
  # Test SAM2
189
  try:
190
- sam2_predictor = load_sam2_predictor()
191
  if sam2_predictor is not None:
192
  results['sam2']['loaded'] = True
 
193
  else:
194
  results['sam2']['error'] = "Predictor returned None"
195
  except Exception as e:
@@ -197,9 +213,10 @@ def test_models():
197
 
198
  # Test MatAnyone
199
  try:
200
- matanyone_processor = load_matanyone_processor()
201
  if matanyone_processor is not None:
202
  results['matanyone']['loaded'] = True
 
203
  else:
204
  results['matanyone']['error'] = "Processor returned None"
205
  except Exception as e:
@@ -225,6 +242,7 @@ def log_memory_usage(stage=""):
225
 
226
  log_msg += f" | RAM {memory_info['ram_used']:.1f}GB used"
227
 
 
228
  logger.info(log_msg)
229
  return memory_info
230
 
@@ -243,6 +261,7 @@ def check_memory_available(required_gb=2.0):
243
 
244
  def free_memory_aggressive():
245
  """Aggressively free GPU and system memory."""
 
246
  logger.info("Performing aggressive memory cleanup...")
247
 
248
  # Clear model cache
@@ -260,5 +279,6 @@ def free_memory_aggressive():
260
  # System cleanup
261
  gc.collect()
262
 
 
263
  logger.info("Memory cleanup complete")
264
  log_memory_usage("after cleanup")
 
61
  def load_sam2_predictor():
62
  """
63
  Lazy load SAM2 image predictor with fallback strategies.
64
+ Returns (predictor, device) tuple. Returns (None, None) if loading fails.
65
  """
66
  try:
67
  print("Loading SAM2 image predictor...", flush=True)
 
70
 
71
  # Determine device
72
  device = "cuda" if torch.cuda.is_available() else "cpu"
73
+ print(f"Using device for SAM2: {device}", flush=True)
74
 
75
  # Try local checkpoints first
76
  checkpoint_path = "/home/user/app/checkpoints/sam2.1_hiera_large.pt"
 
104
  sam2_model = build_sam2(model_cfg, checkpoint_path, device=device)
105
  predictor = SAM2ImagePredictor(sam2_model)
106
 
107
+ # CRITICAL: Force the model onto the selected device and switch it to eval mode
108
  if hasattr(predictor, 'model'):
109
  predictor.model.to(device)
110
+ predictor.model.eval()
111
+ print(f"SAM2 model moved to {device} and set to eval mode", flush=True)
112
 
113
+ print(f"✅ SAM2 loaded successfully on {device}!", flush=True)
114
+ return predictor, device
115
 
116
  except Exception as e:
117
+ print(f"Failed to load SAM2 predictor: {e}", flush=True)
118
  import traceback
119
  traceback.print_exc()
120
+ return None, None
121
 
122
+ # Alias for backward compatibility
123
  def load_sam2():
124
+ """Alias for load_sam2_predictor() - returns just predictor for compatibility"""
125
+ predictor, device = load_sam2_predictor()
126
+ return predictor
127
 
128
  # ============================================================================
129
  # MatAnyone Model Loading
 
132
  @st.cache_resource(show_spinner=False)
133
  def load_matanyone_processor():
134
  """
135
+ Lazy load MatAnyone processor with explicit device placement (GPU when available, otherwise CPU).
136
+ Returns (processor, device) tuple. Returns (None, None) if loading fails.
137
  """
138
  try:
139
  print("Loading MatAnyone processor...", flush=True)
 
143
  device = "cuda" if torch.cuda.is_available() else "cpu"
144
  print(f"MatAnyone using device: {device}", flush=True)
145
 
146
+ # Load processor with explicit device
147
  processor = InferenceCore("PeiqingYang/MatAnyone", device=device)
148
 
149
+ # CRITICAL: Force the processor's model onto the selected device (GPU when available, otherwise CPU)
150
+ if hasattr(processor, 'model'):
151
+ processor.model.to(device)
152
+ processor.model.eval()
153
+ print(f"MatAnyone model explicitly moved to {device}", flush=True)
154
+
155
+ # Check if processor has device attribute and set it
156
+ if not hasattr(processor, 'device'):
157
+ processor.device = device
158
+ print(f"Set processor.device to {device}", flush=True)
159
+
160
+ print(f"✅ MatAnyone loaded successfully on {device}!", flush=True)
161
+ return processor, device
162
 
163
  except Exception as e:
164
+ print(f"Failed to load MatAnyone: {e}", flush=True)
165
  import traceback
166
  traceback.print_exc()
167
+ return None, None
168
 
169
+ # Alias for backward compatibility
170
  def load_matanyone():
171
+ """Alias for load_matanyone_processor() - returns just processor for compatibility"""
172
+ processor, device = load_matanyone_processor()
173
+ return processor
174
 
175
  # ============================================================================
176
  # MediaPipe Pose
 
196
  Returns dict with test results.
197
  """
198
  results = {
199
+ 'sam2': {'loaded': False, 'error': None, 'device': None},
200
+ 'matanyone': {'loaded': False, 'error': None, 'device': None}
201
  }
202
 
203
  # Test SAM2
204
  try:
205
+ sam2_predictor, sam2_device = load_sam2_predictor()
206
  if sam2_predictor is not None:
207
  results['sam2']['loaded'] = True
208
+ results['sam2']['device'] = sam2_device
209
  else:
210
  results['sam2']['error'] = "Predictor returned None"
211
  except Exception as e:
 
213
 
214
  # Test MatAnyone
215
  try:
216
+ matanyone_processor, matanyone_device = load_matanyone_processor()
217
  if matanyone_processor is not None:
218
  results['matanyone']['loaded'] = True
219
+ results['matanyone']['device'] = matanyone_device
220
  else:
221
  results['matanyone']['error'] = "Processor returned None"
222
  except Exception as e:
 
242
 
243
  log_msg += f" | RAM {memory_info['ram_used']:.1f}GB used"
244
 
245
+ print(log_msg, flush=True)
246
  logger.info(log_msg)
247
  return memory_info
248
 
 
261
 
262
  def free_memory_aggressive():
263
  """Aggressively free GPU and system memory."""
264
+ print("Performing aggressive memory cleanup...", flush=True)
265
  logger.info("Performing aggressive memory cleanup...")
266
 
267
  # Clear model cache
 
279
  # System cleanup
280
  gc.collect()
281
 
282
+ print("Memory cleanup complete", flush=True)
283
  logger.info("Memory cleanup complete")
284
  log_memory_usage("after cleanup")