Update model_loaders.py
Browse files- model_loaders.py +44 -24
model_loaders.py
CHANGED
|
@@ -61,7 +61,7 @@ def clear_model_cache():
|
|
| 61 |
def load_sam2_predictor():
|
| 62 |
"""
|
| 63 |
Lazy load SAM2 image predictor with fallback strategies.
|
| 64 |
-
Returns None if loading fails.
|
| 65 |
"""
|
| 66 |
try:
|
| 67 |
print("Loading SAM2 image predictor...", flush=True)
|
|
@@ -70,7 +70,7 @@ def load_sam2_predictor():
|
|
| 70 |
|
| 71 |
# Determine device
|
| 72 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 73 |
-
print(f"Using device: {device}", flush=True)
|
| 74 |
|
| 75 |
# Try local checkpoints first
|
| 76 |
checkpoint_path = "/home/user/app/checkpoints/sam2.1_hiera_large.pt"
|
|
@@ -104,24 +104,26 @@ def load_sam2_predictor():
|
|
| 104 |
sam2_model = build_sam2(model_cfg, checkpoint_path, device=device)
|
| 105 |
predictor = SAM2ImagePredictor(sam2_model)
|
| 106 |
|
| 107 |
-
# Verify model
|
| 108 |
if hasattr(predictor, 'model'):
|
| 109 |
predictor.model.to(device)
|
| 110 |
-
|
|
|
|
| 111 |
|
| 112 |
-
print("✅ SAM2
|
| 113 |
-
return predictor
|
| 114 |
|
| 115 |
except Exception as e:
|
| 116 |
-
print(f"Failed to load SAM2 predictor: {e}", flush=True)
|
| 117 |
import traceback
|
| 118 |
traceback.print_exc()
|
| 119 |
-
return None
|
| 120 |
|
| 121 |
-
# Alias for
|
| 122 |
def load_sam2():
|
| 123 |
-
"""Alias for load_sam2_predictor() - for compatibility
|
| 124 |
-
|
|
|
|
| 125 |
|
| 126 |
# ============================================================================
|
| 127 |
# MatAnyone Model Loading
|
|
@@ -130,8 +132,8 @@ def load_sam2():
|
|
| 130 |
@st.cache_resource(show_spinner=False)
|
| 131 |
def load_matanyone_processor():
|
| 132 |
"""
|
| 133 |
-
Lazy load MatAnyone processor.
|
| 134 |
-
Returns None if loading fails.
|
| 135 |
"""
|
| 136 |
try:
|
| 137 |
print("Loading MatAnyone processor...", flush=True)
|
|
@@ -141,21 +143,34 @@ def load_matanyone_processor():
|
|
| 141 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 142 |
print(f"MatAnyone using device: {device}", flush=True)
|
| 143 |
|
|
|
|
| 144 |
processor = InferenceCore("PeiqingYang/MatAnyone", device=device)
|
| 145 |
|
| 146 |
-
|
| 147 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 148 |
|
| 149 |
except Exception as e:
|
| 150 |
-
print(f"Failed to load MatAnyone: {e}", flush=True)
|
| 151 |
import traceback
|
| 152 |
traceback.print_exc()
|
| 153 |
-
return None
|
| 154 |
|
| 155 |
-
# Alias for
|
| 156 |
def load_matanyone():
|
| 157 |
-
"""Alias for load_matanyone_processor() - for compatibility
|
| 158 |
-
|
|
|
|
| 159 |
|
| 160 |
# ============================================================================
|
| 161 |
# MediaPipe Pose
|
|
@@ -181,15 +196,16 @@ def test_models():
|
|
| 181 |
Returns dict with test results.
|
| 182 |
"""
|
| 183 |
results = {
|
| 184 |
-
'sam2': {'loaded': False, 'error': None},
|
| 185 |
-
'matanyone': {'loaded': False, 'error': None}
|
| 186 |
}
|
| 187 |
|
| 188 |
# Test SAM2
|
| 189 |
try:
|
| 190 |
-
sam2_predictor = load_sam2_predictor()
|
| 191 |
if sam2_predictor is not None:
|
| 192 |
results['sam2']['loaded'] = True
|
|
|
|
| 193 |
else:
|
| 194 |
results['sam2']['error'] = "Predictor returned None"
|
| 195 |
except Exception as e:
|
|
@@ -197,9 +213,10 @@ def test_models():
|
|
| 197 |
|
| 198 |
# Test MatAnyone
|
| 199 |
try:
|
| 200 |
-
matanyone_processor = load_matanyone_processor()
|
| 201 |
if matanyone_processor is not None:
|
| 202 |
results['matanyone']['loaded'] = True
|
|
|
|
| 203 |
else:
|
| 204 |
results['matanyone']['error'] = "Processor returned None"
|
| 205 |
except Exception as e:
|
|
@@ -225,6 +242,7 @@ def log_memory_usage(stage=""):
|
|
| 225 |
|
| 226 |
log_msg += f" | RAM {memory_info['ram_used']:.1f}GB used"
|
| 227 |
|
|
|
|
| 228 |
logger.info(log_msg)
|
| 229 |
return memory_info
|
| 230 |
|
|
@@ -243,6 +261,7 @@ def check_memory_available(required_gb=2.0):
|
|
| 243 |
|
| 244 |
def free_memory_aggressive():
|
| 245 |
"""Aggressively free GPU and system memory."""
|
|
|
|
| 246 |
logger.info("Performing aggressive memory cleanup...")
|
| 247 |
|
| 248 |
# Clear model cache
|
|
@@ -260,5 +279,6 @@ def free_memory_aggressive():
|
|
| 260 |
# System cleanup
|
| 261 |
gc.collect()
|
| 262 |
|
|
|
|
| 263 |
logger.info("Memory cleanup complete")
|
| 264 |
log_memory_usage("after cleanup")
|
|
|
|
| 61 |
def load_sam2_predictor():
|
| 62 |
"""
|
| 63 |
Lazy load SAM2 image predictor with fallback strategies.
|
| 64 |
+
Returns (predictor, device) tuple. Returns (None, None) if loading fails.
|
| 65 |
"""
|
| 66 |
try:
|
| 67 |
print("Loading SAM2 image predictor...", flush=True)
|
|
|
|
| 70 |
|
| 71 |
# Determine device
|
| 72 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 73 |
+
print(f"Using device for SAM2: {device}", flush=True)
|
| 74 |
|
| 75 |
# Try local checkpoints first
|
| 76 |
checkpoint_path = "/home/user/app/checkpoints/sam2.1_hiera_large.pt"
|
|
|
|
| 104 |
sam2_model = build_sam2(model_cfg, checkpoint_path, device=device)
|
| 105 |
predictor = SAM2ImagePredictor(sam2_model)
|
| 106 |
|
| 107 |
+
# CRITICAL: Verify and force model to correct device
|
| 108 |
if hasattr(predictor, 'model'):
|
| 109 |
predictor.model.to(device)
|
| 110 |
+
predictor.model.eval()
|
| 111 |
+
print(f"SAM2 model moved to {device} and set to eval mode", flush=True)
|
| 112 |
|
| 113 |
+
print(f"✅ SAM2 loaded successfully on {device}!", flush=True)
|
| 114 |
+
return predictor, device
|
| 115 |
|
| 116 |
except Exception as e:
|
| 117 |
+
print(f"❌ Failed to load SAM2 predictor: {e}", flush=True)
|
| 118 |
import traceback
|
| 119 |
traceback.print_exc()
|
| 120 |
+
return None, None
|
| 121 |
|
| 122 |
+
# Alias for backward compatibility
|
| 123 |
def load_sam2():
    """Backward-compatible alias that yields only the SAM2 predictor.

    Delegates to load_sam2_predictor(), which hands back a
    (predictor, device) pair, and drops the device so that older call
    sites expecting a single object keep working.

    Returns:
        The predictor from load_sam2_predictor(); None when that loader
        reports failure with (None, None).
    """
    # Unpack as a pair (as before) so an unexpected return shape still fails loudly.
    sam2_predictor, _device = load_sam2_predictor()
    return sam2_predictor
|
| 127 |
|
| 128 |
# ============================================================================
|
| 129 |
# MatAnyone Model Loading
|
|
|
|
| 132 |
@st.cache_resource(show_spinner=False)
|
| 133 |
def load_matanyone_processor():
|
| 134 |
"""
|
| 135 |
+
Lazy load MatAnyone processor with explicit GPU placement.
|
| 136 |
+
Returns (processor, device) tuple. Returns (None, None) if loading fails.
|
| 137 |
"""
|
| 138 |
try:
|
| 139 |
print("Loading MatAnyone processor...", flush=True)
|
|
|
|
| 143 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 144 |
print(f"MatAnyone using device: {device}", flush=True)
|
| 145 |
|
| 146 |
+
# Load processor with explicit device
|
| 147 |
processor = InferenceCore("PeiqingYang/MatAnyone", device=device)
|
| 148 |
|
| 149 |
+
# CRITICAL: Verify the processor's model is actually on GPU
|
| 150 |
+
if hasattr(processor, 'model'):
|
| 151 |
+
processor.model.to(device)
|
| 152 |
+
processor.model.eval()
|
| 153 |
+
print(f"MatAnyone model explicitly moved to {device}", flush=True)
|
| 154 |
+
|
| 155 |
+
# Check if processor has device attribute and set it
|
| 156 |
+
if not hasattr(processor, 'device'):
|
| 157 |
+
processor.device = device
|
| 158 |
+
print(f"Set processor.device to {device}", flush=True)
|
| 159 |
+
|
| 160 |
+
print(f"✅ MatAnyone loaded successfully on {device}!", flush=True)
|
| 161 |
+
return processor, device
|
| 162 |
|
| 163 |
except Exception as e:
|
| 164 |
+
print(f"❌ Failed to load MatAnyone: {e}", flush=True)
|
| 165 |
import traceback
|
| 166 |
traceback.print_exc()
|
| 167 |
+
return None, None
|
| 168 |
|
| 169 |
+
# Alias for backward compatibility
|
| 170 |
def load_matanyone():
    """Backward-compatible alias that yields only the MatAnyone processor.

    Delegates to load_matanyone_processor(), which hands back a
    (processor, device) pair, and drops the device so that older call
    sites expecting a single object keep working.

    Returns:
        The processor from load_matanyone_processor(); None when that
        loader reports failure with (None, None).
    """
    # Unpack as a pair (as before) so an unexpected return shape still fails loudly.
    matanyone_processor, _device = load_matanyone_processor()
    return matanyone_processor
|
| 174 |
|
| 175 |
# ============================================================================
|
| 176 |
# MediaPipe Pose
|
|
|
|
| 196 |
Returns dict with test results.
|
| 197 |
"""
|
| 198 |
results = {
|
| 199 |
+
'sam2': {'loaded': False, 'error': None, 'device': None},
|
| 200 |
+
'matanyone': {'loaded': False, 'error': None, 'device': None}
|
| 201 |
}
|
| 202 |
|
| 203 |
# Test SAM2
|
| 204 |
try:
|
| 205 |
+
sam2_predictor, sam2_device = load_sam2_predictor()
|
| 206 |
if sam2_predictor is not None:
|
| 207 |
results['sam2']['loaded'] = True
|
| 208 |
+
results['sam2']['device'] = sam2_device
|
| 209 |
else:
|
| 210 |
results['sam2']['error'] = "Predictor returned None"
|
| 211 |
except Exception as e:
|
|
|
|
| 213 |
|
| 214 |
# Test MatAnyone
|
| 215 |
try:
|
| 216 |
+
matanyone_processor, matanyone_device = load_matanyone_processor()
|
| 217 |
if matanyone_processor is not None:
|
| 218 |
results['matanyone']['loaded'] = True
|
| 219 |
+
results['matanyone']['device'] = matanyone_device
|
| 220 |
else:
|
| 221 |
results['matanyone']['error'] = "Processor returned None"
|
| 222 |
except Exception as e:
|
|
|
|
| 242 |
|
| 243 |
log_msg += f" | RAM {memory_info['ram_used']:.1f}GB used"
|
| 244 |
|
| 245 |
+
print(log_msg, flush=True)
|
| 246 |
logger.info(log_msg)
|
| 247 |
return memory_info
|
| 248 |
|
|
|
|
| 261 |
|
| 262 |
def free_memory_aggressive():
|
| 263 |
"""Aggressively free GPU and system memory."""
|
| 264 |
+
print("Performing aggressive memory cleanup...", flush=True)
|
| 265 |
logger.info("Performing aggressive memory cleanup...")
|
| 266 |
|
| 267 |
# Clear model cache
|
|
|
|
| 279 |
# System cleanup
|
| 280 |
gc.collect()
|
| 281 |
|
| 282 |
+
print("Memory cleanup complete", flush=True)
|
| 283 |
logger.info("Memory cleanup complete")
|
| 284 |
log_memory_usage("after cleanup")
|