--- a/depth_anything_3/app/modules/model_inference.py
+++ b/depth_anything_3/app/modules/model_inference.py
@@ -31,47 +31,67 @@ from depth_anything_3.utils.export.glb import export_to_glb
 from depth_anything_3.utils.export.gs import export_to_gs_video
+# Global cache for the model (used in the GPU subprocess).
+# This is safe because @spaces.GPU runs in an isolated subprocess.
+_MODEL_CACHE = None
+
+
 class ModelInference:
     """
     Handles model inference and data processing for Depth Anything 3.
     """

     def __init__(self):
-        """Initialize the model inference handler."""
-        self.model = None
-
-    def initialize_model(self, device: str = "cuda") -> None:
+        """Initialize the model inference handler.
+
+        Note: do NOT store the model in an instance variable, to avoid
+        state-sharing issues with the @spaces.GPU decorator.
+        """
+        pass  # No instance variables
+
+    def initialize_model(self, device: str = "cuda"):
         """
         Initialize the DepthAnything3 model.
+
+        Uses a global cache to store the model safely in the GPU subprocess.
+        This avoids CUDA initialization in the main process.
+
         Args:
             device: Device to load the model on
+
+        Returns:
+            Model instance
         """
-        if self.model is None:
+        global _MODEL_CACHE
+
+        if _MODEL_CACHE is None:
             # Get model directory from environment variable or use default
             model_dir = os.environ.get(
                 "DA3_MODEL_DIR", "/dev/shm/da3_models/DA3HF-VITG-METRIC_VITL"
             )
-            self.model = DepthAnything3.from_pretrained(model_dir)
-            self.model = self.model.to(device)
+            print(f"Loading model from {model_dir}...")
+            _MODEL_CACHE = DepthAnything3.from_pretrained(model_dir)
+            _MODEL_CACHE = _MODEL_CACHE.to(device)
+            _MODEL_CACHE.eval()
+            print("Model loaded and moved to GPU")
         else:
-            self.model = self.model.to(device)
-
-        self.model.eval()
+            print("Using cached model")
+            # Ensure the cached model is on the correct device
+            _MODEL_CACHE = _MODEL_CACHE.to(device)
+
+        return _MODEL_CACHE
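
The pattern above relies on ZeroGPU's process model: each `@spaces.GPU` call executes in a GPU-attached worker process, so a module-level global initialized inside it never leaks CUDA state into the main Gradio process. A minimal, self-contained sketch of the same pattern, assuming the HF `spaces` package is installed; the tiny `nn.Linear` stands in for `DepthAnything3`:

```python
# Sketch of the subprocess-local cache pattern (illustrative names).
import spaces
import torch

_MODEL_CACHE = None  # module global: private to the ZeroGPU worker process


def _get_model(device: str = "cuda") -> torch.nn.Module:
    """Load once per worker, then reuse; never touch CUDA at import time."""
    global _MODEL_CACHE
    if _MODEL_CACHE is None:
        _MODEL_CACHE = torch.nn.Linear(4, 4)  # stand-in for from_pretrained(...)
        _MODEL_CACHE = _MODEL_CACHE.to(device).eval()
    return _MODEL_CACHE


@spaces.GPU
def predict(x: torch.Tensor) -> torch.Tensor:
    model = _get_model("cuda")  # CUDA is first initialized here,
    with torch.no_grad():       # inside the GPU subprocess
        return model(x.to("cuda")).cpu()  # return CPU tensors (picklable)
```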
     def run_inference(
         self,
         ...
         # Initialize model if needed
-        self.initialize_model(device)
+        model = self.initialize_model(device)
         ...
         # Run model inference
         print(f"Running inference with method: {actual_method}")
         with torch.no_grad():
-            prediction = self.model.inference(
+            prediction = model.inference(
                 image_paths, export_dir=None, process_res_method=actual_method, infer_gs=infer_gs
             )
@@ -192,6 +212,10 @@ class ModelInference:
         # Process results
         processed_data = self._process_results(target_dir, prediction, image_paths)

+        # CRITICAL: move all CUDA tensors to CPU before returning.
+        # This prevents CUDA initialization in the main process during unpickling.
+        prediction = self._move_prediction_to_cpu(prediction)
+
         # Clean up
         torch.cuda.empty_cache()
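
Why the CPU move is necessary: the return value of a `@spaces.GPU` call is pickled in the worker and unpickled in the main process, and unpickling a tensor with CUDA storage triggers CUDA initialization on the receiving side, which ZeroGPU forbids in the main process. A small standalone illustration of the safe round-trip (a sketch, not part of the diff):

```python
# Sketch: CUDA tensors must become CPU tensors before crossing the
# process boundary via pickle; unpickling CUDA storage re-inits CUDA.
import pickle

import torch


def safe_payload(t: torch.Tensor) -> bytes:
    """Serialize a tensor so any process can unpickle it without CUDA."""
    return pickle.dumps(t.detach().cpu())


if torch.cuda.is_available():
    gpu_tensor = torch.randn(3, device="cuda")
    restored = pickle.loads(safe_payload(gpu_tensor))
    assert restored.device.type == "cpu"  # safe to hand to the UI process
```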
@@ -282,6 +306,45 @@ class ModelInference:
         return processed_data

+    def _move_prediction_to_cpu(self, prediction: Any) -> Any:
+        """
+        Move all CUDA tensors in the prediction to CPU for safe pickling.
+
+        This is REQUIRED on HF Spaces with the @spaces.GPU decorator, to avoid
+        CUDA initialization in the main process during unpickling.
+
+        Args:
+            prediction: Prediction object that may contain CUDA tensors
+
+        Returns:
+            Prediction object with all tensors moved to CPU
+        """
+        # Move gaussians tensors to CPU
+        if hasattr(prediction, 'gaussians') and prediction.gaussians is not None:
+            gaussians = prediction.gaussians
+
+            # Move each tensor attribute to CPU
+            tensor_attrs = ['means', 'scales', 'rotations', 'harmonics', 'opacities']
+            for attr in tensor_attrs:
+                if hasattr(gaussians, attr):
+                    tensor = getattr(gaussians, attr)
+                    if isinstance(tensor, torch.Tensor) and tensor.is_cuda:
+                        setattr(gaussians, attr, tensor.cpu())
+                        print(f"Moved gaussians.{attr} to CPU")
+
+        # Move any tensors in the aux dict to CPU
+        if hasattr(prediction, 'aux') and prediction.aux is not None:
+            for key, value in list(prediction.aux.items()):
+                if isinstance(value, torch.Tensor) and value.is_cuda:
+                    prediction.aux[key] = value.cpu()
+                    print(f"Moved aux['{key}'] to CPU")
+                elif isinstance(value, dict):
+                    # Handle one level of nested dicts
+                    for k, v in list(value.items()):
+                        if isinstance(v, torch.Tensor) and v.is_cuda:
+                            value[k] = v.cpu()
+                            print(f"Moved aux['{key}']['{k}'] to CPU")
+
+        return prediction
+
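
The attribute list above is tied to the current gaussians layout; if the prediction ever grows new tensor-bearing fields, a generic recursive walker is a more defensive fallback. A sketch over plain dicts, lists, and tuples (not part of the diff):

```python
# Hedged alternative: move every tensor in an arbitrarily nested
# structure of dicts, lists, and tuples to CPU in a single pass.
from typing import Any

import torch


def to_cpu(obj: Any) -> Any:
    if isinstance(obj, torch.Tensor):
        return obj.cpu() if obj.is_cuda else obj
    if isinstance(obj, dict):
        return {k: to_cpu(v) for k, v in obj.items()}
    if isinstance(obj, (list, tuple)):
        # Plain containers only; namedtuples would need special casing
        return type(obj)(to_cpu(v) for v in obj)
    return obj
```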
     def cleanup(self) -> None:
         """Clean up GPU memory."""