depth-anything-3 / fix_spaces_gpu.patch
--- a/depth_anything_3/app/modules/model_inference.py
+++ b/depth_anything_3/app/modules/model_inference.py
@@ -31,47 +31,67 @@ from depth_anything_3.utils.export.glb import export_to_glb
from depth_anything_3.utils.export.gs import export_to_gs_video
+# Global cache for the model (used in the GPU subprocess)
+# This is safe because @spaces.GPU runs in an isolated subprocess
+_MODEL_CACHE = None
+
+
class ModelInference:
"""
Handles model inference and data processing for Depth Anything 3.
"""
def __init__(self):
- """Initialize the model inference handler."""
- self.model = None
-
- def initialize_model(self, device: str = "cuda") -> None:
+ """Initialize the model inference handler.
+
+ Note: Do NOT store model in instance variable to avoid
+ state sharing issues with @spaces.GPU decorator.
+ """
+ pass # No instance variables
+
+ def initialize_model(self, device: str = "cuda") -> DepthAnything3:
"""
Initialize the DepthAnything3 model.
+
+ Uses a module-level cache so the model is loaded inside the GPU
+ subprocess, avoiding CUDA initialization in the main process.
+
Args:
device: Device to load the model on
+
+ Returns:
+ The initialized DepthAnything3 model.
"""
- if self.model is None:
+ global _MODEL_CACHE
+
+ if _MODEL_CACHE is None:
# Get model directory from environment variable or use default
model_dir = os.environ.get(
"DA3_MODEL_DIR", "/dev/shm/da3_models/DA3HF-VITG-METRIC_VITL"
)
- self.model = DepthAnything3.from_pretrained(model_dir)
- self.model = self.model.to(device)
+ print(f"Loading model from {model_dir}...")
+ _MODEL_CACHE = DepthAnything3.from_pretrained(model_dir)
+ _MODEL_CACHE = _MODEL_CACHE.to(device)
+ _MODEL_CACHE.eval()
+ print("Model loaded and moved to GPU")
else:
- self.model = self.model.to(device)
-
- self.model.eval()
+ print("Using cached model")
+ # Ensure model is on correct device
+ _MODEL_CACHE = _MODEL_CACHE.to(device)
+
+ return _MODEL_CACHE
def run_inference(
self,
...
# Initialize model if needed
- self.initialize_model(device)
+ model = self.initialize_model(device)
...
# Run model inference
print(f"Running inference with method: {actual_method}")
with torch.no_grad():
- prediction = self.model.inference(
+ prediction = model.inference(
image_paths, export_dir=None, process_res_method=actual_method, infer_gs=infer_gs
)
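
For context, here is a minimal sketch of how these methods are typically wired into a ZeroGPU handler. The spaces package and its GPU decorator are the real HF Spaces API; the handler name predict, the duration value, and the pass-through argument list are assumptions, since run_inference's signature is elided above.

import spaces  # Hugging Face ZeroGPU helper

_inference = ModelInference()

@spaces.GPU(duration=120)  # hypothetical duration; the body runs in a GPU subprocess
def predict(*args, **kwargs):
    # Any state created here dies with the subprocess; the module-level
    # _MODEL_CACHE above persists only if ZeroGPU keeps the worker alive
    # between calls, which is what the caching is betting on.
    return _inference.run_inference(*args, **kwargs)
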
@@ -192,6 +212,10 @@ class ModelInference:
# Process results
processed_data = self._process_results(target_dir, prediction, image_paths)
+ # CRITICAL: Move all CUDA tensors to CPU before returning.
+ # This prevents CUDA initialization in the main process during unpickling.
+ prediction = self._move_prediction_to_cpu(prediction)
+
# Clean up
torch.cuda.empty_cache()
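
The reason for the CPU move: ZeroGPU pickles the decorated function's return value back to the main process, and a pickled CUDA tensor records its device, so unpickling it initializes CUDA in whichever process loads it. A small self-contained illustration (plain PyTorch, no ZeroGPU required):

import pickle
import torch

cpu_t = torch.ones(3)
pickle.loads(pickle.dumps(cpu_t))  # safe in any process: no CUDA context needed

if torch.cuda.is_available():
    gpu_t = cpu_t.cuda()
    blob = pickle.dumps(gpu_t)     # the pickle records device="cuda:0"
    # pickle.loads(blob) would initialize CUDA in the loading process,
    # which is exactly what _move_prediction_to_cpu prevents on the
    # main-process side of the Spaces boundary.
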
@@ -282,6 +306,45 @@ class ModelInference:
return processed_data
+ def _move_prediction_to_cpu(self, prediction: Any) -> Any:
+ """
+ Move all CUDA tensors in prediction to CPU for safe pickling.
+
+ This is REQUIRED on HF Spaces with the @spaces.GPU decorator to avoid
+ CUDA initialization in the main process during unpickling.
+
+ Args:
+ prediction: Prediction object that may contain CUDA tensors
+
+ Returns:
+ Prediction object with all tensors moved to CPU
+ """
+ # Move gaussians tensors to CPU
+ if hasattr(prediction, 'gaussians') and prediction.gaussians is not None:
+ gaussians = prediction.gaussians
+
+ # Move each tensor attribute to CPU
+ tensor_attrs = ['means', 'scales', 'rotations', 'harmonics', 'opacities']
+ for attr in tensor_attrs:
+ if hasattr(gaussians, attr):
+ tensor = getattr(gaussians, attr)
+ if isinstance(tensor, torch.Tensor) and tensor.is_cuda:
+ setattr(gaussians, attr, tensor.cpu())
+ print(f"Moved gaussians.{attr} to CPU")
+
+ # Move any tensors in aux dict to CPU
+ if hasattr(prediction, 'aux') and prediction.aux is not None:
+ for key, value in list(prediction.aux.items()):
+ if isinstance(value, torch.Tensor) and value.is_cuda:
+ prediction.aux[key] = value.cpu()
+ print(f"Moved aux['{key}'] to CPU")
+ elif isinstance(value, dict):
+ # Handle one level of nested dicts (deeper nesting is not traversed)
+ for k, v in list(value.items()):
+ if isinstance(v, torch.Tensor) and v.is_cuda:
+ value[k] = v.cpu()
+ print(f"Moved aux['{key}']['{k}'] to CPU")
+
+ return prediction
+
def cleanup(self) -> None:
"""Clean up GPU memory."""