The patch below adapts `depth_anything_3/app/modules/model_inference.py` to Hugging Face Spaces running on ZeroGPU: the model moves from an instance attribute into a module-level cache populated inside the `@spaces.GPU` subprocess, and all CUDA tensors are moved to CPU before results are returned to the main process.

```diff
--- a/depth_anything_3/app/modules/model_inference.py
+++ b/depth_anything_3/app/modules/model_inference.py
 from depth_anything_3.utils.export.glb import export_to_glb
 from depth_anything_3.utils.export.gs import export_to_gs_video
+# Global cache for model (used in GPU subprocess)
+# This is safe because @spaces.GPU runs in isolated subprocess
+_MODEL_CACHE = None
+
+
```
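The comment in the patch leans on ZeroGPU's process model: the `@spaces.GPU`-decorated function runs in a separate subprocess, so module-level state set there never leaks back into the main process. A minimal sketch of that isolation using plain `multiprocessing` (illustrative only, no ZeroGPU required):

```python
# Illustrative only: module-level state mutated in a child process does not
# propagate back to the parent, which is why a module-level model cache is
# safe under @spaces.GPU.
import multiprocessing as mp

_CACHE = None  # stands in for _MODEL_CACHE

def _worker(q: "mp.Queue") -> None:
    global _CACHE
    _CACHE = "model-loaded-in-subprocess"  # stands in for from_pretrained(...)
    q.put(_CACHE)

if __name__ == "__main__":
    q = mp.Queue()
    p = mp.Process(target=_worker, args=(q,))
    p.start()
    print("child saw:", q.get())         # -> model-loaded-in-subprocess
    p.join()
    print("parent still has:", _CACHE)   # -> None; the parent is untouched
```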
```diff
 class ModelInference:
     """
     Handles model inference and data processing for Depth Anything 3.
     """
     def __init__(self):
-        """Initialize the model inference handler."""
-        self.model = None
-
-    def initialize_model(self, device: str = "cuda") -> None:
+        """Initialize the model inference handler.
+
+        Note: Do NOT store model in instance variable to avoid
+        state sharing issues with @spaces.GPU decorator.
+        """
+        pass  # No instance variables
+
+    def initialize_model(self, device: str = "cuda"):
         """
         Initialize the DepthAnything3 model.
+
+        Uses global cache to store model safely in GPU subprocess.
+        This avoids CUDA initialization in main process.
         Args:
             device: Device to load the model on
+
+        Returns:
+            Model instance
         """
-        if self.model is None:
+        global _MODEL_CACHE
+
+        if _MODEL_CACHE is None:
             # Get model directory from environment variable or use default
             model_dir = os.environ.get(
                 "DA3_MODEL_DIR", "/dev/shm/da3_models/DA3HF-VITG-METRIC_VITL"
             )
-            self.model = DepthAnything3.from_pretrained(model_dir)
-            self.model = self.model.to(device)
+            print(f"Loading model from {model_dir}...")
+            _MODEL_CACHE = DepthAnything3.from_pretrained(model_dir)
+            _MODEL_CACHE = _MODEL_CACHE.to(device)
+            _MODEL_CACHE.eval()
+            print("Model loaded and moved to GPU")
         else:
-            self.model = self.model.to(device)
-
-        self.model.eval()
+            print("Using cached model")
+            # Ensure model is on correct device
+            _MODEL_CACHE = _MODEL_CACHE.to(device)
+
+        return _MODEL_CACHE
```
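For context, this is roughly how the handler would be wired into a ZeroGPU Space. The entry-point name and parameters below are illustrative, not part of the patch; only the decorated function touches CUDA, and the cache lives as long as the GPU subprocess does:

```python
# Hypothetical wiring (function and parameter names are illustrative).
import spaces  # Hugging Face `spaces` package, provides the GPU decorator

inference_handler = ModelInference()  # constructed in the CPU-only main process

@spaces.GPU  # runs the body in an isolated GPU subprocess on ZeroGPU
def predict(image_paths, method, infer_gs):
    # initialize_model() is called inside run_inference(), i.e. inside the
    # subprocess, so _MODEL_CACHE is populated (and reused) on the GPU side.
    # Arguments are illustrative; the real signature is elided in the diff.
    return inference_handler.run_inference(image_paths, method, infer_gs)
```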
```diff
     def run_inference(
         self,
         ...
         # Initialize model if needed
-        self.initialize_model(device)
+        model = self.initialize_model(device)
         ...
         # Run model inference
         print(f"Running inference with method: {actual_method}")
         with torch.no_grad():
-            prediction = self.model.inference(
+            prediction = model.inference(
                 image_paths, export_dir=None, process_res_method=actual_method, infer_gs=infer_gs
             )
@@ ... @@ class ModelInference:
         # Process results
         processed_data = self._process_results(target_dir, prediction, image_paths)
+        # CRITICAL: Move all CUDA tensors to CPU before returning
+        # This prevents CUDA initialization in main process during unpickling
+        prediction = self._move_prediction_to_cpu(prediction)
+
         # Clean up
         torch.cuda.empty_cache()
@@ ... @@ class ModelInference:
         return processed_data
```
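The CPU move matters because return values of a `@spaces.GPU` function are pickled back to the main process, and a torch tensor is pickled together with its device, so a CUDA tensor deserializes back onto the GPU and forces CUDA initialization in the receiver. A small sketch of the fix (assuming a CUDA device is available):

```python
# Illustrative: detaching and moving to CPU before pickling keeps the
# receiving process CUDA-free.
import pickle
import torch

def safe_to_return(t: torch.Tensor) -> torch.Tensor:
    return t.detach().cpu()  # break the autograd graph and drop the CUDA device

if torch.cuda.is_available():
    gpu_tensor = torch.randn(4, device="cuda")
    restored = pickle.loads(pickle.dumps(safe_to_return(gpu_tensor)))
    assert not restored.is_cuda  # safe to hand to a CUDA-less main process
```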
```diff
+    def _move_prediction_to_cpu(self, prediction: Any) -> Any:
+        """
+        Move all CUDA tensors in prediction to CPU for safe pickling.
+
+        This is REQUIRED for HF Spaces with @spaces.GPU decorator to avoid
+        CUDA initialization in the main process during unpickling.
+
+        Args:
+            prediction: Prediction object that may contain CUDA tensors
+
+        Returns:
+            Prediction object with all tensors moved to CPU
+        """
+        # Move gaussians tensors to CPU
+        if hasattr(prediction, 'gaussians') and prediction.gaussians is not None:
+            gaussians = prediction.gaussians
+
+            # Move each tensor attribute to CPU
+            tensor_attrs = ['means', 'scales', 'rotations', 'harmonics', 'opacities']
+            for attr in tensor_attrs:
+                if hasattr(gaussians, attr):
+                    tensor = getattr(gaussians, attr)
+                    if isinstance(tensor, torch.Tensor) and tensor.is_cuda:
+                        setattr(gaussians, attr, tensor.cpu())
+                        print(f"Moved gaussians.{attr} to CPU")
+
+        # Move any tensors in aux dict to CPU
+        if hasattr(prediction, 'aux') and prediction.aux is not None:
+            for key, value in list(prediction.aux.items()):
+                if isinstance(value, torch.Tensor) and value.is_cuda:
+                    prediction.aux[key] = value.cpu()
+                    print(f"Moved aux['{key}'] to CPU")
+                elif isinstance(value, dict):
+                    # Handle one level of nested dicts
+                    for k, v in list(value.items()):
+                        if isinstance(v, torch.Tensor) and v.is_cuda:
+                            value[k] = v.cpu()
+                            print(f"Moved aux['{key}']['{k}'] to CPU")
+
+        return prediction
+
```
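`_move_prediction_to_cpu` enumerates known attribute names and one level of nesting, which works but has to be kept in sync with the prediction schema. A more generic alternative (a sketch, not part of the patch) walks nested containers recursively:

```python
# Sketch of a schema-agnostic variant: recursively move every CUDA tensor
# found in nested dicts / lists / tuples to CPU. Objects with tensor
# attributes (like `gaussians`) would still need explicit handling.
from typing import Any
import torch

def move_tree_to_cpu(obj: Any) -> Any:
    if isinstance(obj, torch.Tensor):
        return obj.detach().cpu() if obj.is_cuda else obj
    if isinstance(obj, dict):
        return {k: move_tree_to_cpu(v) for k, v in obj.items()}
    if isinstance(obj, (list, tuple)):
        return type(obj)(move_tree_to_cpu(v) for v in obj)
    return obj
```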
```diff
     def cleanup(self) -> None:
         """Clean up GPU memory."""
```