--- a/depth_anything_3/app/modules/model_inference.py
+++ b/depth_anything_3/app/modules/model_inference.py
@@ -31,47 +31,67 @@ from depth_anything_3.utils.export.glb import export_to_glb
 from depth_anything_3.utils.export.gs import export_to_gs_video
 
 
+# Global cache for the model (used in the GPU subprocess).
+# This is safe because @spaces.GPU runs in an isolated subprocess.
+_MODEL_CACHE = None
+
+
 class ModelInference:
     """
     Handles model inference and data processing for Depth Anything 3.
     """
 
     def __init__(self):
-        """Initialize the model inference handler."""
-        self.model = None
-
-    def initialize_model(self, device: str = "cuda") -> None:
+        """Initialize the model inference handler.
+        
+        Note: Do NOT store the model in an instance variable; doing so
+        causes state-sharing issues with the @spaces.GPU decorator.
+        """
+        pass  # No instance variables
+
+    def initialize_model(self, device: str = "cuda"):
         """
         Initialize the DepthAnything3 model.
+        
+        Uses a global cache to store the model safely in the GPU subprocess,
+        which avoids CUDA initialization in the main process.
 
         Args:
             device: Device to load the model on
+            
+        Returns:
+            Model instance
         """
-        if self.model is None:
+        global _MODEL_CACHE
+        
+        if _MODEL_CACHE is None:
             # Get model directory from environment variable or use default
             model_dir = os.environ.get(
                 "DA3_MODEL_DIR", "/dev/shm/da3_models/DA3HF-VITG-METRIC_VITL"
             )
-            self.model = DepthAnything3.from_pretrained(model_dir)
-            self.model = self.model.to(device)
+            print(f"Loading model from {model_dir}...")
+            _MODEL_CACHE = DepthAnything3.from_pretrained(model_dir)
+            _MODEL_CACHE = _MODEL_CACHE.to(device)
+            _MODEL_CACHE.eval()
+            print("Model loaded and moved to GPU")
         else:
-            self.model = self.model.to(device)
-
-        self.model.eval()
+            print("Using cached model")
+            # Ensure model is on correct device
+            _MODEL_CACHE = _MODEL_CACHE.to(device)
+        
+        return _MODEL_CACHE
 
     def run_inference(
         self,
         ...
         # Initialize model if needed
-        self.initialize_model(device)
+        model = self.initialize_model(device)
 
         ...
 
         # Run model inference
         print(f"Running inference with method: {actual_method}")
         with torch.no_grad():
-            prediction = self.model.inference(
+            prediction = model.inference(
                 image_paths, export_dir=None, process_res_method=actual_method, infer_gs=infer_gs
             )
         
@@ -192,6 +212,10 @@ class ModelInference:
         # Process results
         processed_data = self._process_results(target_dir, prediction, image_paths)
 
+        # CRITICAL: Move all CUDA tensors to CPU before returning.
+        # This prevents CUDA initialization in the main process during unpickling.
+        prediction = self._move_prediction_to_cpu(prediction)
+
         # Clean up
         torch.cuda.empty_cache()
 
@@ -282,6 +306,45 @@ class ModelInference:
 
         return processed_data
 
+    def _move_prediction_to_cpu(self, prediction: Any) -> Any:
+        """
+        Move all CUDA tensors in prediction to CPU for safe pickling.
+        
+        This is REQUIRED for HF Spaces with the @spaces.GPU decorator to avoid
+        CUDA initialization in the main process during unpickling.
+        
+        Args:
+            prediction: Prediction object that may contain CUDA tensors
+            
+        Returns:
+            Prediction object with all tensors moved to CPU
+        """
+        # Move gaussians tensors to CPU
+        if hasattr(prediction, 'gaussians') and prediction.gaussians is not None:
+            gaussians = prediction.gaussians
+            
+            # Move each tensor attribute to CPU
+            tensor_attrs = ['means', 'scales', 'rotations', 'harmonics', 'opacities']
+            for attr in tensor_attrs:
+                if hasattr(gaussians, attr):
+                    tensor = getattr(gaussians, attr)
+                    if isinstance(tensor, torch.Tensor) and tensor.is_cuda:
+                        setattr(gaussians, attr, tensor.cpu())
+                        print(f"Moved gaussians.{attr} to CPU")
+        
+        # Move any tensors in aux dict to CPU
+        if hasattr(prediction, 'aux') and prediction.aux is not None:
+            for key, value in list(prediction.aux.items()):
+                if isinstance(value, torch.Tensor) and value.is_cuda:
+                    prediction.aux[key] = value.cpu()
+                    print(f"Moved aux['{key}'] to CPU")
+                elif isinstance(value, dict):
+                    # Recursively handle nested dicts
+                    for k, v in list(value.items()):
+                        if isinstance(v, torch.Tensor) and v.is_cuda:
+                            value[k] = v.cpu()
+                            print(f"Moved aux['{key}']['{k}'] to CPU")
+        
+        return prediction
+
     def cleanup(self) -> None:
         """Clean up GPU memory."""