drozdgk committed on
Commit
c00671c
·
1 Parent(s): 52f5401

feat: CPU-compatible dummy pipeline for debugging

Browse files
Files changed (1) hide show
  1. mvp.py +103 -17
mvp.py CHANGED
@@ -22,6 +22,7 @@ import trimesh
22
  import matplotlib.pyplot as plt
23
  import subprocess
24
  import tempfile
 
25
  from huggingface_hub import hf_hub_download
26
 
27
  try:
@@ -62,6 +63,13 @@ device = "cuda" if torch.cuda.is_available() else "cpu"
62
 
63
  print(f"Using device: {device}")
64
 
 
 
 
 
 
 
 
65
 
66
  _VGGT_MODEL = None
67
  _METRIC3D_MODEL = None
@@ -117,7 +125,18 @@ def _init_models():
117
  global _VGGT_MODEL, _METRIC3D_MODEL, _CLIP_MODEL
118
 
119
  if not torch.cuda.is_available():
120
- raise RuntimeError("CUDA недоступна. Для этого Space нужен GPU (CUDA).")
 
 
 
 
 
 
 
 
 
 
 
121
 
122
  if _VGGT_MODEL is None:
123
  print("Initializing and loading VGGT model...")
@@ -154,6 +173,9 @@ def _init_models():
154
  cropformer_name = "Mask2Former_hornet_3x_576d0b.pth"
155
 
156
  def check_weights():
 
 
 
157
  if not os.path.exists(os.path.join(MK_PATH, cropformer_name)):
158
  print(f"Downloading {cropformer_name}...")
159
  os.makedirs(MK_PATH, exist_ok=True)
@@ -195,14 +217,18 @@ def run_model(target_dir, model, metric3d_model=None) -> dict:
195
  """
196
  print(f"Processing images from {target_dir}")
197
 
198
- # Device check
199
  device = "cuda" if torch.cuda.is_available() else "cpu"
200
  if device != "cuda":
201
- raise RuntimeError("CUDA недоступна. Для этого Space нужен GPU (CUDA).")
 
 
 
 
 
 
202
 
203
- # Move model to device
204
- model = model.to(device)
205
- model.eval()
206
 
207
  # Load and preprocess images
208
  image_names = glob.glob(os.path.join(target_dir, "images", "*"))
@@ -211,15 +237,71 @@ def run_model(target_dir, model, metric3d_model=None) -> dict:
211
  if len(image_names) == 0:
212
  raise ValueError("No images found. Check your upload.")
213
 
214
- images = load_and_preprocess_images(image_names).to(device)
215
- print(f"Preprocessed images shape: {images.shape}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
216
 
217
- # Run inference
218
  print("Running inference...")
219
  dtype = torch.bfloat16 if torch.cuda.get_device_capability()[0] >= 8 else torch.float16
 
220
 
221
  with torch.no_grad():
222
- with torch.cuda.amp.autocast(dtype=dtype):
223
  predictions = model(images)
224
 
225
  scale_factor = torch.tensor(1.0, device=device)
@@ -329,7 +411,8 @@ def run_model(target_dir, model, metric3d_model=None) -> dict:
329
  predictions["world_points_from_depth"] = world_points
330
 
331
  # Clean up
332
- torch.cuda.empty_cache()
 
333
  return predictions
334
 
335
 
@@ -343,7 +426,8 @@ def handle_uploads(input_video, input_images):
343
  """
344
  start_time = time.time()
345
  gc.collect()
346
- torch.cuda.empty_cache()
 
347
 
348
  # Create a unique folder name
349
  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
@@ -439,7 +523,8 @@ def reconstruct(
439
 
440
  start_time = time.time()
441
  gc.collect()
442
- torch.cuda.empty_cache()
 
443
 
444
  # Prepare frame_filter dropdown
445
  target_dir_images = os.path.join(target_dir, "images")
@@ -814,10 +899,11 @@ def detect_objects(text_labels, target_dir, conf_thres, *viz_args):
814
  return None, "Please enter at least one text label (separated by ';')."
815
 
816
  # Ensure CropFormer weights exist (if detection pipeline uses them)
817
- try:
818
- check_weights()
819
- except Exception as e:
820
- print(f"Warning: could not ensure Mask2Former weights: {e}")
 
821
 
822
  # 1. Run reconstruction first if needed (checking if predictions exist)
823
  predictions_path = os.path.join(target_dir, "predictions.npz")
 
22
  import matplotlib.pyplot as plt
23
  import subprocess
24
  import tempfile
25
+ import contextlib
26
  from huggingface_hub import hf_hub_download
27
 
28
  try:
 
63
 
64
  print(f"Using device: {device}")
65
 
66
+ # CPU debug / compatibility knobs:
67
+ # - On CPU, VGGT-1B inference is usually impractical. For debugging, we fall back to a lightweight
68
+ # dummy pipeline that produces a minimal predictions dict compatible with `predictions_to_glb`.
69
+ ZOO3D_ALLOW_CPU = os.environ.get("ZOO3D_ALLOW_CPU", "1") == "1"
70
+ ZOO3D_CPU_DUMMY = os.environ.get("ZOO3D_CPU_DUMMY", "1") == "1"
71
+ ZOO3D_SKIP_DOWNLOADS = os.environ.get("ZOO3D_SKIP_DOWNLOADS", "0") == "1"
72
+
73
 
74
  _VGGT_MODEL = None
75
  _METRIC3D_MODEL = None
 
125
  global _VGGT_MODEL, _METRIC3D_MODEL, _CLIP_MODEL
126
 
127
  if not torch.cuda.is_available():
128
+ # CPU-friendly mode for debugging: skip heavy models.
129
+ if not ZOO3D_ALLOW_CPU:
130
+ raise RuntimeError("CUDA недоступна. Для этого Space нужен GPU (CUDA).")
131
+ # We still can load CLIP on CPU if needed, but skip VGGT/Metric3D.
132
+ if _CLIP_MODEL is None:
133
+ print("[INFO] loading CLIP model (CPU)...")
134
+ cm, _, _ = open_clip.create_model_and_transforms("ViT-H-14", pretrained="laion2b_s32b_b79k")
135
+ cm.to("cpu")
136
+ cm.eval()
137
+ print("[INFO] finish loading CLIP model (CPU)...")
138
+ globals()["_CLIP_MODEL"] = cm
139
+ return None, None, _CLIP_MODEL
140
 
141
  if _VGGT_MODEL is None:
142
  print("Initializing and loading VGGT model...")
 
173
  cropformer_name = "Mask2Former_hornet_3x_576d0b.pth"
174
 
175
  def check_weights():
176
+ if ZOO3D_SKIP_DOWNLOADS:
177
+ print("[INFO] ZOO3D_SKIP_DOWNLOADS=1: skipping Mask2Former weights download.")
178
+ return
179
  if not os.path.exists(os.path.join(MK_PATH, cropformer_name)):
180
  print(f"Downloading {cropformer_name}...")
181
  os.makedirs(MK_PATH, exist_ok=True)
 
217
  """
218
  print(f"Processing images from {target_dir}")
219
 
220
+ # Device selection
221
  device = "cuda" if torch.cuda.is_available() else "cpu"
222
  if device != "cuda":
223
+ if not ZOO3D_ALLOW_CPU:
224
+ raise RuntimeError("CUDA недоступна. Для этого Space нужен GPU (CUDA).")
225
+ if not ZOO3D_CPU_DUMMY:
226
+ raise RuntimeError(
227
+ "CPU режим включен, но ZOO3D_CPU_DUMMY=0. "
228
+ "Для отладки поставь ZOO3D_CPU_DUMMY=1 или включи GPU."
229
+ )
230
 
231
+ # Load and preprocess images (we need them for both GPU and CPU-dummy)
 
 
232
 
233
  # Load and preprocess images
234
  image_names = glob.glob(os.path.join(target_dir, "images", "*"))
 
237
  if len(image_names) == 0:
238
  raise ValueError("No images found. Check your upload.")
239
 
240
+ # For CPU dummy mode we want the original HxW for `predictions_to_glb` coloring.
241
+ cpu_images_u8 = None
242
+ if device == "cpu":
243
+ imgs = []
244
+ for p in image_names:
245
+ im = cv2.imread(p, cv2.IMREAD_COLOR)
246
+ if im is None:
247
+ continue
248
+ im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
249
+ imgs.append(im)
250
+ if len(imgs) == 0:
251
+ raise ValueError("No readable images found. Check your upload.")
252
+ # Make all images same size for stacking
253
+ H, W = imgs[0].shape[:2]
254
+ imgs2 = []
255
+ for im in imgs:
256
+ if im.shape[:2] != (H, W):
257
+ im = cv2.resize(im, (W, H))
258
+ imgs2.append(im)
259
+ cpu_images_u8 = np.stack(imgs2, axis=0) # (S,H,W,3) uint8
260
+ print(f"CPU dummy: loaded images shape: {cpu_images_u8.shape}")
261
+
262
+ images = load_and_preprocess_images(image_names)
263
+ print(f"Preprocessed images shape: {tuple(images.shape)}")
264
+ if device == "cuda":
265
+ images = images.to(device)
266
+
267
+ if device == "cpu":
268
+ # Dummy predictions for CPU debugging: minimal keys needed by `predictions_to_glb`
269
+ S, H, W = cpu_images_u8.shape[0], cpu_images_u8.shape[1], cpu_images_u8.shape[2]
270
+ # Simple planar point cloud in camera space
271
+ uu, vv = np.meshgrid(np.arange(W), np.arange(H))
272
+ x = (uu - (W / 2.0)) / float(max(W, 1))
273
+ y = -(vv - (H / 2.0)) / float(max(W, 1))
274
+ z = np.ones_like(x, dtype=np.float32) * 1.0
275
+ pts = np.stack([x, y, z], axis=-1).astype(np.float32) # (H,W,3)
276
+ world_points_from_depth = np.repeat(pts[None, ...], S, axis=0) # (S,H,W,3)
277
+ depth = np.ones((S, H, W, 1), dtype=np.float32)
278
+ depth_conf = np.ones((S, H, W), dtype=np.float32)
279
+ extrinsic = np.tile(np.array([[1, 0, 0, 0],
280
+ [0, 1, 0, 0],
281
+ [0, 0, 1, 0]], dtype=np.float32)[None, ...], (S, 1, 1))
282
+ intrinsic = np.tile(np.eye(3, dtype=np.float32)[None, ...], (S, 1, 1))
283
+ pose = np.tile(np.eye(4, dtype=np.float32)[None, ...], (S, 1, 1))
284
+ return {
285
+ "images": cpu_images_u8,
286
+ "extrinsic": extrinsic,
287
+ "intrinsic": intrinsic,
288
+ "pose": pose,
289
+ "depth": depth,
290
+ "depth_conf": depth_conf,
291
+ "world_points_from_depth": world_points_from_depth,
292
+ }
293
+
294
+ # GPU inference
295
+ # Move model to device
296
+ model = model.to(device)
297
+ model.eval()
298
 
 
299
  print("Running inference...")
300
  dtype = torch.bfloat16 if torch.cuda.get_device_capability()[0] >= 8 else torch.float16
301
+ amp_ctx = torch.cuda.amp.autocast(dtype=dtype) if device == "cuda" else contextlib.nullcontext()
302
 
303
  with torch.no_grad():
304
+ with amp_ctx:
305
  predictions = model(images)
306
 
307
  scale_factor = torch.tensor(1.0, device=device)
 
411
  predictions["world_points_from_depth"] = world_points
412
 
413
  # Clean up
414
+ if torch.cuda.is_available():
415
+ torch.cuda.empty_cache()
416
  return predictions
417
 
418
 
 
426
  """
427
  start_time = time.time()
428
  gc.collect()
429
+ if torch.cuda.is_available():
430
+ torch.cuda.empty_cache()
431
 
432
  # Create a unique folder name
433
  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
 
523
 
524
  start_time = time.time()
525
  gc.collect()
526
+ if torch.cuda.is_available():
527
+ torch.cuda.empty_cache()
528
 
529
  # Prepare frame_filter dropdown
530
  target_dir_images = os.path.join(target_dir, "images")
 
899
  return None, "Please enter at least one text label (separated by ';')."
900
 
901
  # Ensure CropFormer weights exist (if detection pipeline uses them)
902
+ if torch.cuda.is_available() or not ZOO3D_SKIP_DOWNLOADS:
903
+ try:
904
+ check_weights()
905
+ except Exception as e:
906
+ print(f"Warning: could not ensure Mask2Former weights: {e}")
907
 
908
  # 1. Run reconstruction first if needed (checking if predictions exist)
909
  predictions_path = os.path.join(target_dir, "predictions.npz")