rayli committed on
Commit
e53e51e
·
verified ·
1 Parent(s): c7baf98

Cache mesh-invariant previews and auto-kinematics by mesh hash

Browse files
Files changed (1) hide show
  1. app.py +158 -9
app.py CHANGED
@@ -2,6 +2,7 @@ from __future__ import annotations
2
 
3
  import argparse
4
  import asyncio
 
5
  import json
6
  import os
7
  import shutil
@@ -2603,6 +2604,84 @@ def _upright_preview_paths(gallery_items: list[tuple[str, str]]) -> list[str | N
2603
  return paths[: len(UP_DIR_CHOICES)]
2604
 
2605
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2606
  def _set_preview_axes_equal(ax: Any, vertices: np.ndarray) -> None:
2607
  bbox_min = vertices.min(axis=0)
2608
  bbox_max = vertices.max(axis=0)
@@ -2855,9 +2934,16 @@ def _render_up_direction_previews(
2855
  *,
2856
  mesh_path: Path,
2857
  mesh: Any,
 
2858
  ) -> list[tuple[str, str]]:
2859
- timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
2860
- preview_dir = OUTPUT_ROOT / "up_direction_previews" / f"{mesh_path.stem}_{timestamp}"
 
 
 
 
 
 
2861
  preview_renderer = os.environ.get("UPRIGHT_PREVIEW_RENDERER", "software").strip().lower()
2862
  if preview_renderer == "blender":
2863
  try:
@@ -3023,6 +3109,8 @@ def _normalize_point_prompt_arrays(
3023
  normals: np.ndarray,
3024
  mesh_geometry: Any,
3025
  ) -> tuple[np.ndarray, np.ndarray]:
 
 
3026
  rotation = np.asarray(mesh_geometry.up_dir_rotation, dtype=np.float32)
3027
  rotated_points = np.asarray(points, dtype=np.float32) @ rotation.T
3028
  rotated_normals = np.asarray(normals, dtype=np.float32) @ rotation.T
@@ -3100,6 +3188,9 @@ def _point_prompt_json_from_normalized_prompts(
3100
  normalized_normals: np.ndarray,
3101
  mesh_geometry: Any,
3102
  ) -> str:
 
 
 
3103
  rotation = np.asarray(mesh_geometry.up_dir_rotation, dtype=np.float32)
3104
  points = np.asarray(normalized_points, dtype=np.float32)
3105
  normals = np.asarray(normalized_normals, dtype=np.float32)
@@ -3352,6 +3443,7 @@ class InstructParticulateApp:
3352
  mesh_path = _extract_gradio_path(mesh_value)
3353
  if mesh_path is None:
3354
  yield (
 
3355
  None,
3356
  "Upload a mesh to run inference.",
3357
  "",
@@ -3363,9 +3455,11 @@ class InstructParticulateApp:
3363
  )
3364
  return
3365
  try:
 
3366
  mesh = load_trimesh(mesh_path)
3367
  except Exception as exc:
3368
  yield (
 
3369
  None,
3370
  f"Could not load mesh: {exc}",
3371
  "",
@@ -3377,10 +3471,21 @@ class InstructParticulateApp:
3377
  )
3378
  return
3379
 
3380
- rendering_orientation_previews = _upright_rendering_preview_paths()
 
 
 
 
 
 
 
 
 
 
3381
  yield (
3382
  str(mesh_path),
3383
- f"Loaded {mesh_path.name}: {len(mesh.faces)} faces. Rendering upright orientation previews...",
 
3384
  "",
3385
  *rendering_orientation_previews,
3386
  "",
@@ -3394,6 +3499,7 @@ class InstructParticulateApp:
3394
  except Exception as exc:
3395
  yield (
3396
  str(mesh_path),
 
3397
  f"Loaded {mesh_path.name}: {len(mesh.faces)} faces. Could not prepare point picker mesh: {exc}",
3398
  "",
3399
  *rendering_orientation_previews,
@@ -3406,7 +3512,8 @@ class InstructParticulateApp:
3406
 
3407
  yield (
3408
  str(mesh_path),
3409
- f"Loaded {mesh_path.name}: {len(mesh.faces)} faces. Point picker ready. Rendering upright orientation previews...",
 
3410
  prompt_mesh_data,
3411
  *rendering_orientation_previews,
3412
  "",
@@ -3414,16 +3521,31 @@ class InstructParticulateApp:
3414
  gr.update(interactive=False),
3415
  gr.update(value=None, interactive=False),
3416
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
3417
 
3418
  try:
3419
  upright_previews = _render_up_direction_previews(
3420
  mesh_path=mesh_path,
3421
  mesh=mesh,
 
3422
  )
3423
  except Exception as exc:
3424
  traceback.print_exc()
3425
  yield (
3426
  str(mesh_path),
 
3427
  f"Loaded {mesh_path.name}: {len(mesh.faces)} faces. Upright-view previews could not be rendered: {exc}",
3428
  prompt_mesh_data,
3429
  *empty_orientation_previews,
@@ -3436,6 +3558,7 @@ class InstructParticulateApp:
3436
 
3437
  yield (
3438
  str(mesh_path),
 
3439
  f"Loaded {mesh_path.name}: {len(mesh.faces)} faces. Select the upright orientation before inference.",
3440
  prompt_mesh_data,
3441
  *_upright_preview_paths(upright_previews),
@@ -3448,6 +3571,7 @@ class InstructParticulateApp:
3448
  def extract_kinematic_structure(
3449
  self,
3450
  mesh_path_value: Any,
 
3451
  current_kinematic_tree_json: str,
3452
  current_point_prompt_json: str,
3453
  up_dir: str,
@@ -3475,8 +3599,24 @@ class InstructParticulateApp:
3475
  gallery_items: list[tuple[str, str]] = []
3476
  try:
3477
  canonical_up = canonicalize_up_dir(up_dir)
3478
- timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
3479
- output_dir = self.output_root / f"{mesh_path.stem}_auto_{timestamp}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3480
  auto_output_dir = output_dir / "auto_kinematics"
3481
  renders_dir = auto_output_dir / "renders"
3482
  auto_output_dir.mkdir(parents=True, exist_ok=True)
@@ -3566,6 +3706,7 @@ class InstructParticulateApp:
3566
  extracted_prompt_json + "\n",
3567
  encoding="utf-8",
3568
  )
 
3569
 
3570
  yield (
3571
  extracted_tree_json,
@@ -3587,6 +3728,7 @@ class InstructParticulateApp:
3587
  def predict_segmentation_payload(
3588
  self,
3589
  mesh_path_value: Any,
 
3590
  kinematic_tree_json: str,
3591
  point_prompt_json: str,
3592
  up_dir: str,
@@ -3635,8 +3777,8 @@ class InstructParticulateApp:
3635
  )
3636
  return
3637
  canonical_up = canonicalize_up_dir(up_dir)
3638
- timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
3639
- output_dir = self.output_root / f"{mesh_path.stem}_{timestamp}"
3640
  output_dir.mkdir(parents=True, exist_ok=True)
3641
  args = _make_inference_args(
3642
  output_dir=output_dir,
@@ -4273,6 +4415,7 @@ def _get_active_app() -> InstructParticulateApp:
4273
  @_spaces_gpu
4274
  def run_predict_on_gpu(
4275
  mesh_path_value: Any,
 
4276
  kinematic_tree_json: str,
4277
  point_prompt_json: str,
4278
  up_dir: str,
@@ -4286,6 +4429,7 @@ def run_predict_on_gpu(
4286
  ):
4287
  yield from _get_active_app().predict_segmentation_payload(
4288
  mesh_path_value,
 
4289
  kinematic_tree_json,
4290
  point_prompt_json,
4291
  up_dir,
@@ -4415,6 +4559,7 @@ def create_gradio_app(app: InstructParticulateApp) -> gr.Blocks:
4415
  )
4416
 
4417
  loaded_mesh_path = gr.State(None)
 
4418
  selected_up_dir = gr.Textbox(
4419
  value="",
4420
  label="Selected Upright Direction",
@@ -4588,6 +4733,7 @@ def create_gradio_app(app: InstructParticulateApp) -> gr.Blocks:
4588
  inputs=[input_mesh],
4589
  outputs=[
4590
  loaded_mesh_path,
 
4591
  mesh_status,
4592
  point_prompt_mesh_data,
4593
  *upright_preview_images,
@@ -4607,6 +4753,7 @@ def create_gradio_app(app: InstructParticulateApp) -> gr.Blocks:
4607
  inputs=[input_mesh],
4608
  outputs=[
4609
  loaded_mesh_path,
 
4610
  mesh_status,
4611
  point_prompt_mesh_data,
4612
  *upright_preview_images,
@@ -4620,6 +4767,7 @@ def create_gradio_app(app: InstructParticulateApp) -> gr.Blocks:
4620
  fn=app.extract_kinematic_structure,
4621
  inputs=[
4622
  input_mesh,
 
4623
  kinematic_tree,
4624
  point_prompts,
4625
  selected_up_dir,
@@ -4647,6 +4795,7 @@ def create_gradio_app(app: InstructParticulateApp) -> gr.Blocks:
4647
  fn=run_predict_on_gpu,
4648
  inputs=[
4649
  input_mesh,
 
4650
  kinematic_tree,
4651
  point_prompts,
4652
  selected_up_dir,
 
2
 
3
  import argparse
4
  import asyncio
5
+ import hashlib
6
  import json
7
  import os
8
  import shutil
 
2604
  return paths[: len(UP_DIR_CHOICES)]
2605
 
2606
 
2607
+ def _mesh_file_sha256(mesh_path: Path) -> str:
2608
+ digest = hashlib.sha256()
2609
+ with mesh_path.open("rb") as file:
2610
+ for chunk in iter(lambda: file.read(1024 * 1024), b""):
2611
+ digest.update(chunk)
2612
+ return digest.hexdigest()
2613
+
2614
+
2615
def _mesh_cache_dir(mesh_hash: str) -> Path:
    """Return the per-mesh cache directory under ``OUTPUT_ROOT / "mesh_cache"``.

    The directory is named after ``mesh_hash``; nothing is created on disk.
    """
    cache_root = OUTPUT_ROOT / "mesh_cache"
    return cache_root / str(mesh_hash)
2617
+
2618
+
2619
+ def _timestamped_mesh_output_dir(output_root: Path, mesh_hash: str, suffix: str = "") -> Path:
2620
+ timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
2621
+ name_parts = [str(mesh_hash)]
2622
+ if suffix:
2623
+ name_parts.append(str(suffix))
2624
+ name_parts.append(timestamp)
2625
+ return output_root / "_".join(name_parts)
2626
+
2627
+
2628
def _upright_preview_cache_dir(mesh_hash: str) -> Path:
    """Return the ``upright_previews`` folder inside the mesh's cache directory."""
    mesh_root = _mesh_cache_dir(mesh_hash)
    return mesh_root / "upright_previews"
2630
+
2631
+
2632
def _cached_upright_preview_items(mesh_hash: str) -> list[tuple[str, str]] | None:
    """Return cached upright preview gallery items, or ``None`` on a cache miss.

    One ``up_<slug>.png`` file is expected per entry of ``UP_DIR_CHOICES``.
    The cache only counts as a hit when every one of those files exists; the
    first missing file makes the whole lookup return ``None``.

    Returns:
        ``(image_path, caption)`` pairs in ``UP_DIR_CHOICES`` order, or
        ``None`` if any preview image is absent.
    """
    cache_dir = _upright_preview_cache_dir(mesh_hash)
    items: list[tuple[str, str]] = []
    for direction in UP_DIR_CHOICES:
        image_path = cache_dir / f"up_{_up_dir_slug(direction)}.png"
        if image_path.exists():
            items.append((str(image_path), f"{direction} up"))
            continue
        # A partial set of previews is treated as no cache at all.
        return None
    return items
2641
+
2642
+
2643
def _auto_kinematics_cache_dir(mesh_hash: str, up_dir: str) -> Path:
    """Return the auto-kinematics cache folder for ``mesh_hash``.

    ``up_dir`` is accepted but does not influence the returned path, so a
    single cache entry is shared by every upright direction of the same mesh.
    """
    mesh_root = _mesh_cache_dir(mesh_hash)
    return mesh_root / "auto_kinematics"
2645
+
2646
+
2647
+ def _cached_auto_kinematics(
2648
+ cache_dir: Path,
2649
+ ) -> tuple[str, str, list[tuple[str, str]]] | None:
2650
+ tree_path = cache_dir / "demo_kinematic_tree.json"
2651
+ prompt_path = cache_dir / "demo_point_prompts.json"
2652
+ renders_dir = cache_dir / "renders"
2653
+ if not tree_path.exists() or not prompt_path.exists() or not renders_dir.exists():
2654
+ return None
2655
+ render_paths = sorted(renders_dir.glob("view_*.png"))
2656
+ if len(render_paths) == 0:
2657
+ return None
2658
+ overlay_paths = sorted((cache_dir / "point_prompt_overlays").glob("*.png"))
2659
+ gallery_items = [
2660
+ (str(render_path), f"Render {index}")
2661
+ for index, render_path in enumerate(render_paths)
2662
+ ]
2663
+ gallery_items.extend(
2664
+ (str(overlay_path), f"Prompt overlay {overlay_path.stem}")
2665
+ for overlay_path in overlay_paths
2666
+ )
2667
+ return (
2668
+ tree_path.read_text(encoding="utf-8").strip(),
2669
+ prompt_path.read_text(encoding="utf-8").strip(),
2670
+ gallery_items,
2671
+ )
2672
+
2673
+
2674
+ def _store_auto_kinematics_cache(source_dir: Path, cache_dir: Path) -> None:
2675
+ temp_dir = cache_dir.parent / f".{cache_dir.name}.tmp_{datetime.now().strftime('%Y%m%d_%H%M%S_%f')}"
2676
+ if temp_dir.exists():
2677
+ shutil.rmtree(temp_dir)
2678
+ temp_dir.parent.mkdir(parents=True, exist_ok=True)
2679
+ shutil.copytree(source_dir, temp_dir)
2680
+ if cache_dir.exists():
2681
+ shutil.rmtree(cache_dir)
2682
+ temp_dir.rename(cache_dir)
2683
+
2684
+
2685
  def _set_preview_axes_equal(ax: Any, vertices: np.ndarray) -> None:
2686
  bbox_min = vertices.min(axis=0)
2687
  bbox_max = vertices.max(axis=0)
 
2934
  *,
2935
  mesh_path: Path,
2936
  mesh: Any,
2937
+ mesh_hash: str | None = None,
2938
  ) -> list[tuple[str, str]]:
2939
+ if mesh_hash is not None:
2940
+ cached_items = _cached_upright_preview_items(mesh_hash)
2941
+ if cached_items is not None:
2942
+ return cached_items
2943
+ preview_dir = _upright_preview_cache_dir(mesh_hash)
2944
+ else:
2945
+ timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
2946
+ preview_dir = OUTPUT_ROOT / "up_direction_previews" / f"{mesh_path.stem}_{timestamp}"
2947
  preview_renderer = os.environ.get("UPRIGHT_PREVIEW_RENDERER", "software").strip().lower()
2948
  if preview_renderer == "blender":
2949
  try:
 
3109
  normals: np.ndarray,
3110
  mesh_geometry: Any,
3111
  ) -> tuple[np.ndarray, np.ndarray]:
3112
+ # Point prompt JSON stores raw upload-space coordinates; inference uses the
3113
+ # same upright transform as the input mesh before normalization.
3114
  rotation = np.asarray(mesh_geometry.up_dir_rotation, dtype=np.float32)
3115
  rotated_points = np.asarray(points, dtype=np.float32) @ rotation.T
3116
  rotated_normals = np.asarray(normals, dtype=np.float32) @ rotation.T
 
3188
  normalized_normals: np.ndarray,
3189
  mesh_geometry: Any,
3190
  ) -> str:
3191
+ # Auto-kinematics lifting produces normalized model-space prompts. Store
3192
+ # them back in raw upload-space coordinates so cached prompts are reusable
3193
+ # across later upright-orientation choices.
3194
  rotation = np.asarray(mesh_geometry.up_dir_rotation, dtype=np.float32)
3195
  points = np.asarray(normalized_points, dtype=np.float32)
3196
  normals = np.asarray(normalized_normals, dtype=np.float32)
 
3443
  mesh_path = _extract_gradio_path(mesh_value)
3444
  if mesh_path is None:
3445
  yield (
3446
+ None,
3447
  None,
3448
  "Upload a mesh to run inference.",
3449
  "",
 
3455
  )
3456
  return
3457
  try:
3458
+ mesh_hash = _mesh_file_sha256(mesh_path)
3459
  mesh = load_trimesh(mesh_path)
3460
  except Exception as exc:
3461
  yield (
3462
+ None,
3463
  None,
3464
  f"Could not load mesh: {exc}",
3465
  "",
 
3471
  )
3472
  return
3473
 
3474
+ cached_upright_previews = _cached_upright_preview_items(mesh_hash)
3475
+ rendering_orientation_previews = (
3476
+ _upright_preview_paths(cached_upright_previews)
3477
+ if cached_upright_previews is not None
3478
+ else _upright_rendering_preview_paths()
3479
+ )
3480
+ preview_status = (
3481
+ "Using cached upright orientation previews."
3482
+ if cached_upright_previews is not None
3483
+ else "Rendering upright orientation previews..."
3484
+ )
3485
  yield (
3486
  str(mesh_path),
3487
+ mesh_hash,
3488
+ f"Loaded {mesh_path.name}: {len(mesh.faces)} faces. {preview_status}",
3489
  "",
3490
  *rendering_orientation_previews,
3491
  "",
 
3499
  except Exception as exc:
3500
  yield (
3501
  str(mesh_path),
3502
+ mesh_hash,
3503
  f"Loaded {mesh_path.name}: {len(mesh.faces)} faces. Could not prepare point picker mesh: {exc}",
3504
  "",
3505
  *rendering_orientation_previews,
 
3512
 
3513
  yield (
3514
  str(mesh_path),
3515
+ mesh_hash,
3516
+ f"Loaded {mesh_path.name}: {len(mesh.faces)} faces. Point picker ready. {preview_status}",
3517
  prompt_mesh_data,
3518
  *rendering_orientation_previews,
3519
  "",
 
3521
  gr.update(interactive=False),
3522
  gr.update(value=None, interactive=False),
3523
  )
3524
+ if cached_upright_previews is not None:
3525
+ yield (
3526
+ str(mesh_path),
3527
+ mesh_hash,
3528
+ f"Loaded {mesh_path.name}: {len(mesh.faces)} faces. Select the upright orientation before inference.",
3529
+ prompt_mesh_data,
3530
+ *_upright_preview_paths(cached_upright_previews),
3531
+ "",
3532
+ None,
3533
+ gr.update(interactive=False),
3534
+ gr.update(value=None, interactive=False),
3535
+ )
3536
+ return
3537
 
3538
  try:
3539
  upright_previews = _render_up_direction_previews(
3540
  mesh_path=mesh_path,
3541
  mesh=mesh,
3542
+ mesh_hash=mesh_hash,
3543
  )
3544
  except Exception as exc:
3545
  traceback.print_exc()
3546
  yield (
3547
  str(mesh_path),
3548
+ mesh_hash,
3549
  f"Loaded {mesh_path.name}: {len(mesh.faces)} faces. Upright-view previews could not be rendered: {exc}",
3550
  prompt_mesh_data,
3551
  *empty_orientation_previews,
 
3558
 
3559
  yield (
3560
  str(mesh_path),
3561
+ mesh_hash,
3562
  f"Loaded {mesh_path.name}: {len(mesh.faces)} faces. Select the upright orientation before inference.",
3563
  prompt_mesh_data,
3564
  *_upright_preview_paths(upright_previews),
 
3571
  def extract_kinematic_structure(
3572
  self,
3573
  mesh_path_value: Any,
3574
+ mesh_hash_value: Any,
3575
  current_kinematic_tree_json: str,
3576
  current_point_prompt_json: str,
3577
  up_dir: str,
 
3599
  gallery_items: list[tuple[str, str]] = []
3600
  try:
3601
  canonical_up = canonicalize_up_dir(up_dir)
3602
+ mesh_hash = str(mesh_hash_value or _mesh_file_sha256(mesh_path))
3603
+ cache_dir = _auto_kinematics_cache_dir(mesh_hash, canonical_up)
3604
+ cached_auto = _cached_auto_kinematics(cache_dir)
3605
+ if cached_auto is not None:
3606
+ extracted_tree_json, extracted_prompt_json, gallery_items = cached_auto
3607
+ yield (
3608
+ extracted_tree_json,
3609
+ extracted_prompt_json,
3610
+ gallery_items,
3611
+ "Success (cached)",
3612
+ )
3613
+ return
3614
+
3615
+ output_dir = _timestamped_mesh_output_dir(
3616
+ self.output_root,
3617
+ mesh_hash,
3618
+ suffix="auto",
3619
+ )
3620
  auto_output_dir = output_dir / "auto_kinematics"
3621
  renders_dir = auto_output_dir / "renders"
3622
  auto_output_dir.mkdir(parents=True, exist_ok=True)
 
3706
  extracted_prompt_json + "\n",
3707
  encoding="utf-8",
3708
  )
3709
+ _store_auto_kinematics_cache(auto_output_dir, cache_dir)
3710
 
3711
  yield (
3712
  extracted_tree_json,
 
3728
  def predict_segmentation_payload(
3729
  self,
3730
  mesh_path_value: Any,
3731
+ mesh_hash_value: Any,
3732
  kinematic_tree_json: str,
3733
  point_prompt_json: str,
3734
  up_dir: str,
 
3777
  )
3778
  return
3779
  canonical_up = canonicalize_up_dir(up_dir)
3780
+ mesh_hash = str(mesh_hash_value or _mesh_file_sha256(mesh_path))
3781
+ output_dir = _timestamped_mesh_output_dir(self.output_root, mesh_hash)
3782
  output_dir.mkdir(parents=True, exist_ok=True)
3783
  args = _make_inference_args(
3784
  output_dir=output_dir,
 
4415
  @_spaces_gpu
4416
  def run_predict_on_gpu(
4417
  mesh_path_value: Any,
4418
+ mesh_hash_value: Any,
4419
  kinematic_tree_json: str,
4420
  point_prompt_json: str,
4421
  up_dir: str,
 
4429
  ):
4430
  yield from _get_active_app().predict_segmentation_payload(
4431
  mesh_path_value,
4432
+ mesh_hash_value,
4433
  kinematic_tree_json,
4434
  point_prompt_json,
4435
  up_dir,
 
4559
  )
4560
 
4561
  loaded_mesh_path = gr.State(None)
4562
+ loaded_mesh_hash = gr.State(None)
4563
  selected_up_dir = gr.Textbox(
4564
  value="",
4565
  label="Selected Upright Direction",
 
4733
  inputs=[input_mesh],
4734
  outputs=[
4735
  loaded_mesh_path,
4736
+ loaded_mesh_hash,
4737
  mesh_status,
4738
  point_prompt_mesh_data,
4739
  *upright_preview_images,
 
4753
  inputs=[input_mesh],
4754
  outputs=[
4755
  loaded_mesh_path,
4756
+ loaded_mesh_hash,
4757
  mesh_status,
4758
  point_prompt_mesh_data,
4759
  *upright_preview_images,
 
4767
  fn=app.extract_kinematic_structure,
4768
  inputs=[
4769
  input_mesh,
4770
+ loaded_mesh_hash,
4771
  kinematic_tree,
4772
  point_prompts,
4773
  selected_up_dir,
 
4795
  fn=run_predict_on_gpu,
4796
  inputs=[
4797
  input_mesh,
4798
+ loaded_mesh_hash,
4799
  kinematic_tree,
4800
  point_prompts,
4801
  selected_up_dir,