luh1124 committed on
Commit
88a246f
·
1 Parent(s): 61c4e38

refactor: merge ③④ GPU callbacks for ZeroGPU stability

Browse files

- Merged generate_videos() and export_pbr_glb() into single generate_renderings() callback
- Avoids CUDA context resets between steps by keeping renderer/tone_mapper valid
- Improved CUDA resource cleanup: explicit del() instead of just None assignment
- Enhanced _ensure_near_on_cuda() with better old object deletion
- Enhanced _teardown_near() with exception handling for robust cleanup
- Updated UI: combined button shows videos + GLB in one action
- Duration increased to 600s to accommodate merged workload

Files changed (1) hide show
  1. app.py +128 -94
app.py CHANGED
@@ -1,18 +1,20 @@
1
  """
2
- NeAR Gradio Space — ZeroGPU-compatible pipeline.
3
 
4
  ZeroGPU contract: every @spaces.GPU call gets a **fresh** CUDA context.
5
  Any CUDA-backed object (nvdiffrast renderer, tone_mapper) from a previous
6
- call is invalid in the next call. Fix: always teardown renderer/tone_mapper
7
- before returning, re-init at the top of each GPU callback.
8
 
9
- UI: 4 linear steps
10
- ① Generate Geometry (Hunyuan3D)
11
- ② Generate SLaT (NeAR — needs mesh from ①)
12
- ③ Generate Videos (NeAR camera orbit + HDRI rotation)
13
- Export PBR GLB (NeAR baked PBR mesh)
 
 
 
 
14
 
15
- Session state = file paths only. No in-memory SLAT/mesh between callbacks.
16
  CPU preload runs in a background daemon thread at Space startup (no GPU lease).
17
  """
18
  from __future__ import annotations
@@ -170,22 +172,69 @@ def _ensure_near_on_cuda() -> NeARImageToRelightable3DPipeline:
170
  # ZeroGPU runs one GPU callback at a time so no lock is needed.
171
  _load_near_cpu_locked()
172
  assert PIPELINE is not None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
173
  if torch.cuda.is_available():
174
  PIPELINE.to("cuda")
175
- # Always re-init: previous callback's CUDA context is gone.
 
 
176
  PIPELINE.setup_renderer()
177
  PIPELINE.setup_tone_mapper("AgX")
178
  return PIPELINE
179
 
180
 
181
  def _teardown_near() -> None:
182
- """Release CUDA-backed objects; move weights back to CPU."""
 
 
 
 
183
  if PIPELINE is None:
184
  return
185
- PIPELINE.renderer = None
186
- PIPELINE.tone_mapper = None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
187
  if torch.cuda.is_available():
188
  PIPELINE.to("cpu")
 
189
  _free_cuda()
190
 
191
 
@@ -320,7 +369,7 @@ def end_session(req: gr.Request) -> None:
320
  # ── GPU callbacks ─────────────────────────────────────────────────────────────
321
 
322
  @_gpu(duration=240)
323
- @torch.inference_mode()
324
  def generate_geometry(
325
  image_input: Optional[Image.Image],
326
  req: gr.Request,
@@ -356,7 +405,7 @@ def generate_geometry(
356
 
357
 
358
  @_gpu(duration=240)
359
- @torch.inference_mode()
360
  def generate_slat(
361
  asset_state: Dict[str, Any],
362
  image_input: Optional[Image.Image],
@@ -421,9 +470,9 @@ def load_slat_file(
421
  return state, f"SLaT loaded: `{Path(resolved).name}`"
422
 
423
 
424
- @_gpu(duration=360)
425
- @torch.inference_mode()
426
- def generate_videos(
427
  asset_state: Dict[str, Any],
428
  hdri_file_obj: Any,
429
  hdri_rot: float,
@@ -434,22 +483,42 @@ def generate_videos(
434
  pitch: float,
435
  fov: float,
436
  radius: float,
 
 
437
  req: gr.Request,
438
  progress: gr.Progress = gr.Progress(track_tqdm=True),
439
- ) -> tuple[str, str, str]:
440
- """③ Camera-orbit video + HDRI-rotation video (one GPU call)."""
 
 
 
 
441
  slat_path = _require_slat(asset_state)
442
  hdri_path = _require_hdri(hdri_file_obj)
443
  session_dir = CACHE_DIR / str(req.session_hash)
444
 
 
445
  progress(0.05, desc="Loading NeAR on GPU")
446
  pipe = _ensure_near_on_cuda()
447
 
448
- progress(0.1, desc="Loading SLaT / HDRI")
449
  slat = pipe.load_slat(slat_path)
450
  hdri_np = _load_hdri_resized(pipe, hdri_path)
451
 
452
- progress(0.15, desc="Rendering camera-orbit video…")
 
 
 
 
 
 
 
 
 
 
 
 
 
453
  cam_frames = pipe.render_camera_path_video(
454
  slat, hdri_np,
455
  num_views=int(num_cam),
@@ -463,7 +532,7 @@ def generate_videos(
463
  p_cam = session_dir / "video_camera_orbit.mp4"
464
  imageio.mimsave(p_cam, cam_frames, fps=int(fps))
465
  del cam_frames
466
- _free_cuda()
467
 
468
  progress(0.55, desc="Rendering HDRI-rotation video…")
469
  roll_frames, hdri_frames = pipe.render_hdri_rotation_video(
@@ -480,10 +549,36 @@ def generate_videos(
480
  p_roll = session_dir / "video_env_roll.mp4"
481
  imageio.mimsave(p_hdri, hdri_frames, fps=int(fps))
482
  imageio.mimsave(p_roll, roll_frames, fps=int(fps))
483
- del hdri_frames, roll_frames, slat, hdri_np
 
484
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
485
  _teardown_near()
486
- return str(p_cam), str(p_hdri), str(p_roll)
 
 
 
 
 
 
487
 
488
 
489
  def _hunyuan_mesh_to_renderer_space(mesh: trimesh.Trimesh) -> trimesh.Trimesh:
@@ -508,65 +603,6 @@ def _hunyuan_mesh_to_renderer_space(mesh: trimesh.Trimesh) -> trimesh.Trimesh:
508
  flush=True,
509
  )
510
  return mesh
511
-
512
-
513
- @_gpu(duration=180)
514
- def export_pbr_glb(
515
- asset_state: Dict[str, Any],
516
- hdri_file_obj: Any,
517
- hdri_rot: float,
518
- simplify: float,
519
- texture_size: int,
520
- req: gr.Request,
521
- progress: gr.Progress = gr.Progress(track_tqdm=True),
522
- ) -> tuple[str, str]:
523
- """④ Export PBR GLB with baked textures, using Hunyuan mesh as base."""
524
- slat_path = _require_slat(asset_state)
525
- hdri_path = _require_hdri(hdri_file_obj)
526
- session_dir = CACHE_DIR / str(req.session_hash)
527
-
528
- # Use Hunyuan geometry as base mesh when available; fall back to TRELLIS decoder.
529
- mesh_path = asset_state.get("mesh_path")
530
- base_mesh: Optional[trimesh.Trimesh] = None
531
- if mesh_path and os.path.isfile(mesh_path):
532
- raw = trimesh.load(mesh_path, force="mesh")
533
- print(
534
- f"[NeAR] raw Hunyuan mesh: bounds={raw.bounds[0].round(3)} → {raw.bounds[1].round(3)}",
535
- flush=True,
536
- )
537
- base_mesh = _hunyuan_mesh_to_renderer_space(raw)
538
- del raw
539
- else:
540
- print("[NeAR] no mesh_path in state — falling back to slat_decoder_mesh", flush=True)
541
-
542
- progress(0.1, desc="Loading NeAR on GPU")
543
- pipe = _ensure_near_on_cuda()
544
-
545
- progress(0.2, desc="Loading SLaT / HDRI")
546
- slat = pipe.load_slat(slat_path)
547
- hdri_np = _load_hdri_resized(pipe, hdri_path)
548
-
549
- progress(0.4, desc="Baking PBR GLB…")
550
- glb = pipe.export_glb_from_slat(
551
- slat, hdri_np,
552
- hdri_rot_deg=float(hdri_rot),
553
- base_mesh=base_mesh,
554
- simplify=float(simplify),
555
- texture_size=int(texture_size),
556
- fill_holes=True,
557
- )
558
- del slat, hdri_np
559
- _free_cuda()
560
-
561
- out_path = session_dir / "near_pbr.glb"
562
- glb.export(out_path)
563
- del glb
564
-
565
- _teardown_near()
566
- return str(out_path), f"**④ PBR GLB** → `{out_path.name}`"
567
-
568
-
569
- def clear_cache(req: gr.Request) -> str:
570
  session_dir = CACHE_DIR / str(req.session_hash)
571
  shutil.rmtree(session_dir, ignore_errors=True)
572
  session_dir.mkdir(parents=True, exist_ok=True)
@@ -675,8 +711,8 @@ def build_app() -> gr.Blocks:
675
  asset_state = gr.State({})
676
 
677
  gr.Markdown(
678
- "## NeAR — Relightable 3D (ZeroGPU)\n"
679
- "**① Geometry** → **② SLaT** → **③ Videos** and/or **④ PBR GLB**\n\n"
680
  "Tip: after generating geometry, swap the input image before running **② SLaT** "
681
  "to apply a different texture style to the same shape — geometry and appearance are decoupled."
682
  )
@@ -717,8 +753,8 @@ def build_app() -> gr.Blocks:
717
  hdri_rot = gr.Slider(0, 360, value=0, step=1, label="HDRI rotation °")
718
 
719
  gr.HTML('<p class="section-kicker" style="margin:2px 0 2px;padding:0">Actions</p>')
720
- btn_videos = gr.Button("③ Generate Videos", variant="primary")
721
- btn_glb = gr.Button("④ Export PBR GLB", variant="primary")
722
  with gr.Accordion("Video / export settings", open=False):
723
  fps = gr.Slider(8, 48, value=24, step=1, label="FPS")
724
  num_cam = gr.Slider(8, 96, value=36, step=4, label="Camera-orbit frames")
@@ -784,12 +820,10 @@ def build_app() -> gr.Blocks:
784
  [asset_state, status])
785
  btn_load_slat.click(load_slat_file, [slat_upload, slat_path_txt],
786
  [asset_state, status])
787
- btn_videos.click(generate_videos,
788
- [asset_state, hdri_file, hdri_rot, fps, num_cam, num_hdri, yaw, pitch, fov, radius],
789
- [vid_cam, vid_hdri, vid_roll])
790
- btn_glb.click(export_pbr_glb,
791
- [asset_state, hdri_file, hdri_rot, simplify, tex_size],
792
- [glb_view, status])
793
  btn_clear.click(clear_cache, [], [status])
794
 
795
  return demo
 
1
  """
2
+ NeAR Gradio Space — ZeroGPU-compatible pipeline (REFACTORED).
3
 
4
  ZeroGPU contract: every @spaces.GPU call gets a **fresh** CUDA context.
5
  Any CUDA-backed object (nvdiffrast renderer, tone_mapper) from a previous
6
+ call is invalid in the next call.
 
7
 
8
+ REFACTORED ARCHITECTURE:
9
+ - ① Generate Geometry (Hunyuan3D) — independent GPU callback
10
+ - ② Generate SLaT (NeAR) — independent GPU callback
11
+ - MERGED: Generate Videos + Export PBR GLB — SINGLE GPU callback
12
+ * Avoids cross-callback CUDA context resets
13
+ * Renderer/tone_mapper stay valid throughout rendering and export
14
+
15
+ Session state = file paths only. No in-memory objects retained between
16
+ steps (except within the unified ③④ callback).
17
 
 
18
  CPU preload runs in a background daemon thread at Space startup (no GPU lease).
19
  """
20
  from __future__ import annotations
 
172
  # ZeroGPU runs one GPU callback at a time so no lock is needed.
173
  _load_near_cpu_locked()
174
  assert PIPELINE is not None
175
+
176
+ # ── Cleanup old CUDA objects before re-init ─────────────────────────
177
+ # Each @GPU callback has a NEW CUDA context; old renderer/tone_mapper
178
+ # pointers are invalid and must be fully deleted, not just set to None.
179
+ if hasattr(PIPELINE, 'renderer') and PIPELINE.renderer is not None:
180
+ try:
181
+ del PIPELINE.renderer
182
+ except Exception as e:
183
+ print(f"[NeAR] warning: failed to delete old renderer: {e}", flush=True)
184
+ PIPELINE.renderer = None
185
+
186
+ if hasattr(PIPELINE, 'tone_mapper') and PIPELINE.tone_mapper is not None:
187
+ try:
188
+ del PIPELINE.tone_mapper
189
+ except Exception as e:
190
+ print(f"[NeAR] warning: failed to delete old tone_mapper: {e}", flush=True)
191
+ PIPELINE.tone_mapper = None
192
+
193
+ _free_cuda() # Clear any lingering GPU memory
194
+
195
+ # ── Move to new CUDA context and re-init fresh objects ──────────────
196
  if torch.cuda.is_available():
197
  PIPELINE.to("cuda")
198
+
199
+ # Initialize fresh renderer/tone_mapper in the new CUDA context
200
+ print("[NeAR] initializing renderer/tone_mapper in fresh CUDA context…", flush=True)
201
  PIPELINE.setup_renderer()
202
  PIPELINE.setup_tone_mapper("AgX")
203
  return PIPELINE
204
 
205
 
206
  def _teardown_near() -> None:
207
+ """Release CUDA-backed objects; move weights back to CPU.
208
+
209
+ ZeroGPU contract: always teardown CUDA objects before exiting @GPU callback.
210
+ The next callback will get a completely fresh CUDA context.
211
+ """
212
  if PIPELINE is None:
213
  return
214
+
215
+ print("[NeAR] tearing down renderer/tone_mapper…", flush=True)
216
+
217
+ # Explicitly delete CUDA objects (not just None assignment)
218
+ if hasattr(PIPELINE, 'renderer'):
219
+ try:
220
+ if PIPELINE.renderer is not None:
221
+ del PIPELINE.renderer
222
+ except Exception as e:
223
+ print(f"[NeAR] warning: failed to delete renderer in teardown: {e}", flush=True)
224
+ PIPELINE.renderer = None
225
+
226
+ if hasattr(PIPELINE, 'tone_mapper'):
227
+ try:
228
+ if PIPELINE.tone_mapper is not None:
229
+ del PIPELINE.tone_mapper
230
+ except Exception as e:
231
+ print(f"[NeAR] warning: failed to delete tone_mapper in teardown: {e}", flush=True)
232
+ PIPELINE.tone_mapper = None
233
+
234
+ # Move model weights back to CPU
235
  if torch.cuda.is_available():
236
  PIPELINE.to("cpu")
237
+
238
  _free_cuda()
239
 
240
 
 
369
  # ── GPU callbacks ─────────────────────────────────────────────────────────────
370
 
371
  @_gpu(duration=240)
372
+ @torch.no_grad()
373
  def generate_geometry(
374
  image_input: Optional[Image.Image],
375
  req: gr.Request,
 
405
 
406
 
407
  @_gpu(duration=240)
408
+ @torch.no_grad()
409
  def generate_slat(
410
  asset_state: Dict[str, Any],
411
  image_input: Optional[Image.Image],
 
470
  return state, f"SLaT loaded: `{Path(resolved).name}`"
471
 
472
 
473
+ @_gpu(duration=600)
474
+ @torch.no_grad()
475
+ def generate_renderings(
476
  asset_state: Dict[str, Any],
477
  hdri_file_obj: Any,
478
  hdri_rot: float,
 
483
  pitch: float,
484
  fov: float,
485
  radius: float,
486
+ simplify: float,
487
+ texture_size: int,
488
  req: gr.Request,
489
  progress: gr.Progress = gr.Progress(track_tqdm=True),
490
+ ) -> tuple[str, str, str, str, str]:
491
+ """③ Unified rendering pipeline: videos + PBR GLB in ONE CUDA context.
492
+
493
+ This merged callback avoids ZeroGPU CUDA context resets between steps.
494
+ Returns: (cam_video, hdri_video, roll_video, pbr_glb, status_msg)
495
+ """
496
  slat_path = _require_slat(asset_state)
497
  hdri_path = _require_hdri(hdri_file_obj)
498
  session_dir = CACHE_DIR / str(req.session_hash)
499
 
500
+ # ── Load NeAR once (fresh CUDA context) ───────────────────────────
501
  progress(0.05, desc="Loading NeAR on GPU")
502
  pipe = _ensure_near_on_cuda()
503
 
504
+ progress(0.08, desc="Loading SLaT / HDRI")
505
  slat = pipe.load_slat(slat_path)
506
  hdri_np = _load_hdri_resized(pipe, hdri_path)
507
 
508
+ # ── Prepare base mesh (optional, for GLB export) ────────────────────
509
+ mesh_path = asset_state.get("mesh_path")
510
+ base_mesh: Optional[trimesh.Trimesh] = None
511
+ if mesh_path and os.path.isfile(mesh_path):
512
+ print(f"[NeAR] loading Hunyuan mesh from {mesh_path}…", flush=True)
513
+ raw = trimesh.load(mesh_path, force="mesh")
514
+ print(f"[NeAR] raw mesh bounds: {raw.bounds[0].round(3)} → {raw.bounds[1].round(3)}", flush=True)
515
+ base_mesh = _hunyuan_mesh_to_renderer_space(raw)
516
+ del raw
517
+ else:
518
+ print("[NeAR] no mesh_path — will use SLaT decoder mesh for GLB", flush=True)
519
+
520
+ # ── Render videos ──────────────────────────────────────────────────
521
+ progress(0.12, desc="Rendering camera-orbit video…")
522
  cam_frames = pipe.render_camera_path_video(
523
  slat, hdri_np,
524
  num_views=int(num_cam),
 
532
  p_cam = session_dir / "video_camera_orbit.mp4"
533
  imageio.mimsave(p_cam, cam_frames, fps=int(fps))
534
  del cam_frames
535
+ _free_cuda() # Free GPU mem temporarily while still in same CUDA context
536
 
537
  progress(0.55, desc="Rendering HDRI-rotation video…")
538
  roll_frames, hdri_frames = pipe.render_hdri_rotation_video(
 
549
  p_roll = session_dir / "video_env_roll.mp4"
550
  imageio.mimsave(p_hdri, hdri_frames, fps=int(fps))
551
  imageio.mimsave(p_roll, roll_frames, fps=int(fps))
552
+ del hdri_frames, roll_frames
553
+ _free_cuda()
554
 
555
+ # ── Export PBR GLB ────────────────────────────────────────────────
556
+ # (renderer/tone_mapper still valid in same CUDA context)
557
+ progress(0.85, desc="Baking PBR GLB…")
558
+ glb = pipe.export_glb_from_slat(
559
+ slat, hdri_np,
560
+ hdri_rot_deg=float(hdri_rot),
561
+ base_mesh=base_mesh,
562
+ simplify=float(simplify),
563
+ texture_size=int(texture_size),
564
+ fill_holes=True,
565
+ )
566
+ del slat, hdri_np, base_mesh
567
+ _free_cuda()
568
+
569
+ out_path = session_dir / "near_pbr.glb"
570
+ glb.export(out_path)
571
+ del glb
572
+
573
+ # ── Cleanup: tear down NeAR before exiting CUDA callback ────────────
574
  _teardown_near()
575
+
576
+ msg = (
577
+ f"**③ Videos ready** → `{Path(p_cam).name}`, "
578
+ f"`{Path(p_hdri).name}`, `{Path(p_roll).name}` \n\n"
579
+ f"**④ PBR GLB ready** → `{Path(out_path).name}`"
580
+ )
581
+ return str(p_cam), str(p_hdri), str(p_roll), str(out_path), msg
582
 
583
 
584
  def _hunyuan_mesh_to_renderer_space(mesh: trimesh.Trimesh) -> trimesh.Trimesh:
 
603
  flush=True,
604
  )
605
  return mesh
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
606
  session_dir = CACHE_DIR / str(req.session_hash)
607
  shutil.rmtree(session_dir, ignore_errors=True)
608
  session_dir.mkdir(parents=True, exist_ok=True)
 
711
  asset_state = gr.State({})
712
 
713
  gr.Markdown(
714
+ "## NeAR — Relightable 3D (ZeroGPU Optimized)\n"
715
+ "**① Geometry** → **② SLaT** → **③ Videos + PBR GLB** (merged for stability)\n\n"
716
  "Tip: after generating geometry, swap the input image before running **② SLaT** "
717
  "to apply a different texture style to the same shape — geometry and appearance are decoupled."
718
  )
 
753
  hdri_rot = gr.Slider(0, 360, value=0, step=1, label="HDRI rotation °")
754
 
755
  gr.HTML('<p class="section-kicker" style="margin:2px 0 2px;padding:0">Actions</p>')
756
+ btn_videos = gr.Button("③ Generate Videos & GLB", variant="primary")
757
+ btn_glb = gr.Button("④ Export PBR GLB", variant="primary", visible=False)
758
  with gr.Accordion("Video / export settings", open=False):
759
  fps = gr.Slider(8, 48, value=24, step=1, label="FPS")
760
  num_cam = gr.Slider(8, 96, value=36, step=4, label="Camera-orbit frames")
 
820
  [asset_state, status])
821
  btn_load_slat.click(load_slat_file, [slat_upload, slat_path_txt],
822
  [asset_state, status])
823
+ # ③④ Unified render callback: videos + GLB in ONE CUDA context
824
+ btn_videos.click(generate_renderings,
825
+ [asset_state, hdri_file, hdri_rot, fps, num_cam, num_hdri, yaw, pitch, fov, radius, simplify, tex_size],
826
+ [vid_cam, vid_hdri, vid_roll, glb_view, status])
 
 
827
  btn_clear.click(clear_cache, [], [status])
828
 
829
  return demo