Spaces:

luh0502
/

NeAR

Running on Zero

luh1124 Claude Sonnet 4.6 committed on Mar 28

Commit

c513086

1 Parent(s): 75e7b40

fix(zerogpu): remove stale CUDA flags, enable CPU preload by default

- Remove _NEAR_ON_CUDA / _GEOMETRY_ON_CUDA flags: always call .to("cuda")
inside @GPU callbacks (.to() is a no-op when already on the correct device),
so a fresh ZeroGPU worker never skips the host-to-device transfer.
- Default NEAR_MODEL_CPU_PRELOAD_AT_START to 1 when spaces is available
(matches the existing NEAR_GEOMETRY_OFFLOAD_AFTER_MESH pattern): models
load on CPU before demo.launch(), so the first @GPU callback only pays the
fast H2D transfer instead of a full from_pretrained() under a 90s lease.
- Release _MODEL_LOCK before .to("cuda") and setup_renderer(): the lock only
guards the from_pretrained() call; heavy CUDA work no longer blocks other
threads waiting for the lock.
- Remove _MODEL_LOCK from the geometry offload path in generate_mesh().
- Same fixes applied to app_hyshape.py.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

Files changed (2) hide show

app.py +15 -24
app_hyshape.py +10 -14

app.py CHANGED Viewed

@@ -66,8 +66,6 @@ CACHE_DIR.mkdir(exist_ok=True)
 _MODEL_LOCK = threading.Lock()
 PIPELINE: Optional[NeARImageToRelightable3DPipeline] = None
 GEOMETRY_PIPELINE: Optional[Hunyuan3DDiTFlowMatchingPipeline] = None
-_NEAR_ON_CUDA = False
-_GEOMETRY_ON_CUDA = False
 tone_mapper = ToneMapper()
 AVAILABLE_TONE_MAPPERS = getattr(tone_mapper, "available_views", ["AgX"])
@@ -83,7 +81,9 @@ def _truthy_env(name: str, default: str) -> bool:
     return v in ("1", "true", "yes", "on")
-_CPU_PRELOAD_AT_START = _truthy_env("NEAR_MODEL_CPU_PRELOAD_AT_START", "0")
 _OFFLOAD_GEOMETRY_AFTER_MESH = _truthy_env(
     "NEAR_GEOMETRY_OFFLOAD_AFTER_MESH", "1" if spaces is not None else "0"
 )
@@ -142,30 +142,24 @@ def run_model_cpu_preload_blocking() -> None:
 def ensure_near_on_cuda() -> None:
-    """Load NeAR if needed (respects CPU preload), move to CUDA once, init renderer / tone mapper."""
-    global _NEAR_ON_CUDA
     with _MODEL_LOCK:
         _ensure_near_loaded_on_cpu_locked()
-        assert PIPELINE is not None
-        if torch.cuda.is_available() and not _NEAR_ON_CUDA:
-            PIPELINE.to("cuda")
-            _NEAR_ON_CUDA = True
-        if torch.cuda.is_available():
-            if PIPELINE.renderer is None:
-                PIPELINE.setup_renderer()
-            if PIPELINE.tone_mapper is None:
-                PIPELINE.setup_tone_mapper("AgX")
 def ensure_geometry_on_cuda() -> None:
-    global _GEOMETRY_ON_CUDA
     with _MODEL_LOCK:
         _ensure_geometry_loaded_on_cpu_locked()
-        assert GEOMETRY_PIPELINE is not None
-        if torch.cuda.is_available() and not _GEOMETRY_ON_CUDA:
-            GEOMETRY_PIPELINE.to("cuda")
-            _GEOMETRY_ON_CUDA = True
-            print("[NeAR] Hunyuan geometry pipeline on CUDA.", flush=True)
 def _try_release_cuda_memory() -> None:
@@ -320,11 +314,8 @@ def generate_mesh(
     del mesh
     _try_release_cuda_memory()
-    global _GEOMETRY_ON_CUDA
     if _OFFLOAD_GEOMETRY_AFTER_MESH and GEOMETRY_PIPELINE is not None and torch.cuda.is_available():
-        with _MODEL_LOCK:
-            GEOMETRY_PIPELINE.to("cpu")
-            _GEOMETRY_ON_CUDA = False
         _try_release_cuda_memory()
     state: Dict[str, Any] = {

 _MODEL_LOCK = threading.Lock()
 PIPELINE: Optional[NeARImageToRelightable3DPipeline] = None
 GEOMETRY_PIPELINE: Optional[Hunyuan3DDiTFlowMatchingPipeline] = None
 tone_mapper = ToneMapper()
 AVAILABLE_TONE_MAPPERS = getattr(tone_mapper, "available_views", ["AgX"])
     return v in ("1", "true", "yes", "on")
+_CPU_PRELOAD_AT_START = _truthy_env(
+    "NEAR_MODEL_CPU_PRELOAD_AT_START", "1" if spaces is not None else "0"
+)
 _OFFLOAD_GEOMETRY_AFTER_MESH = _truthy_env(
     "NEAR_GEOMETRY_OFFLOAD_AFTER_MESH", "1" if spaces is not None else "0"
 )
 def ensure_near_on_cuda() -> None:
+    """Load NeAR if needed (respects CPU preload), move to CUDA, init renderer / tone mapper."""
     with _MODEL_LOCK:
         _ensure_near_loaded_on_cpu_locked()
+    assert PIPELINE is not None
+    if torch.cuda.is_available():
+        PIPELINE.to("cuda")
+        if PIPELINE.renderer is None:
+            PIPELINE.setup_renderer()
+        if PIPELINE.tone_mapper is None:
+            PIPELINE.setup_tone_mapper("AgX")
 def ensure_geometry_on_cuda() -> None:
     with _MODEL_LOCK:
         _ensure_geometry_loaded_on_cpu_locked()
+    assert GEOMETRY_PIPELINE is not None
+    if torch.cuda.is_available():
+        GEOMETRY_PIPELINE.to("cuda")
 def _try_release_cuda_memory() -> None:
     del mesh
     _try_release_cuda_memory()
     if _OFFLOAD_GEOMETRY_AFTER_MESH and GEOMETRY_PIPELINE is not None and torch.cuda.is_available():
+        GEOMETRY_PIPELINE.to("cpu")
         _try_release_cuda_memory()
     state: Dict[str, Any] = {

app_hyshape.py CHANGED Viewed

@@ -79,7 +79,6 @@ _MODEL_LOCK = threading.Lock()
 _LIGHT_PREPROCESS_LOCK = threading.Lock()
 _LIGHT_PREPROCESSOR: Any | None = None
 GEOMETRY_PIPELINE: Any | None = None
-_GEOMETRY_ON_CUDA = False
 def _path_is_git_lfs_pointer(path: Path) -> bool:
@@ -251,21 +250,18 @@ def start_geometry_cpu_preload_thread() -> None:
 def ensure_geometry_on_cuda() -> Any:
     """Load on CPU if needed, then move to CUDA inside a ``@spaces.GPU`` callback."""
-    global _GEOMETRY_ON_CUDA
     with _MODEL_LOCK:
         pipeline = _ensure_geometry_loaded_on_cpu_locked()
-        if torch.cuda.is_available():
-            if not _GEOMETRY_ON_CUDA:
-                move_started_at = time.time()
-                pipeline.to("cuda")
-                _GEOMETRY_ON_CUDA = True
-                print(
-                    f"[HyShape] geometry moved to GPU in {time.time() - move_started_at:.1f}s",
-                    flush=True,
-                )
-        else:
-            print("[HyShape] CUDA unavailable in this callback; geometry stays on CPU.", flush=True)
-        return pipeline
 @GPU

 _LIGHT_PREPROCESS_LOCK = threading.Lock()
 _LIGHT_PREPROCESSOR: Any | None = None
 GEOMETRY_PIPELINE: Any | None = None
 def _path_is_git_lfs_pointer(path: Path) -> bool:
 def ensure_geometry_on_cuda() -> Any:
     """Load on CPU if needed, then move to CUDA inside a ``@spaces.GPU`` callback."""
     with _MODEL_LOCK:
         pipeline = _ensure_geometry_loaded_on_cpu_locked()
+    if torch.cuda.is_available():
+        move_started_at = time.time()
+        pipeline.to("cuda")
+        print(
+            f"[HyShape] geometry on GPU (to() took {time.time() - move_started_at:.1f}s)",
+            flush=True,
+        )
+    else:
+        print("[HyShape] CUDA unavailable in this callback; geometry stays on CPU.", flush=True)
+    return pipeline
 @GPU