fix(zerogpu): remove stale CUDA flags, enable CPU preload by default
Browse files
- Remove _NEAR_ON_CUDA / _GEOMETRY_ON_CUDA flags: always call .to("cuda")
inside @GPU callbacks (.to() is a no-op when already on the correct device),
so a fresh ZeroGPU worker never skips the host-to-device transfer.
- Default NEAR_MODEL_CPU_PRELOAD_AT_START to "1" when the `spaces` package is
available (matches the existing NEAR_GEOMETRY_OFFLOAD_AFTER_MESH pattern):
models load on CPU before demo.launch(), so the first @GPU callback only pays
the fast host-to-device (H2D) transfer instead of a full from_pretrained()
under a 90s lease.
- Release _MODEL_LOCK before .to("cuda") and setup_renderer(): the lock only
guards the from_pretrained() call; heavy CUDA work no longer blocks other
threads waiting for the lock.
- Remove _MODEL_LOCK from the geometry offload path in generate_mesh().
- Same fixes applied to app_hyshape.py.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
- app.py +15 -24
- app_hyshape.py +10 -14
|
@@ -66,8 +66,6 @@ CACHE_DIR.mkdir(exist_ok=True)
|
|
| 66 |
_MODEL_LOCK = threading.Lock()
|
| 67 |
PIPELINE: Optional[NeARImageToRelightable3DPipeline] = None
|
| 68 |
GEOMETRY_PIPELINE: Optional[Hunyuan3DDiTFlowMatchingPipeline] = None
|
| 69 |
-
_NEAR_ON_CUDA = False
|
| 70 |
-
_GEOMETRY_ON_CUDA = False
|
| 71 |
|
| 72 |
tone_mapper = ToneMapper()
|
| 73 |
AVAILABLE_TONE_MAPPERS = getattr(tone_mapper, "available_views", ["AgX"])
|
|
@@ -83,7 +81,9 @@ def _truthy_env(name: str, default: str) -> bool:
|
|
| 83 |
return v in ("1", "true", "yes", "on")
|
| 84 |
|
| 85 |
|
| 86 |
-
_CPU_PRELOAD_AT_START = _truthy_env(
|
|
|
|
|
|
|
| 87 |
_OFFLOAD_GEOMETRY_AFTER_MESH = _truthy_env(
|
| 88 |
"NEAR_GEOMETRY_OFFLOAD_AFTER_MESH", "1" if spaces is not None else "0"
|
| 89 |
)
|
|
@@ -142,30 +142,24 @@ def run_model_cpu_preload_blocking() -> None:
|
|
| 142 |
|
| 143 |
|
| 144 |
def ensure_near_on_cuda() -> None:
|
| 145 |
-
"""Load NeAR if needed (respects CPU preload), move to CUDA
|
| 146 |
-
global _NEAR_ON_CUDA
|
| 147 |
with _MODEL_LOCK:
|
| 148 |
_ensure_near_loaded_on_cpu_locked()
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
if PIPELINE.tone_mapper is None:
|
| 157 |
-
PIPELINE.setup_tone_mapper("AgX")
|
| 158 |
|
| 159 |
|
| 160 |
def ensure_geometry_on_cuda() -> None:
|
| 161 |
-
global _GEOMETRY_ON_CUDA
|
| 162 |
with _MODEL_LOCK:
|
| 163 |
_ensure_geometry_loaded_on_cpu_locked()
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
_GEOMETRY_ON_CUDA = True
|
| 168 |
-
print("[NeAR] Hunyuan geometry pipeline on CUDA.", flush=True)
|
| 169 |
|
| 170 |
|
| 171 |
def _try_release_cuda_memory() -> None:
|
|
@@ -320,11 +314,8 @@ def generate_mesh(
|
|
| 320 |
del mesh
|
| 321 |
_try_release_cuda_memory()
|
| 322 |
|
| 323 |
-
global _GEOMETRY_ON_CUDA
|
| 324 |
if _OFFLOAD_GEOMETRY_AFTER_MESH and GEOMETRY_PIPELINE is not None and torch.cuda.is_available():
|
| 325 |
-
|
| 326 |
-
GEOMETRY_PIPELINE.to("cpu")
|
| 327 |
-
_GEOMETRY_ON_CUDA = False
|
| 328 |
_try_release_cuda_memory()
|
| 329 |
|
| 330 |
state: Dict[str, Any] = {
|
|
|
|
| 66 |
_MODEL_LOCK = threading.Lock()
|
| 67 |
PIPELINE: Optional[NeARImageToRelightable3DPipeline] = None
|
| 68 |
GEOMETRY_PIPELINE: Optional[Hunyuan3DDiTFlowMatchingPipeline] = None
|
|
|
|
|
|
|
| 69 |
|
| 70 |
tone_mapper = ToneMapper()
|
| 71 |
AVAILABLE_TONE_MAPPERS = getattr(tone_mapper, "available_views", ["AgX"])
|
|
|
|
| 81 |
return v in ("1", "true", "yes", "on")
|
| 82 |
|
| 83 |
|
| 84 |
+
_CPU_PRELOAD_AT_START = _truthy_env(
|
| 85 |
+
"NEAR_MODEL_CPU_PRELOAD_AT_START", "1" if spaces is not None else "0"
|
| 86 |
+
)
|
| 87 |
_OFFLOAD_GEOMETRY_AFTER_MESH = _truthy_env(
|
| 88 |
"NEAR_GEOMETRY_OFFLOAD_AFTER_MESH", "1" if spaces is not None else "0"
|
| 89 |
)
|
|
|
|
| 142 |
|
| 143 |
|
| 144 |
def ensure_near_on_cuda() -> None:
|
| 145 |
+
"""Load NeAR if needed (respects CPU preload), move to CUDA, init renderer / tone mapper."""
|
|
|
|
| 146 |
with _MODEL_LOCK:
|
| 147 |
_ensure_near_loaded_on_cpu_locked()
|
| 148 |
+
assert PIPELINE is not None
|
| 149 |
+
if torch.cuda.is_available():
|
| 150 |
+
PIPELINE.to("cuda")
|
| 151 |
+
if PIPELINE.renderer is None:
|
| 152 |
+
PIPELINE.setup_renderer()
|
| 153 |
+
if PIPELINE.tone_mapper is None:
|
| 154 |
+
PIPELINE.setup_tone_mapper("AgX")
|
|
|
|
|
|
|
| 155 |
|
| 156 |
|
| 157 |
def ensure_geometry_on_cuda() -> None:
|
|
|
|
| 158 |
with _MODEL_LOCK:
|
| 159 |
_ensure_geometry_loaded_on_cpu_locked()
|
| 160 |
+
assert GEOMETRY_PIPELINE is not None
|
| 161 |
+
if torch.cuda.is_available():
|
| 162 |
+
GEOMETRY_PIPELINE.to("cuda")
|
|
|
|
|
|
|
| 163 |
|
| 164 |
|
| 165 |
def _try_release_cuda_memory() -> None:
|
|
|
|
| 314 |
del mesh
|
| 315 |
_try_release_cuda_memory()
|
| 316 |
|
|
|
|
| 317 |
if _OFFLOAD_GEOMETRY_AFTER_MESH and GEOMETRY_PIPELINE is not None and torch.cuda.is_available():
|
| 318 |
+
GEOMETRY_PIPELINE.to("cpu")
|
|
|
|
|
|
|
| 319 |
_try_release_cuda_memory()
|
| 320 |
|
| 321 |
state: Dict[str, Any] = {
|
|
@@ -79,7 +79,6 @@ _MODEL_LOCK = threading.Lock()
|
|
| 79 |
_LIGHT_PREPROCESS_LOCK = threading.Lock()
|
| 80 |
_LIGHT_PREPROCESSOR: Any | None = None
|
| 81 |
GEOMETRY_PIPELINE: Any | None = None
|
| 82 |
-
_GEOMETRY_ON_CUDA = False
|
| 83 |
|
| 84 |
|
| 85 |
def _path_is_git_lfs_pointer(path: Path) -> bool:
|
|
@@ -251,21 +250,18 @@ def start_geometry_cpu_preload_thread() -> None:
|
|
| 251 |
|
| 252 |
def ensure_geometry_on_cuda() -> Any:
|
| 253 |
"""Load on CPU if needed, then move to CUDA inside a ``@spaces.GPU`` callback."""
|
| 254 |
-
global _GEOMETRY_ON_CUDA
|
| 255 |
with _MODEL_LOCK:
|
| 256 |
pipeline = _ensure_geometry_loaded_on_cpu_locked()
|
| 257 |
-
|
| 258 |
-
|
| 259 |
-
|
| 260 |
-
|
| 261 |
-
|
| 262 |
-
|
| 263 |
-
|
| 264 |
-
|
| 265 |
-
|
| 266 |
-
|
| 267 |
-
print("[HyShape] CUDA unavailable in this callback; geometry stays on CPU.", flush=True)
|
| 268 |
-
return pipeline
|
| 269 |
|
| 270 |
|
| 271 |
@GPU
|
|
|
|
| 79 |
_LIGHT_PREPROCESS_LOCK = threading.Lock()
|
| 80 |
_LIGHT_PREPROCESSOR: Any | None = None
|
| 81 |
GEOMETRY_PIPELINE: Any | None = None
|
|
|
|
| 82 |
|
| 83 |
|
| 84 |
def _path_is_git_lfs_pointer(path: Path) -> bool:
|
|
|
|
| 250 |
|
| 251 |
def ensure_geometry_on_cuda() -> Any:
|
| 252 |
"""Load on CPU if needed, then move to CUDA inside a ``@spaces.GPU`` callback."""
|
|
|
|
| 253 |
with _MODEL_LOCK:
|
| 254 |
pipeline = _ensure_geometry_loaded_on_cpu_locked()
|
| 255 |
+
if torch.cuda.is_available():
|
| 256 |
+
move_started_at = time.time()
|
| 257 |
+
pipeline.to("cuda")
|
| 258 |
+
print(
|
| 259 |
+
f"[HyShape] geometry on GPU (to() took {time.time() - move_started_at:.1f}s)",
|
| 260 |
+
flush=True,
|
| 261 |
+
)
|
| 262 |
+
else:
|
| 263 |
+
print("[HyShape] CUDA unavailable in this callback; geometry stays on CPU.", flush=True)
|
| 264 |
+
return pipeline
|
|
|
|
|
|
|
| 265 |
|
| 266 |
|
| 267 |
@GPU
|