luh1124 Claude Sonnet 4.6 committed on
Commit
c513086
·
1 Parent(s): 75e7b40

fix(zerogpu): remove stale CUDA flags, enable CPU preload by default

Browse files

- Remove _NEAR_ON_CUDA / _GEOMETRY_ON_CUDA flags: always call .to("cuda")
inside @GPU callbacks (.to() is a no-op when already on the correct device),
so a fresh ZeroGPU worker never skips the host-to-device transfer.
- Default NEAR_MODEL_CPU_PRELOAD_AT_START to 1 when spaces is available,
i.e. `spaces is not None` (matches the existing NEAR_GEOMETRY_OFFLOAD_AFTER_MESH
pattern): models load on CPU before demo.launch(), so the first @GPU callback pays
only for the fast host-to-device (H2D) transfer instead of a full from_pretrained() under a 90s lease.
- Release _MODEL_LOCK before .to("cuda") and setup_renderer(): the lock now
guards only the from_pretrained() call, so heavy CUDA work no longer blocks other
threads waiting for the lock.
- Remove _MODEL_LOCK from the geometry offload path in generate_mesh().
- Same fixes applied to app_hyshape.py.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

Files changed (2) hide show
  1. app.py +15 -24
  2. app_hyshape.py +10 -14
app.py CHANGED
@@ -66,8 +66,6 @@ CACHE_DIR.mkdir(exist_ok=True)
66
  _MODEL_LOCK = threading.Lock()
67
  PIPELINE: Optional[NeARImageToRelightable3DPipeline] = None
68
  GEOMETRY_PIPELINE: Optional[Hunyuan3DDiTFlowMatchingPipeline] = None
69
- _NEAR_ON_CUDA = False
70
- _GEOMETRY_ON_CUDA = False
71
 
72
  tone_mapper = ToneMapper()
73
  AVAILABLE_TONE_MAPPERS = getattr(tone_mapper, "available_views", ["AgX"])
@@ -83,7 +81,9 @@ def _truthy_env(name: str, default: str) -> bool:
83
  return v in ("1", "true", "yes", "on")
84
 
85
 
86
- _CPU_PRELOAD_AT_START = _truthy_env("NEAR_MODEL_CPU_PRELOAD_AT_START", "0")
 
 
87
  _OFFLOAD_GEOMETRY_AFTER_MESH = _truthy_env(
88
  "NEAR_GEOMETRY_OFFLOAD_AFTER_MESH", "1" if spaces is not None else "0"
89
  )
@@ -142,30 +142,24 @@ def run_model_cpu_preload_blocking() -> None:
142
 
143
 
144
  def ensure_near_on_cuda() -> None:
145
- """Load NeAR if needed (respects CPU preload), move to CUDA once, init renderer / tone mapper."""
146
- global _NEAR_ON_CUDA
147
  with _MODEL_LOCK:
148
  _ensure_near_loaded_on_cpu_locked()
149
- assert PIPELINE is not None
150
- if torch.cuda.is_available() and not _NEAR_ON_CUDA:
151
- PIPELINE.to("cuda")
152
- _NEAR_ON_CUDA = True
153
- if torch.cuda.is_available():
154
- if PIPELINE.renderer is None:
155
- PIPELINE.setup_renderer()
156
- if PIPELINE.tone_mapper is None:
157
- PIPELINE.setup_tone_mapper("AgX")
158
 
159
 
160
  def ensure_geometry_on_cuda() -> None:
161
- global _GEOMETRY_ON_CUDA
162
  with _MODEL_LOCK:
163
  _ensure_geometry_loaded_on_cpu_locked()
164
- assert GEOMETRY_PIPELINE is not None
165
- if torch.cuda.is_available() and not _GEOMETRY_ON_CUDA:
166
- GEOMETRY_PIPELINE.to("cuda")
167
- _GEOMETRY_ON_CUDA = True
168
- print("[NeAR] Hunyuan geometry pipeline on CUDA.", flush=True)
169
 
170
 
171
  def _try_release_cuda_memory() -> None:
@@ -320,11 +314,8 @@ def generate_mesh(
320
  del mesh
321
  _try_release_cuda_memory()
322
 
323
- global _GEOMETRY_ON_CUDA
324
  if _OFFLOAD_GEOMETRY_AFTER_MESH and GEOMETRY_PIPELINE is not None and torch.cuda.is_available():
325
- with _MODEL_LOCK:
326
- GEOMETRY_PIPELINE.to("cpu")
327
- _GEOMETRY_ON_CUDA = False
328
  _try_release_cuda_memory()
329
 
330
  state: Dict[str, Any] = {
 
66
  _MODEL_LOCK = threading.Lock()
67
  PIPELINE: Optional[NeARImageToRelightable3DPipeline] = None
68
  GEOMETRY_PIPELINE: Optional[Hunyuan3DDiTFlowMatchingPipeline] = None
 
 
69
 
70
  tone_mapper = ToneMapper()
71
  AVAILABLE_TONE_MAPPERS = getattr(tone_mapper, "available_views", ["AgX"])
 
81
  return v in ("1", "true", "yes", "on")
82
 
83
 
84
+ _CPU_PRELOAD_AT_START = _truthy_env(
85
+ "NEAR_MODEL_CPU_PRELOAD_AT_START", "1" if spaces is not None else "0"
86
+ )
87
  _OFFLOAD_GEOMETRY_AFTER_MESH = _truthy_env(
88
  "NEAR_GEOMETRY_OFFLOAD_AFTER_MESH", "1" if spaces is not None else "0"
89
  )
 
142
 
143
 
144
  def ensure_near_on_cuda() -> None:
145
+ """Load NeAR if needed (respects CPU preload), move to CUDA, init renderer / tone mapper."""
 
146
  with _MODEL_LOCK:
147
  _ensure_near_loaded_on_cpu_locked()
148
+ assert PIPELINE is not None
149
+ if torch.cuda.is_available():
150
+ PIPELINE.to("cuda")
151
+ if PIPELINE.renderer is None:
152
+ PIPELINE.setup_renderer()
153
+ if PIPELINE.tone_mapper is None:
154
+ PIPELINE.setup_tone_mapper("AgX")
 
 
155
 
156
 
157
  def ensure_geometry_on_cuda() -> None:
 
158
  with _MODEL_LOCK:
159
  _ensure_geometry_loaded_on_cpu_locked()
160
+ assert GEOMETRY_PIPELINE is not None
161
+ if torch.cuda.is_available():
162
+ GEOMETRY_PIPELINE.to("cuda")
 
 
163
 
164
 
165
  def _try_release_cuda_memory() -> None:
 
314
  del mesh
315
  _try_release_cuda_memory()
316
 
 
317
  if _OFFLOAD_GEOMETRY_AFTER_MESH and GEOMETRY_PIPELINE is not None and torch.cuda.is_available():
318
+ GEOMETRY_PIPELINE.to("cpu")
 
 
319
  _try_release_cuda_memory()
320
 
321
  state: Dict[str, Any] = {
app_hyshape.py CHANGED
@@ -79,7 +79,6 @@ _MODEL_LOCK = threading.Lock()
79
  _LIGHT_PREPROCESS_LOCK = threading.Lock()
80
  _LIGHT_PREPROCESSOR: Any | None = None
81
  GEOMETRY_PIPELINE: Any | None = None
82
- _GEOMETRY_ON_CUDA = False
83
 
84
 
85
  def _path_is_git_lfs_pointer(path: Path) -> bool:
@@ -251,21 +250,18 @@ def start_geometry_cpu_preload_thread() -> None:
251
 
252
  def ensure_geometry_on_cuda() -> Any:
253
  """Load on CPU if needed, then move to CUDA inside a ``@spaces.GPU`` callback."""
254
- global _GEOMETRY_ON_CUDA
255
  with _MODEL_LOCK:
256
  pipeline = _ensure_geometry_loaded_on_cpu_locked()
257
- if torch.cuda.is_available():
258
- if not _GEOMETRY_ON_CUDA:
259
- move_started_at = time.time()
260
- pipeline.to("cuda")
261
- _GEOMETRY_ON_CUDA = True
262
- print(
263
- f"[HyShape] geometry moved to GPU in {time.time() - move_started_at:.1f}s",
264
- flush=True,
265
- )
266
- else:
267
- print("[HyShape] CUDA unavailable in this callback; geometry stays on CPU.", flush=True)
268
- return pipeline
269
 
270
 
271
  @GPU
 
79
  _LIGHT_PREPROCESS_LOCK = threading.Lock()
80
  _LIGHT_PREPROCESSOR: Any | None = None
81
  GEOMETRY_PIPELINE: Any | None = None
 
82
 
83
 
84
  def _path_is_git_lfs_pointer(path: Path) -> bool:
 
250
 
251
  def ensure_geometry_on_cuda() -> Any:
252
  """Load on CPU if needed, then move to CUDA inside a ``@spaces.GPU`` callback."""
 
253
  with _MODEL_LOCK:
254
  pipeline = _ensure_geometry_loaded_on_cpu_locked()
255
+ if torch.cuda.is_available():
256
+ move_started_at = time.time()
257
+ pipeline.to("cuda")
258
+ print(
259
+ f"[HyShape] geometry on GPU (to() took {time.time() - move_started_at:.1f}s)",
260
+ flush=True,
261
+ )
262
+ else:
263
+ print("[HyShape] CUDA unavailable in this callback; geometry stays on CPU.", flush=True)
264
+ return pipeline
 
 
265
 
266
 
267
  @GPU