luh1124 committed on
Commit
f6c6291
·
1 Parent(s): d5bbafa

refactor: use bare spaces.GPU like E-RayZer (drop per-callback duration)

Browse files
Files changed (2) hide show
  1. DEPLOY_HF_SPACE.md +2 -2
  2. app.py +17 -34
DEPLOY_HF_SPACE.md CHANGED
@@ -55,9 +55,9 @@ If you maintain a separate template tree (e.g. `NeAR_space`), copy changes **int
55
  **Application code (this repo)**
56
 
57
  - **`import spaces`** (optional `try/except` for local runs without the package).
58
- - Decorate **every Gradio callback that uses CUDA** with **`@spaces.GPU`** (optionally `duration=`, `size="large"|"xlarge"`). The decorator is effectively a no-op off ZeroGPU per HF docs.
59
  - **Lazy-load** large models **inside** those callbacks (or at least after a GPU is available). This repo uses **`_ensure_models()`** so `from_pretrained` / `.to("cuda")` do not run at import time when no device exists yet.
60
- - GPU duration: configured at the **very top** of `app.py` (before `import spaces`). **`NEAR_ZEROGPU_HF_CEILING_S`** defaults to **90** (max **120** in code). **`NEAR_ZEROGPU_MAX_SECONDS`** / **`NEAR_ZEROGPU_DURATION_CAP`** are **rewritten in `os.environ`** if they exceed the ceiling so leftover Space Variables like `300` cannot leak into the runtime. Startup logs print **`[NeAR] ZeroGPU ceiling=…`**. If you still see errors mentioning **300s**, the running container is likely an **old build** (rebuild the Space) or another layer (not this `app.py`) is requesting GPU time.
61
 
62
  ### 2b2. gsplat: first-render JIT (optional mitigations)
63
 
 
55
  **Application code (this repo)**
56
 
57
  - **`import spaces`** (optional `try/except` for local runs without the package).
58
+ - Decorate **every Gradio callback that uses CUDA** with **`@spaces.GPU`** (same as [E-RayZer](https://huggingface.co/spaces/qitaoz/E-RayZer): no `duration=` in app code — platform defaults apply). This repo aliases it as **`GPU`** in `app.py` and uses **`@GPU`**; locally, without the `spaces` package, the alias falls back to an identity decorator that returns the function unchanged. On hardware other than ZeroGPU, `spaces.GPU` itself is effectively a no-op per HF docs.
59
  - **Lazy-load** large models **inside** those callbacks (or at least after a GPU is available). This repo uses **`_ensure_models()`** so `from_pretrained` / `.to("cuda")` do not run at import time when no device exists yet.
60
+ - **Space Variables**: at the top of `app.py` (before `import spaces`), **`NEAR_ZEROGPU_MAX_SECONDS`** / **`NEAR_ZEROGPU_DURATION_CAP`** are **rewritten in `os.environ`** if they exceed **`NEAR_ZEROGPU_HF_CEILING_S`** (default **90**, max **120**) so values like `300` cannot break the Hub runtime. This does not set per-callback `duration` in Python; it only clamps env vars HF may read.
61
 
62
  ### 2b2. gsplat: first-render JIT (optional mitigations)
63
 
app.py CHANGED
@@ -15,22 +15,18 @@ if not os.environ.get("HF_TOKEN") and not os.environ.get("HUGGING_FACE_HUB_TOKEN
15
  )
16
 
17
  # ZeroGPU: must run before `import spaces`. Space Variables often leave NEAR_* at 300/1800; HF still rejects those.
18
- # Default 90s: first gsplat rasterization JIT-compiles CUDA kernels and can dominate wall time before any tqdm step.
19
- _ZEROGPU_HF_CEILING_S = min(max(15, int(os.environ.get("NEAR_ZEROGPU_HF_CEILING_S", "90"))), 120)
20
  for _ek in ("NEAR_ZEROGPU_MAX_SECONDS", "NEAR_ZEROGPU_DURATION_CAP"):
21
  if _ek in os.environ:
22
  try:
23
- if int(os.environ[_ek]) > _ZEROGPU_HF_CEILING_S:
24
- os.environ[_ek] = str(_ZEROGPU_HF_CEILING_S)
25
  except ValueError:
26
  pass
27
- _z_req = int(os.environ.get("NEAR_ZEROGPU_MAX_SECONDS", str(_ZEROGPU_HF_CEILING_S)))
28
- _z_cap = int(os.environ.get("NEAR_ZEROGPU_DURATION_CAP", str(_ZEROGPU_HF_CEILING_S)))
29
- _ZGPU_MAX_S = max(10, min(_z_req, _z_cap, _ZEROGPU_HF_CEILING_S))
30
  print(
31
- f"[NeAR] ZeroGPU ceiling={_ZEROGPU_HF_CEILING_S}s default_fn={_ZGPU_MAX_S}s "
32
- f"(raise NEAR_ZEROGPU_HF_CEILING_S up to 120 for long videos; "
33
- f"first gsplat render JIT can take many minutes — overage yields HTML errors in the browser, not JSON)",
34
  flush=True,
35
  )
36
 
@@ -111,21 +107,8 @@ from trellis.pipelines import NeARImageToRelightable3DPipeline
111
  from hy3dshape.pipelines import Hunyuan3DDiTFlowMatchingPipeline # pyright: ignore[reportMissingImports]
112
 
113
 
114
- def _zero_gpu(**kwargs):
115
- """Decorator: request a GPU for this Gradio callback on HF ZeroGPU Spaces."""
116
-
117
- def decorator(fn):
118
- if spaces is None:
119
- return fn
120
- if "duration" in kwargs:
121
- d = int(kwargs["duration"])
122
- kwargs["duration"] = max(10, min(d, _ZEROGPU_HF_CEILING_S))
123
- else:
124
- kwargs.setdefault("duration", _ZGPU_MAX_S)
125
- return spaces.GPU(**kwargs)(fn)
126
-
127
- return decorator
128
-
129
 
130
  APP_DIR = Path(__file__).resolve().parent
131
  CACHE_DIR = APP_DIR / "tmp_gradio"
@@ -200,7 +183,7 @@ def ensure_session_dir(req: Optional[gr.Request]) -> Path:
200
  return d
201
 
202
 
203
- @_zero_gpu(duration=120)
204
  def clear_session_dir(req: Optional[gr.Request]) -> str:
205
  d = ensure_session_dir(req)
206
  shutil.rmtree(d, ignore_errors=True)
@@ -447,7 +430,7 @@ def set_tone_mapper(view_name: str):
447
  PIPELINE.setup_tone_mapper(view_name)
448
 
449
 
450
- @_zero_gpu()
451
  def preview_hdri(hdri_file_obj: Any, tone_mapper_name: str):
452
  _ensure_models()
453
  assert PIPELINE is not None
@@ -477,7 +460,7 @@ def _ensure_rgba(img: Image.Image) -> Image.Image:
477
  return img.convert("RGBA")
478
 
479
 
480
- @_zero_gpu()
481
  @torch.inference_mode()
482
  def preprocess_image_only(image_input: Optional[Image.Image]):
483
  _ensure_models()
@@ -505,7 +488,7 @@ def save_slat_npz(slat, save_path: Path):
505
  # Core pipeline functions
506
  # ---------------------------------------------------------------------------
507
 
508
- @_zero_gpu(duration=120)
509
  @torch.inference_mode()
510
  def generate_mesh(
511
  image_input: Optional[Image.Image],
@@ -548,7 +531,7 @@ def generate_mesh(
548
  )
549
 
550
 
551
- @_zero_gpu(duration=120)
552
  @torch.inference_mode()
553
  def generate_slat(
554
  asset_state: Dict[str, Any],
@@ -634,7 +617,7 @@ def load_asset_and_hdri(asset_state: Dict[str, Any], hdri_file_obj: Any, tone_ma
634
  return slat, hdri_np
635
 
636
 
637
- @_zero_gpu(duration=120)
638
  @torch.inference_mode()
639
  def render_preview(
640
  asset_state: Dict[str, Any],
@@ -677,7 +660,7 @@ def render_preview(
677
  )
678
 
679
 
680
- @_zero_gpu(duration=120)
681
  @torch.inference_mode()
682
  def render_camera_video(
683
  asset_state: Dict[str, Any],
@@ -709,7 +692,7 @@ def render_camera_video(
709
  return str(video_path), f"**Camera path video saved**"
710
 
711
 
712
- @_zero_gpu(duration=120)
713
  @torch.inference_mode()
714
  def render_hdri_video(
715
  asset_state: Dict[str, Any],
@@ -744,7 +727,7 @@ def render_hdri_video(
744
  return str(hdri_roll_path), str(render_path), "**HDRI rotation video saved**"
745
 
746
 
747
- @_zero_gpu(duration=120)
748
  def export_glb(
749
  asset_state: Dict[str, Any],
750
  hdri_file_obj: Any,
 
15
  )
16
 
17
  # ZeroGPU: must run before `import spaces`. Space Variables often leave NEAR_* at 300/1800; HF still rejects those.
18
+ _ZEROGPU_ENV_CAP_S = min(max(15, int(os.environ.get("NEAR_ZEROGPU_HF_CEILING_S", "90"))), 120)
 
19
  for _ek in ("NEAR_ZEROGPU_MAX_SECONDS", "NEAR_ZEROGPU_DURATION_CAP"):
20
  if _ek in os.environ:
21
  try:
22
+ if int(os.environ[_ek]) > _ZEROGPU_ENV_CAP_S:
23
+ os.environ[_ek] = str(_ZEROGPU_ENV_CAP_S)
24
  except ValueError:
25
  pass
 
 
 
26
  print(
27
+ f"[NeAR] ZeroGPU: NEAR_ZEROGPU_MAX_SECONDS / NEAR_ZEROGPU_DURATION_CAP clamped to cap {_ZEROGPU_ENV_CAP_S}s "
28
+ f"(adjust NEAR_ZEROGPU_HF_CEILING_S up to 120 if your tier allows). "
29
+ f"Gradio callbacks use plain spaces.GPU (platform default duration).",
30
  flush=True,
31
  )
32
 
 
107
  from hy3dshape.pipelines import Hunyuan3DDiTFlowMatchingPipeline # pyright: ignore[reportMissingImports]
108
 
109
 
110
+ # Hugging Face ZeroGPU: same style as E-RayZer — bare ``spaces.GPU`` (no custom duration in app code).
111
+ GPU = spaces.GPU if spaces is not None else (lambda f: f)
 
 
 
 
 
 
 
 
 
 
 
 
 
112
 
113
  APP_DIR = Path(__file__).resolve().parent
114
  CACHE_DIR = APP_DIR / "tmp_gradio"
 
183
  return d
184
 
185
 
186
+ @GPU
187
  def clear_session_dir(req: Optional[gr.Request]) -> str:
188
  d = ensure_session_dir(req)
189
  shutil.rmtree(d, ignore_errors=True)
 
430
  PIPELINE.setup_tone_mapper(view_name)
431
 
432
 
433
+ @GPU
434
  def preview_hdri(hdri_file_obj: Any, tone_mapper_name: str):
435
  _ensure_models()
436
  assert PIPELINE is not None
 
460
  return img.convert("RGBA")
461
 
462
 
463
+ @GPU
464
  @torch.inference_mode()
465
  def preprocess_image_only(image_input: Optional[Image.Image]):
466
  _ensure_models()
 
488
  # Core pipeline functions
489
  # ---------------------------------------------------------------------------
490
 
491
+ @GPU
492
  @torch.inference_mode()
493
  def generate_mesh(
494
  image_input: Optional[Image.Image],
 
531
  )
532
 
533
 
534
+ @GPU
535
  @torch.inference_mode()
536
  def generate_slat(
537
  asset_state: Dict[str, Any],
 
617
  return slat, hdri_np
618
 
619
 
620
+ @GPU
621
  @torch.inference_mode()
622
  def render_preview(
623
  asset_state: Dict[str, Any],
 
660
  )
661
 
662
 
663
+ @GPU
664
  @torch.inference_mode()
665
  def render_camera_video(
666
  asset_state: Dict[str, Any],
 
692
  return str(video_path), f"**Camera path video saved**"
693
 
694
 
695
+ @GPU
696
  @torch.inference_mode()
697
  def render_hdri_video(
698
  asset_state: Dict[str, Any],
 
727
  return str(hdri_roll_path), str(render_path), "**HDRI rotation video saved**"
728
 
729
 
730
+ @GPU
731
  def export_glb(
732
  asset_state: Dict[str, Any],
733
  hdri_file_obj: Any,