Spaces:

luh0502
/

NeAR

Sleeping

App Files Files Community

luh1124 committed on Mar 25

Commit

f6c6291

1 Parent(s): d5bbafa

refactor: use bare spaces.GPU like E-RayZer (drop per-callback duration)

Browse files

Files changed (2) hide show

DEPLOY_HF_SPACE.md +2 -2
app.py +17 -34

DEPLOY_HF_SPACE.md CHANGED Viewed

@@ -55,9 +55,9 @@ If you maintain a separate template tree (e.g. `NeAR_space`), copy changes **int
 **Application code (this repo)**
 - **`import spaces`** (optional `try/except` for local runs without the package).
-- Decorate **every Gradio callback that uses CUDA** with **`@spaces.GPU`** (optionally `duration=…`, `size="large"|"xlarge"`). The decorator is effectively a no-op off ZeroGPU per HF docs.
 - **Lazy-load** large models **inside** those callbacks (or at least after a GPU is available). This repo uses **`_ensure_models()`** so `from_pretrained` / `.to("cuda")` do not run at import time when no device exists yet.
-- GPU duration: configured at the **very top** of `app.py` (before `import spaces`). **`NEAR_ZEROGPU_HF_CEILING_S`** defaults to **90** (max **120** in code). **`NEAR_ZEROGPU_MAX_SECONDS`** / **`NEAR_ZEROGPU_DURATION_CAP`** are **rewritten in `os.environ`** if they exceed the ceiling so leftover Space Variables like `300` cannot leak into the runtime. Startup logs print **`[NeAR] ZeroGPU ceiling=…`** — if you still see errors mentioning **300s**, the running container is likely an **old build** (rebuild the Space) or another layer (not this `app.py`) is requesting GPU time.
 ### 2b2. gsplat: first-render JIT (optional mitigations)

 **Application code (this repo)**
 - **`import spaces`** (optional `try/except` for local runs without the package).
+- Decorate **every Gradio callback that uses CUDA** with **`@spaces.GPU`** (same as [E-RayZer](https://huggingface.co/spaces/qitaoz/E-RayZer): no `duration=` in app code — platform defaults apply). This repo aliases it as **`GPU`** in `app.py` and uses **`@GPU`**; when the `spaces` package is not installed (local runs), the alias falls back to an identity decorator. Per HF docs, `spaces.GPU` itself is effectively a no-op outside ZeroGPU.
 - **Lazy-load** large models **inside** those callbacks (or at least after a GPU is available). This repo uses **`_ensure_models()`** so `from_pretrained` / `.to("cuda")` do not run at import time when no device exists yet.
+- **Space Variables**: at the top of `app.py` (before `import spaces`), **`NEAR_ZEROGPU_MAX_SECONDS`** / **`NEAR_ZEROGPU_DURATION_CAP`** are **rewritten in `os.environ`** if they exceed **`NEAR_ZEROGPU_HF_CEILING_S`** (default **90**, max **120**) so values like `300` cannot break the Hub runtime. This does not set per-callback `duration` in Python; it only clamps env vars HF may read.
 ### 2b2. gsplat: first-render JIT (optional mitigations)

app.py CHANGED Viewed

@@ -15,22 +15,18 @@ if not os.environ.get("HF_TOKEN") and not os.environ.get("HUGGING_FACE_HUB_TOKEN
         )
 # ZeroGPU: must run before `import spaces`. Space Variables often leave NEAR_* at 300/1800; HF still rejects those.
-# Default 90s: first gsplat rasterization JIT-compiles CUDA kernels and can dominate wall time before any tqdm step.
-_ZEROGPU_HF_CEILING_S = min(max(15, int(os.environ.get("NEAR_ZEROGPU_HF_CEILING_S", "90"))), 120)
 for _ek in ("NEAR_ZEROGPU_MAX_SECONDS", "NEAR_ZEROGPU_DURATION_CAP"):
     if _ek in os.environ:
         try:
-            if int(os.environ[_ek]) > _ZEROGPU_HF_CEILING_S:
-                os.environ[_ek] = str(_ZEROGPU_HF_CEILING_S)
         except ValueError:
             pass
-_z_req = int(os.environ.get("NEAR_ZEROGPU_MAX_SECONDS", str(_ZEROGPU_HF_CEILING_S)))
-_z_cap = int(os.environ.get("NEAR_ZEROGPU_DURATION_CAP", str(_ZEROGPU_HF_CEILING_S)))
-_ZGPU_MAX_S = max(10, min(_z_req, _z_cap, _ZEROGPU_HF_CEILING_S))
 print(
-    f"[NeAR] ZeroGPU ceiling={_ZEROGPU_HF_CEILING_S}s default_fn={_ZGPU_MAX_S}s "
-    f"(raise NEAR_ZEROGPU_HF_CEILING_S up to 120 for long videos; "
-    f"first gsplat render JIT can take many minutes — overage yields HTML errors in the browser, not JSON)",
     flush=True,
 )
@@ -111,21 +107,8 @@ from trellis.pipelines import NeARImageToRelightable3DPipeline
 from hy3dshape.pipelines import Hunyuan3DDiTFlowMatchingPipeline  # pyright: ignore[reportMissingImports]
-def _zero_gpu(**kwargs):
-    """Decorator: request a GPU for this Gradio callback on HF ZeroGPU Spaces."""
-    def decorator(fn):
-        if spaces is None:
-            return fn
-        if "duration" in kwargs:
-            d = int(kwargs["duration"])
-            kwargs["duration"] = max(10, min(d, _ZEROGPU_HF_CEILING_S))
-        else:
-            kwargs.setdefault("duration", _ZGPU_MAX_S)
-        return spaces.GPU(**kwargs)(fn)
-    return decorator
 APP_DIR = Path(__file__).resolve().parent
 CACHE_DIR = APP_DIR / "tmp_gradio"
@@ -200,7 +183,7 @@ def ensure_session_dir(req: Optional[gr.Request]) -> Path:
     return d
-@_zero_gpu(duration=120)
 def clear_session_dir(req: Optional[gr.Request]) -> str:
     d = ensure_session_dir(req)
     shutil.rmtree(d, ignore_errors=True)
@@ -447,7 +430,7 @@ def set_tone_mapper(view_name: str):
         PIPELINE.setup_tone_mapper(view_name)
-@_zero_gpu()
 def preview_hdri(hdri_file_obj: Any, tone_mapper_name: str):
     _ensure_models()
     assert PIPELINE is not None
@@ -477,7 +460,7 @@ def _ensure_rgba(img: Image.Image) -> Image.Image:
     return img.convert("RGBA")
-@_zero_gpu()
 @torch.inference_mode()
 def preprocess_image_only(image_input: Optional[Image.Image]):
     _ensure_models()
@@ -505,7 +488,7 @@ def save_slat_npz(slat, save_path: Path):
 # Core pipeline functions
 # ---------------------------------------------------------------------------
-@_zero_gpu(duration=120)
 @torch.inference_mode()
 def generate_mesh(
     image_input: Optional[Image.Image],
@@ -548,7 +531,7 @@ def generate_mesh(
     )
-@_zero_gpu(duration=120)
 @torch.inference_mode()
 def generate_slat(
     asset_state: Dict[str, Any],
@@ -634,7 +617,7 @@ def load_asset_and_hdri(asset_state: Dict[str, Any], hdri_file_obj: Any, tone_ma
     return slat, hdri_np
-@_zero_gpu(duration=120)
 @torch.inference_mode()
 def render_preview(
     asset_state: Dict[str, Any],
@@ -677,7 +660,7 @@ def render_preview(
     )
-@_zero_gpu(duration=120)
 @torch.inference_mode()
 def render_camera_video(
     asset_state: Dict[str, Any],
@@ -709,7 +692,7 @@ def render_camera_video(
     return str(video_path), f"**Camera path video saved**"
-@_zero_gpu(duration=120)
 @torch.inference_mode()
 def render_hdri_video(
     asset_state: Dict[str, Any],
@@ -744,7 +727,7 @@ def render_hdri_video(
     return str(hdri_roll_path), str(render_path), "**HDRI rotation video saved**"
-@_zero_gpu(duration=120)
 def export_glb(
     asset_state: Dict[str, Any],
     hdri_file_obj: Any,

         )
 # ZeroGPU: must run before `import spaces`. Space Variables often leave NEAR_* at 300/1800; HF still rejects those.
+_ZEROGPU_ENV_CAP_S = min(max(15, int(os.environ.get("NEAR_ZEROGPU_HF_CEILING_S", "90"))), 120)
 for _ek in ("NEAR_ZEROGPU_MAX_SECONDS", "NEAR_ZEROGPU_DURATION_CAP"):
     if _ek in os.environ:
         try:
+            if int(os.environ[_ek]) > _ZEROGPU_ENV_CAP_S:
+                os.environ[_ek] = str(_ZEROGPU_ENV_CAP_S)
         except ValueError:
             pass
 print(
+    f"[NeAR] ZeroGPU: NEAR_ZEROGPU_MAX_SECONDS / NEAR_ZEROGPU_DURATION_CAP clamped to cap {_ZEROGPU_ENV_CAP_S}s "
+    f"(adjust NEAR_ZEROGPU_HF_CEILING_S up to 120 if your tier allows). "
+    f"Gradio callbacks use plain spaces.GPU (platform default duration).",
     flush=True,
 )
 from hy3dshape.pipelines import Hunyuan3DDiTFlowMatchingPipeline  # pyright: ignore[reportMissingImports]
+# Hugging Face ZeroGPU: same style as E-RayZer — bare ``spaces.GPU`` (no custom duration in app code).
+GPU = spaces.GPU if spaces is not None else (lambda f: f)
 APP_DIR = Path(__file__).resolve().parent
 CACHE_DIR = APP_DIR / "tmp_gradio"
     return d
+@GPU
 def clear_session_dir(req: Optional[gr.Request]) -> str:
     d = ensure_session_dir(req)
     shutil.rmtree(d, ignore_errors=True)
         PIPELINE.setup_tone_mapper(view_name)
+@GPU
 def preview_hdri(hdri_file_obj: Any, tone_mapper_name: str):
     _ensure_models()
     assert PIPELINE is not None
     return img.convert("RGBA")
+@GPU
 @torch.inference_mode()
 def preprocess_image_only(image_input: Optional[Image.Image]):
     _ensure_models()
 # Core pipeline functions
 # ---------------------------------------------------------------------------
+@GPU
 @torch.inference_mode()
 def generate_mesh(
     image_input: Optional[Image.Image],
     )
+@GPU
 @torch.inference_mode()
 def generate_slat(
     asset_state: Dict[str, Any],
     return slat, hdri_np
+@GPU
 @torch.inference_mode()
 def render_preview(
     asset_state: Dict[str, Any],
     )
+@GPU
 @torch.inference_mode()
 def render_camera_video(
     asset_state: Dict[str, Any],
     return str(video_path), f"**Camera path video saved**"
+@GPU
 @torch.inference_mode()
 def render_hdri_video(
     asset_state: Dict[str, Any],
     return str(hdri_roll_path), str(render_path), "**HDRI rotation video saved**"
+@GPU
 def export_glb(
     asset_state: Dict[str, Any],
     hdri_file_obj: Any,