[Admin maintenance] Support new ZeroGPU hardware

#5
by multimodalart HF Staff - opened
Files changed (3) hide show
  1. README.md +1 -1
  2. app.py +150 -4
  3. requirements.txt +1 -13
README.md CHANGED
@@ -4,7 +4,7 @@ emoji: 🖥️
4
  colorFrom: indigo
5
  colorTo: blue
6
  sdk: gradio
7
- sdk_version: 5.34.2
8
  app_file: app.py
9
  pinned: false
10
  license: mit
 
4
  colorFrom: indigo
5
  colorTo: blue
6
  sdk: gradio
7
+ sdk_version: 5.49.1
8
  app_file: app.py
9
  pinned: false
10
  license: mit
app.py CHANGED
@@ -1,12 +1,158 @@
1
- import gradio as gr
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  import spaces
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  from gradio_litmodel3d import LitModel3D
4
 
5
- import os
6
  import shutil
7
- os.environ['SPCONV_ALGO'] = 'native'
8
  from typing import *
9
- import torch
10
  import numpy as np
11
  import imageio
12
  from easydict import EasyDict as edict
 
1
+ import os
2
+ os.environ.setdefault("SPCONV_ALGO", "native")
3
+ os.environ.setdefault("ATTN_BACKEND", "xformers")
4
+ os.environ.setdefault("SPARSE_ATTN_BACKEND", "xformers")
5
+
6
+ import subprocess
7
+ import sys
8
+
9
+ # Install gradio_litmodel3d with --no-deps, ignoring its over-restrictive gradio<5 cap.
10
+ try:
11
+ import gradio_litmodel3d # noqa: F401
12
+ except ImportError:
13
+ subprocess.check_call(
14
+ [sys.executable, "-m", "pip", "install", "--no-deps", "gradio_litmodel3d==0.0.1"],
15
+ )
16
+
17
  import spaces
18
+ import torch
19
+ import ctypes
20
+ import tempfile
21
+
22
+ CUDA_HOME = "/cuda-image/usr/local/cuda-13.0"
23
+ CUDA_LIBDIR = os.path.join(CUDA_HOME, "lib64")
24
+
25
+
26
+ @spaces.GPU(duration=600)
27
+ def _first_gpu_setup():
28
+ need = {}
29
+ for name, modname in [
30
+ ("nvdiffrast", "nvdiffrast"),
31
+ ("diff_gaussian_rasterization", "diff_gaussian_rasterization"),
32
+ ]:
33
+ try:
34
+ __import__(modname)
35
+ except ImportError:
36
+ need[name] = True
37
+ if not need:
38
+ return
39
+
40
+ patch_dir = tempfile.mkdtemp(prefix="torch_cuda_patch_")
41
+ with open(os.path.join(patch_dir, "sitecustomize.py"), "w") as f:
42
+ f.write(
43
+ "try:\n"
44
+ " import torch.utils.cpp_extension as _c\n"
45
+ " _c._check_cuda_version = lambda *a, **k: None\n"
46
+ "except Exception:\n"
47
+ " pass\n"
48
+ )
49
+
50
+ env = os.environ.copy()
51
+ env["CUDA_HOME"] = CUDA_HOME
52
+ env["CUDA_PATH"] = CUDA_HOME
53
+ env["PATH"] = os.path.join(CUDA_HOME, "bin") + os.pathsep + env.get("PATH", "")
54
+ env["PYTHONPATH"] = patch_dir + os.pathsep + env.get("PYTHONPATH", "")
55
+ env["TORCH_CUDA_ARCH_LIST"] = "12.0"
56
+
57
+ subprocess.check_call(
58
+ [sys.executable, "-m", "pip", "install", "--no-deps",
59
+ "setuptools", "wheel", "ninja", "packaging"],
60
+ )
61
+
62
+ if "nvdiffrast" in need:
63
+ subprocess.check_call(
64
+ [sys.executable, "-m", "pip", "install",
65
+ "--no-build-isolation", "--no-deps",
66
+ "git+https://github.com/NVlabs/nvdiffrast/"],
67
+ env=env,
68
+ )
69
+ if "diff_gaussian_rasterization" in need:
70
+ mip = tempfile.mkdtemp(prefix="mip_")
71
+ subprocess.check_call(
72
+ ["git", "clone", "--recursive", "--depth=1",
73
+ "https://github.com/autonomousvision/mip-splatting.git", mip],
74
+ )
75
+ subprocess.check_call(
76
+ [sys.executable, "-m", "pip", "install",
77
+ "--no-build-isolation", "--no-deps",
78
+ os.path.join(mip, "submodules", "diff-gaussian-rasterization")],
79
+ env=env,
80
+ )
81
+
82
+
83
+ _first_gpu_setup()
84
+ try:
85
+ ctypes.CDLL(os.path.join(CUDA_LIBDIR, "libcudart.so.13"), mode=ctypes.RTLD_GLOBAL)
86
+ os.environ["LD_LIBRARY_PATH"] = CUDA_LIBDIR + os.pathsep + os.environ.get("LD_LIBRARY_PATH", "")
87
+ except OSError:
88
+ pass
89
+
90
+ # xformers on the Blackwell (sm_120) ZeroGPU container is built without CUDA
91
+ # extensions for any FwOp: cutlassF-pt rejects compute capability >= (9, 0)
92
+ # ("too new") and FlashAttn3 is Hopper-only. Reroute xformers.ops.memory_efficient_attention
93
+ # (used by DINOv2, VGGT, trellis dense+sparse paths) to torch.nn.functional.scaled_dot_product_attention,
94
+ # which is CUDA-native on torch 2.10/2.11 and supports sm_120. Must be patched BEFORE
95
+ # anything that calls memory_efficient_attention is imported.
96
+ try:
97
+ import xformers.ops as _xops
98
+ import torch.nn.functional as _F
99
+ from xformers.ops.fmha.attn_bias import BlockDiagonalMask as _BlockDiagonalMask
100
+
101
+ def _bdm_starts(seqinfo):
102
+ # seqinfo is the q_seqinfo / k_seqinfo sub-attribute of an xformers BlockDiagonalMask.
103
+ # Try the Python-list views of the sequence starts first; otherwise fall back to the seqstart tensor and convert it with tolist().
104
+ for attr in ("seqstart_py", "_seqstart_py"):
105
+ v = getattr(seqinfo, attr, None)
106
+ if v is not None:
107
+ return list(v)
108
+ t = getattr(seqinfo, "seqstart", None)
109
+ if t is not None:
110
+ return t.detach().cpu().tolist()
111
+ raise AttributeError("BlockDiagonalMask seqinfo has no seqstart_py / seqstart")
112
+
113
+ def _mea_sdpa(q, k, v, attn_bias=None, p=0.0, scale=None, op=None):
114
+ # q, k, v shapes: [B, M, H, K] (xformers convention). SDPA wants [B, H, M, K].
115
+ if isinstance(attn_bias, _BlockDiagonalMask):
116
+ # Block-diagonal mask used by trellis sparse attention to batch
117
+ # variable-length sequences in a single dense tensor. Run attention on each
118
+ # block separately and concatenate the outputs — SDPA has no block-diagonal kernel.
119
+ q_starts = _bdm_starts(attn_bias.q_seqinfo)
120
+ k_starts = _bdm_starts(attn_bias.k_seqinfo)
121
+ outs = []
122
+ # q,k,v come in as [1, total_tokens, H, K]
123
+ for i in range(len(q_starts) - 1):
124
+ qs, qe = q_starts[i], q_starts[i + 1]
125
+ ks, ke = k_starts[i], k_starts[i + 1]
126
+ qi = q[:, qs:qe].transpose(1, 2) # [1, H, Lq, K]
127
+ ki = k[:, ks:ke].transpose(1, 2) # [1, H, Lk, K]
128
+ vi = v[:, ks:ke].transpose(1, 2)
129
+ oi = _F.scaled_dot_product_attention(qi, ki, vi, dropout_p=p, scale=scale)
130
+ outs.append(oi.transpose(1, 2)) # back to [1, Li, H, K]
131
+ return torch.cat(outs, dim=1)
132
+
133
+ attn_mask = None
134
+ if attn_bias is not None and hasattr(attn_bias, "materialize"):
135
+ attn_mask = attn_bias.materialize((q.shape[0], q.shape[2], q.shape[1], k.shape[1]),
136
+ dtype=q.dtype, device=q.device)
137
+ elif attn_bias is not None:
138
+ attn_mask = attn_bias
139
+
140
+ qh = q.transpose(1, 2) # [B, H, M, K]
141
+ kh = k.transpose(1, 2)
142
+ vh = v.transpose(1, 2)
143
+ out = _F.scaled_dot_product_attention(qh, kh, vh, attn_mask=attn_mask, dropout_p=p, scale=scale)
144
+ return out.transpose(1, 2) # [B, M, H, K]
145
+
146
+ _xops.memory_efficient_attention = _mea_sdpa
147
+ print("[blackwell] xformers.memory_efficient_attention rerouted to torch SDPA")
148
+ except Exception as _e:
149
+ print(f"[blackwell] xformers SDPA shim skipped: {_e}")
150
+
151
+ import gradio as gr
152
  from gradio_litmodel3d import LitModel3D
153
 
 
154
  import shutil
 
155
  from typing import *
 
156
  import numpy as np
157
  import imageio
158
  from easydict import EasyDict as edict
requirements.txt CHANGED
@@ -1,9 +1,5 @@
1
- --extra-index-url https://download.pytorch.org/whl/cu121
2
-
3
  kornia==0.8.0
4
 
5
- torch==2.4.0
6
- torchvision==0.19.0
7
  pillow==10.4.0
8
  imageio==2.36.1
9
  imageio-ffmpeg==0.5.1
@@ -19,17 +15,9 @@ pyvista==0.44.2
19
  pymeshfix==0.17.0
20
  igraph==0.11.8
21
  git+https://github.com/EasternJournalist/utils3d.git@9a4eb15e4021b67b12c460c7057d642626897ec8
22
- xformers==0.0.27.post2
23
  spconv-cu120==2.3.6
24
  transformers==4.46.3
25
- gradio_litmodel3d==0.0.1
26
- pydantic==2.10.6
27
  einops==0.8.1
28
- # huggingface_hub==0.25.0
29
- huggingface_hub==0.33.4
30
  lpips==0.1.4
31
- spaces==0.37.1
32
  timm==1.0.23
33
- https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.0.post2/flash_attn-2.7.0.post2+cu12torch2.4cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
34
- https://huggingface.co/spaces/JeffreyXiang/TRELLIS/resolve/main/wheels/diff_gaussian_rasterization-0.0.0-cp310-cp310-linux_x86_64.whl?download=true
35
- https://huggingface.co/spaces/JeffreyXiang/TRELLIS/resolve/main/wheels/nvdiffrast-0.3.3-cp310-cp310-linux_x86_64.whl?download=true
 
 
 
1
  kornia==0.8.0
2
 
 
 
3
  pillow==10.4.0
4
  imageio==2.36.1
5
  imageio-ffmpeg==0.5.1
 
15
  pymeshfix==0.17.0
16
  igraph==0.11.8
17
  git+https://github.com/EasternJournalist/utils3d.git@9a4eb15e4021b67b12c460c7057d642626897ec8
18
+ xformers
19
  spconv-cu120==2.3.6
20
  transformers==4.46.3
 
 
21
  einops==0.8.1
 
 
22
  lpips==0.1.4
 
23
  timm==1.0.23