Spaces:

kairunwen
/

InstantSplat

Running on Zero

App Files Files Community

[Admin maintenance] Support new ZeroGPU hardware

by multimodalart HF Staff - opened 7 days ago

base: refs/heads/main

←

from: refs/pr/6

Discussion Files changed

+81

-11

Files changed (4) hide show

README.md +1 -1
app.py +76 -6
requirements.txt +3 -3
submodules/dust3r/croco/models/curope/kernels.cu +1 -1

README.md CHANGED Viewed

@@ -4,7 +4,7 @@ emoji: ⚡
 colorFrom: gray
 colorTo: green
 sdk: gradio
-sdk_version: 4.20.1
 python_version: 3.10.13
 app_file: app.py
 pinned: false

 colorFrom: gray
 colorTo: green
 sdk: gradio
+sdk_version: 5.49.1
 python_version: 3.10.13
 app_file: app.py
 pinned: false

app.py CHANGED Viewed

@@ -1,18 +1,88 @@
 import os, subprocess, shlex, sys, gc
 import time
 import torch
 import numpy as np
 import shutil
 import argparse
 import gradio as gr
 import uuid
-import spaces
-subprocess.run(shlex.split("pip install wheel/diff_gaussian_rasterization-0.0.0-cp310-cp310-linux_x86_64.whl"))
-subprocess.run(shlex.split("pip install wheel/simple_knn-0.0.0-cp310-cp310-linux_x86_64.whl"))
-subprocess.run(shlex.split("pip install wheel/curope-0.0.0-cp310-cp310-linux_x86_64.whl"))
 BASE_DIR = os.path.dirname(os.path.abspath(__file__))
 os.sys.path.append(os.path.abspath(os.path.join(BASE_DIR, "submodules", "dust3r")))
 # os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'
 from dust3r.inference import inference
@@ -253,7 +323,7 @@ with block:
         inputs=[input_path],
         outputs=[output_video, output_file, output_model],
         fn=lambda x: process(inputfiles=None, input_path=x),
-        cache_examples=True,
         label='Sparse-view Examples'
     )
 block.launch(server_name="0.0.0.0", share=False)

 import os, subprocess, shlex, sys, gc
 import time
+import ctypes
+import spaces
 import torch
 import numpy as np
 import shutil
 import argparse
 import gradio as gr
 import uuid
 BASE_DIR = os.path.dirname(os.path.abspath(__file__))
+# Root of the CUDA toolkit; exported as CUDA_HOME / CUDA_PATH (and its bin/ put on PATH)
+# for the pip builds started by _first_gpu_setup.
+CUDA_HOME = "/cuda-image/usr/local/cuda-13.0"
+# Library directory of that toolkit; libcudart.so.13 is loaded from here at start-up.
+CUDA_LIBDIR = os.path.join(CUDA_HOME, "lib64")
+@spaces.GPU(duration=600)
+def _first_gpu_setup():
+    need = {}
+    for name in ("diff_gaussian_rasterization", "simple_knn", "curope"):
+        try:
+            __import__(name)
+        except ImportError:
+            need[name] = True
+    if not need:
+        return
+    import tempfile
+    patch_dir = tempfile.mkdtemp(prefix="torch_cuda_patch_")
+    with open(os.path.join(patch_dir, "sitecustomize.py"), "w") as f:
+        f.write(
+            "try:\n"
+            "    import torch.utils.cpp_extension as _c\n"
+            "    _c._check_cuda_version = lambda *a, **k: None\n"
+            "except Exception:\n"
+            "    pass\n"
+        )
+    env = os.environ.copy()
+    env["CUDA_HOME"] = CUDA_HOME
+    env["CUDA_PATH"] = CUDA_HOME
+    env["PATH"] = os.path.join(CUDA_HOME, "bin") + os.pathsep + env.get("PATH", "")
+    env["PYTHONPATH"] = patch_dir + os.pathsep + env.get("PYTHONPATH", "")
+    env["TORCH_CUDA_ARCH_LIST"] = "12.0"
+    subprocess.check_call(
+        [sys.executable, "-m", "pip", "install", "--no-deps",
+         "setuptools", "wheel", "ninja", "packaging"],
+    )
+    if "diff_gaussian_rasterization" in need:
+        subprocess.check_call(
+            [sys.executable, "-m", "pip", "install",
+             "--no-build-isolation", "--no-deps",
+             os.path.join(BASE_DIR, "submodules", "diff-gaussian-rasterization")],
+            env=env,
+        )
+    if "simple_knn" in need:
+        subprocess.check_call(
+            [sys.executable, "-m", "pip", "install",
+             "--no-build-isolation", "--no-deps",
+             os.path.join(BASE_DIR, "submodules", "simple-knn")],
+            env=env,
+        )
+    if "curope" in need:
+        subprocess.check_call(
+            [sys.executable, "-m", "pip", "install",
+             "--no-build-isolation", "--no-deps",
+             os.path.join(BASE_DIR, "submodules", "dust3r", "croco", "models", "curope")],
+            env=env,
+        )
+_first_gpu_setup()
+try:
+    ctypes.CDLL(os.path.join(CUDA_LIBDIR, "libcudart.so.13"), mode=ctypes.RTLD_GLOBAL)
+    os.environ["LD_LIBRARY_PATH"] = CUDA_LIBDIR + os.pathsep + os.environ.get("LD_LIBRARY_PATH", "")
+except OSError:
+    pass
+# torch 2.6+ flipped weights_only default — restore old behaviour for trusted checkpoints
+_orig_torch_load = torch.load
+torch.load = lambda *a, **k: _orig_torch_load(*a, **{**k, "weights_only": k.get("weights_only", False)})
 os.sys.path.append(os.path.abspath(os.path.join(BASE_DIR, "submodules", "dust3r")))
 # os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'
 from dust3r.inference import inference
         inputs=[input_path],
         outputs=[output_video, output_file, output_model],
         fn=lambda x: process(inputfiles=None, input_path=x),
+        cache_examples=False,
         label='Sparse-view Examples'
     )
 block.launch(server_name="0.0.0.0", share=False)

requirements.txt CHANGED Viewed

@@ -1,8 +1,8 @@
-torch==2.2.0
 torchvision
 roma
 evo
-gradio==5.0.1
 matplotlib
 tqdm
 opencv-python
@@ -14,4 +14,4 @@ pyglet<2
 huggingface-hub[torch]>=0.22
 plyfile
 imageio[ffmpeg]
-spaces

+torch
 torchvision
 roma
 evo
+gradio
 matplotlib
 tqdm
 opencv-python
 huggingface-hub[torch]>=0.22
 plyfile
 imageio[ffmpeg]
+spaces

submodules/dust3r/croco/models/curope/kernels.cu CHANGED Viewed

@@ -98,7 +98,7 @@ void rope_2d_cuda( torch::Tensor tokens, const torch::Tensor pos, const float ba
     const int N_BLOCKS = B * N; // each block takes care of H*D values
     const int SHARED_MEM = sizeof(float) * (D + D/4);
-    AT_DISPATCH_FLOATING_TYPES_AND_HALF(tokens.type(), "rope_2d_cuda", ([&] {
         rope_2d_cuda_kernel<scalar_t> <<<N_BLOCKS, THREADS_PER_BLOCK, SHARED_MEM>>> (
             //tokens.data_ptr<scalar_t>(),
             tokens.packed_accessor32<scalar_t,4,torch::RestrictPtrTraits>(),

     const int N_BLOCKS = B * N; // each block takes care of H*D values
     const int SHARED_MEM = sizeof(float) * (D + D/4);
+    AT_DISPATCH_FLOATING_TYPES_AND_HALF(tokens.scalar_type(), "rope_2d_cuda", ([&] {
         rope_2d_cuda_kernel<scalar_t> <<<N_BLOCKS, THREADS_PER_BLOCK, SHARED_MEM>>> (
             //tokens.data_ptr<scalar_t>(),
             tokens.packed_accessor32<scalar_t,4,torch::RestrictPtrTraits>(),