HanzhouLiu committed on
Commit
a6e928c
·
1 Parent(s): 3d936c4

Track all files under examples/ with Git LFS

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +1 -0
  2. LICENSE +21 -0
  3. README.md +5 -8
  4. app.py +272 -0
  5. examples/demo_styles/00011395.png +3 -0
  6. examples/demo_styles/00018289.png +3 -0
  7. examples/demo_styles/00038427.png +3 -0
  8. examples/demo_styles/00047052.png +3 -0
  9. examples/demo_styles/00047819.png +3 -0
  10. examples/demo_styles/00054987.png +3 -0
  11. examples/demo_styles/00066540.png +3 -0
  12. examples/demo_styles/00069352.png +3 -0
  13. examples/demo_styles/00091988.png +3 -0
  14. examples/demo_styles/1098.png +3 -0
  15. examples/demo_styles/1414.png +3 -0
  16. examples/demo_styles/1842.png +3 -0
  17. examples/demo_styles/201.png +3 -0
  18. examples/demo_styles/2190.png +3 -0
  19. examples/demo_styles/23.jpeg +3 -0
  20. examples/demo_styles/24.jpeg +3 -0
  21. examples/demo_styles/5.jpeg +3 -0
  22. examples/demo_styles/977.png +3 -0
  23. examples/video/bungeenerf_colosseum.mp4 +3 -0
  24. examples/video/dtu_scan_106.mp4 +3 -0
  25. examples/video/fillerbuster_hand_hand.mp4 +3 -0
  26. examples/video/fillerbuster_ramen.mp4 +3 -0
  27. examples/video/fox.mp4 +3 -0
  28. examples/video/horizongs_hillside_summer.mp4 +3 -0
  29. examples/video/kitti360.mp4 +3 -0
  30. examples/video/llff_fortress.mp4 +3 -0
  31. examples/video/llff_horns.mp4 +3 -0
  32. examples/video/matrixcity_street.mp4 +3 -0
  33. examples/video/meganerf_rubble.mp4 +3 -0
  34. examples/video/re10k_1eca36ec55b88fe4.mp4 +3 -0
  35. examples/video/vrnerf_apartment.mp4 +3 -0
  36. examples/video/vrnerf_kitchen.mp4 +3 -0
  37. examples/video/vrnerf_riverview.mp4 +3 -0
  38. examples/video/vrnerf_workshop.mp4 +3 -0
  39. requirements.txt +38 -0
  40. src/dataset/shims/normalize_shim.py +29 -0
  41. src/dataset/types.py +51 -0
  42. src/geometry/camera_emb.py +29 -0
  43. src/geometry/projection.py +261 -0
  44. src/misc/image_io.py +248 -0
  45. src/misc/sh_rotation.py +111 -0
  46. src/misc/sht.py +1637 -0
  47. src/misc/utils.py +73 -0
  48. src/model/decoder/__init__.py +12 -0
  49. src/model/decoder/cuda_splatting.py +244 -0
  50. src/model/decoder/decoder.py +47 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ examples/** filter=lfs diff=lfs merge=lfs -text
LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Hanzhou(Marco) Liu
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
README.md CHANGED
@@ -1,13 +1,10 @@
1
  ---
2
- title: Stylos Gradio
3
- emoji: 🏆
4
- colorFrom: yellow
5
- colorTo: purple
6
  sdk: gradio
7
- sdk_version: 5.49.1
8
  app_file: app.py
9
  pinned: false
10
- license: apache-2.0
11
  ---
12
-
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: Stylos Style Transfer
3
+ emoji: 🎨
4
+ colorFrom: pink
5
+ colorTo: blue
6
  sdk: gradio
7
+ sdk_version: 5.41.1
8
  app_file: app.py
9
  pinned: false
 
10
  ---
 
 
app.py ADDED
@@ -0,0 +1,272 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Stylos 3D Stylization Demo — Pro Space Edition with Quota Limits
5
+ Author: Hanzhou Liu
6
+ """
7
+
8
+ # ===============================================================
9
+ # ZeroGPU & Gradio Compatibility
10
+ # ===============================================================
11
# ZeroGPU/Gradio compatibility shim: some Gradio versions expose
# Queue.pending_message_lock as a plain attribute (or not at all), which
# breaks `async with` usage inside gradio's queueing internals.
import asyncio
import gradio.queueing as grq

# NOTE(review): this installs one class-level asyncio.Lock shared by all Queue
# instances, created outside any running event loop — appears acceptable for a
# single-process demo Space, but verify against the pinned gradio version.
if not hasattr(grq.Queue, "pending_message_lock") or not hasattr(grq.Queue.pending_message_lock, "__aenter__"):
    grq.Queue.pending_message_lock = asyncio.Lock()
15
+
16
+ # ===============================================================
17
+ # Imports
18
+ # ===============================================================
19
+ import gc
20
+ import os
21
+ import shutil
22
+ import sys
23
+ import time
24
+ from pathlib import Path
25
+ from datetime import datetime
26
+ from dataclasses import dataclass
27
+
28
+ import cv2
29
+ import torch
30
+ import gradio as gr
31
+ from PIL import Image
32
+ from huggingface_hub import snapshot_download
33
+ import spaces
34
+
35
+ # ===============================================================
36
+ # Project Imports
37
+ # ===============================================================
38
+ THIS_FILE = Path(__file__).resolve()
39
+ PROJECT_ROOT = THIS_FILE.parent
40
+ sys.path.append(str(PROJECT_ROOT))
41
+
42
+ from src.misc.image_io import save_interpolated_video
43
+ from src.model.model.stylos import Stylos
44
+ from src.model.ply_export import export_ply
45
+ from src.utils.image import process_image
46
+
47
+ # ===============================================================
48
+ # Constants
49
+ # ===============================================================
50
# Scratch directory for per-run outputs (extracted frames, style image,
# rendered videos, exported PLY files).
TMP_ROOT = Path("demo_tmp")
TMP_ROOT.mkdir(exist_ok=True)

# Preset (content video, style image) pairs offered as demo examples.
EXAMPLES = [
    ["examples/video/re10k_1eca36ec55b88fe4.mp4", "examples/demo_styles/23.jpeg"],
    ["examples/video/bungeenerf_colosseum.mp4", "examples/demo_styles/24.jpeg"],
    ["examples/video/fox.mp4", "examples/demo_styles/201.png"],
    ["examples/video/vrnerf_apartment.mp4", "examples/demo_styles/977.png"],
]

# ===============================================================
# Usage Limits
# ===============================================================
MAX_RUNS_PER_USER = 5    # Max runs per user per day
MAX_GPU_TIME = 120       # Max GPU time per task (seconds)
MAX_FRAMES_PER_RUN = 32  # Max frames per reconstruction
_user_usage = {}         # Temporary quota memory (clears on restart)
67
+
68
+
69
def check_user_quota(user_id: str):
    """Track and enforce a per-user daily run quota.

    Increments the caller's counter for today and raises once the counter
    exceeds ``MAX_RUNS_PER_USER``. Counters live in the module-level
    ``_user_usage`` dict, so they reset whenever the Space restarts.

    Args:
        user_id: Stable identifier for the caller (e.g. HF username or "guest").

    Returns:
        A short status string showing runs used today.

    Raises:
        gr.Error: When the daily limit has been exceeded.
    """
    today = time.strftime("%Y-%m-%d")
    key = f"{user_id}_{today}"
    # Fix: drop counters from previous days so _user_usage cannot grow without
    # bound on a long-lived process (keys have the form f"{user}_{YYYY-MM-DD}").
    for stale in [k for k in _user_usage if not k.endswith(today)]:
        del _user_usage[stale]
    _user_usage[key] = _user_usage.get(key, 0) + 1
    if _user_usage[key] > MAX_RUNS_PER_USER:
        raise gr.Error(f"⚠️ You have reached your daily limit ({MAX_RUNS_PER_USER} runs). Please try again tomorrow.")
    return f"✅ Run {_user_usage[key]} / {MAX_RUNS_PER_USER}"
77
+
78
+
79
+ # ===============================================================
80
+ # Model Container
81
+ # ===============================================================
82
@dataclass
class ModelBundle:
    """Bundles the loaded Stylos model with the device it runs on."""

    # Loaded Stylos network (set to eval mode with gradients disabled in main()).
    stylos_model: Stylos
    # Device the model's weights live on; inputs are moved here before inference.
    device: torch.device
86
+
87
+
88
+ # ===============================================================
89
+ # Utility Functions
90
+ # ===============================================================
91
def create_run_dir(base_dir: Path = TMP_ROOT) -> Path:
    """Create and return a fresh, timestamped working directory under *base_dir*."""
    stamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
    run_dir = base_dir / f"run_{stamp}"
    run_dir.mkdir(parents=True, exist_ok=True)
    return run_dir
95
+
96
+
97
def ensure_dir(path: Path, clear: bool = False):
    """Ensure *path* exists as a directory; with clear=True, recreate it empty.

    Returns *path* so the call can be used inline.
    """
    should_wipe = clear and path.exists()
    if should_wipe:
        # Remove any stale contents so the caller starts from a clean slate.
        shutil.rmtree(path)
    path.mkdir(parents=True, exist_ok=True)
    return path
102
+
103
+
104
def empty_cuda():
    """Collect Python garbage and release cached CUDA memory (no-op on CPU)."""
    gc.collect()
    if not torch.cuda.is_available():
        return
    torch.cuda.empty_cache()
108
+
109
+
110
def ingest_content(video_input=None, reuse_dir=None):
    """Sample roughly one frame per second from the uploaded video.

    Frames are written as numbered PNGs into ``<run_dir>/images`` (cleared
    first). Returns ``(run_dir, sorted_frame_paths)``.
    """
    empty_cuda()
    if reuse_dir and reuse_dir.exists():
        target_dir = reuse_dir
    else:
        target_dir = create_run_dir()
    img_dir = ensure_dir(target_dir / "images", clear=True)
    paths = []

    if video_input:
        raw = video_input if isinstance(video_input, str) else video_input["name"]
        src = Path(raw)
        cap = cv2.VideoCapture(str(src))
        # OpenCV reports 0 fps for some containers; fall back to 30.
        fps = cap.get(cv2.CAP_PROP_FPS) or 30.0
        interval = max(1, int(fps))  # keep ~1 frame per second of footage
        idx = 0
        frame_id = 0
        while True:
            ok, frame = cap.read()
            if not ok:
                break
            idx += 1
            if idx % interval:
                continue
            outp = img_dir / f"{frame_id:06}.png"
            cv2.imwrite(str(outp), frame)
            paths.append(outp)
            frame_id += 1
        cap.release()
    paths.sort()
    return target_dir, paths
136
+
137
+
138
def ingest_style(style_input, reuse_dir=None):
    """Persist the uploaded style image as ``<run_dir>/styles/style.jpg``.

    Returns ``(run_dir, [saved_path])`` — the list is empty when nothing was
    uploaded.
    """
    if reuse_dir and reuse_dir.exists():
        target_dir = reuse_dir
    else:
        target_dir = create_run_dir()
    style_dir = ensure_dir(target_dir / "styles", clear=True)
    dst = style_dir / "style.jpg"
    if style_input:
        # Force RGB so grayscale/RGBA uploads are normalized before saving.
        Image.open(style_input).convert("RGB").save(dst)
    return target_dir, [dst] if dst.exists() else []
146
+
147
+
148
+ # ===============================================================
149
+ # Inference
150
+ # ===============================================================
151
@spaces.GPU()
def run_reconstruction(target_dir: Path, bundle: ModelBundle, user_id="guest"):
    """Run Stylos inference on a prepared run directory.

    Expects ``target_dir/images`` (extracted frames) and
    ``target_dir/styles/style.jpg`` to exist. Renders interpolated RGB and
    depth videos into ``target_dir`` and exports the predicted Gaussians as a
    .ply file.

    Args:
        target_dir: Per-run working directory produced by the ingest steps.
        bundle: Loaded model plus the device it runs on.
        user_id: Quota bucket; "guest" for anonymous visitors.

    Returns:
        Tuple of (ply_path_str, rgb_video_path, depth_video_path).

    Raises:
        gr.Error: On missing inputs, frame-count, quota, or time-limit violations.
    """
    start_time = time.time()
    check_user_quota(user_id)

    if not target_dir.exists():
        raise gr.Error("❌ Temporary directory not found.")

    img_dir = target_dir / "images"
    style_img = target_dir / "styles" / "style.jpg"
    if not img_dir.exists() or not style_img.exists():
        raise gr.Error("⚠️ Please upload both a content video and a style image.")

    imgs = sorted([img_dir / f for f in os.listdir(img_dir)])
    if len(imgs) > MAX_FRAMES_PER_RUN:
        raise gr.Error(f"⚠️ Maximum {MAX_FRAMES_PER_RUN} frames allowed per run.")

    # Stack the frames into a (1, views, C, H, W) batch; the style image gets
    # an extra view dim to become (1, 1, C, H, W).
    tensors = [process_image(str(p)).to(bundle.device) for p in imgs]
    content = torch.stack(tensors, dim=0).unsqueeze(0)
    style = process_image(str(style_img)).unsqueeze(0).unsqueeze(0).to(bundle.device)

    # NOTE(review): this only bounds preprocessing time — the model call below
    # is not interrupted once it has started.
    if time.time() - start_time > MAX_GPU_TIME:
        raise gr.Error("⚠️ Exceeded GPU time limit. Please try a shorter sequence.")

    with torch.no_grad():
        # (x + 1) * 0.5 — presumably process_image outputs [-1, 1] and the
        # model expects [0, 1]; TODO confirm against src/utils/image.py.
        gauss, pose_dict = bundle.stylos_model.inference(
            (content + 1) * 0.5, style_image=(style + 1) * 0.5
        )

    extr, intr = pose_dict["extrinsic"], pose_dict["intrinsic"]
    rgb_path, depth_path = save_interpolated_video(
        extr, intr, 1, 448, 448, gauss, str(target_dir), bundle.stylos_model.decoder
    )

    # Export only the first batch element's Gaussians (batch size is 1 here).
    ply_path = target_dir / "gaussians.ply"
    export_ply(
        gauss.means[0],
        gauss.scales[0],
        gauss.rotations[0],
        gauss.harmonics[0],
        gauss.opacities[0],
        ply_path,
        save_sh_dc_only=True,
    )

    empty_cuda()
    return str(ply_path), rgb_path, depth_path
198
+
199
+
200
+ # ===============================================================
201
+ # Gradio Callbacks
202
+ # ===============================================================
203
def cb_update(video_input, style_input):
    """Gradio callback: re-ingest both inputs and refresh the previews.

    Enables the Reconstruct button only when frames AND a style image exist.
    """
    tdir, imgs = ingest_content(video_input)
    tdir, styles = ingest_style(style_input, reuse_dir=tdir)
    ready = len(imgs) and len(styles)
    frame_paths = [str(p) for p in imgs]
    style_path = str(styles[0]) if styles else None
    return str(tdir), frame_paths, style_path, gr.update(interactive=ready)
208
+
209
+
210
def cb_reconstruct(target_dir_str):
    """Gradio callback: resolve the caller's identity, then run reconstruction."""
    from spaces import get_token_username

    # Anonymous visitors share the "guest" quota bucket.
    user = get_token_username() or "guest"
    return run_reconstruction(Path(target_dir_str), GLOBAL_BUNDLE, user)
215
+
216
+
217
+ # ===============================================================
218
+ # UI
219
+ # ===============================================================
220
def create_interface():
    """Build and return the Gradio Blocks UI.

    Component creation order matters: widgets instantiated inside the Blocks
    context are laid out top-to-bottom in exactly this order.
    """
    theme = gr.themes.Soft()
    with gr.Blocks(title="Stylos 3D Stylization Demo", theme=theme) as demo:
        gr.Markdown("### 🎨 **Stylos 3D Stylization Demo (with Quota Limits)**")

        # Hidden textbox carries the per-run working directory between callbacks.
        run_dir_text = gr.Textbox(visible=False, value="None")
        video_input = gr.Video(label="Upload Video", height=300)
        style_input = gr.Image(label="Upload Style Image", type="filepath")
        gallery = gr.Gallery(label="Extracted Frames", height=200)
        reconstruct_btn = gr.Button("Reconstruct", variant="primary", interactive=False)
        model3d = gr.Model3D(label="3D Gaussian Splat", height=400)
        rgb_out = gr.Video(label="Stylized RGB")
        depth_out = gr.Video(label="Depth")

        # Re-extract frames/style whenever either input changes; cb_update also
        # toggles the Reconstruct button's interactivity.
        video_input.change(cb_update, [video_input, style_input], [run_dir_text, gallery, style_input, reconstruct_btn])
        style_input.change(cb_update, [video_input, style_input], [run_dir_text, gallery, style_input, reconstruct_btn])
        reconstruct_btn.click(cb_reconstruct, [run_dir_text], [model3d, rgb_out, depth_out])

    return demo
239
+
240
+
241
+ # ===============================================================
242
+ # Entry Point
243
+ # ===============================================================
244
# Populated by main() before the UI launches; read by cb_reconstruct().
GLOBAL_BUNDLE = None

def main():
    """Download the checkpoint, load the Stylos model, and launch the app."""
    global GLOBAL_BUNDLE
    print("🚀 Starting Stylos Demo with Quota Limits")

    # Pull only the needed checkpoint subtree from the public weights dataset.
    weights_dir = snapshot_download(
        repo_id="HanzhouLiu/Stylos_Weights",
        repo_type="dataset",
        allow_patterns=["DL3DV/2025-10-09_16-10-03/*"],
        token=False,
    )
    weights_dir = os.path.join(weights_dir, "DL3DV/2025-10-09_16-10-03")
    print(f"✅ Checkpoint ready at: {weights_dir}")

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = Stylos.from_pretrained(weights_dir).to(device)
    model.eval()
    # Freeze all parameters; the demo only ever runs inference.
    for p in model.parameters():
        p.requires_grad = False

    GLOBAL_BUNDLE = ModelBundle(model, device)

    demo = create_interface()
    demo.queue(max_size=20).launch(show_error=True, ssr_mode=False)


if __name__ == "__main__":
    main()
examples/demo_styles/00011395.png ADDED

Git LFS Details

  • SHA256: f0ecf88adf7896cc453d48ab54b1f493bcd89a16629bd4fe68b0b80fa3e3d6bd
  • Pointer size: 131 Bytes
  • Size of remote file: 110 kB
examples/demo_styles/00018289.png ADDED

Git LFS Details

  • SHA256: a8b163ef02e4e37842084c7110c69256927a0b16ae69299b018bbc707b2f5703
  • Pointer size: 131 Bytes
  • Size of remote file: 118 kB
examples/demo_styles/00038427.png ADDED

Git LFS Details

  • SHA256: 2a3f3037c717c67465a1b03e99b0e92fe0294270ce657bb4fc9ba9c908683849
  • Pointer size: 131 Bytes
  • Size of remote file: 103 kB
examples/demo_styles/00047052.png ADDED

Git LFS Details

  • SHA256: bae75fb2476cf8a7008a68a693111f8988e6b6e047132e1d7fff1fb98e17fa87
  • Pointer size: 131 Bytes
  • Size of remote file: 132 kB
examples/demo_styles/00047819.png ADDED

Git LFS Details

  • SHA256: 6ffca4e8e794e34b36fc71076b871fa9545a4c6dfc906be53f724ec530c2a955
  • Pointer size: 131 Bytes
  • Size of remote file: 118 kB
examples/demo_styles/00054987.png ADDED

Git LFS Details

  • SHA256: ce942e67d605ed0c9f2dc67e240216a89f41168f9ef0e23a90da823c61bac575
  • Pointer size: 131 Bytes
  • Size of remote file: 146 kB
examples/demo_styles/00066540.png ADDED

Git LFS Details

  • SHA256: 81cc32dad07296d8cbbd72aad631f28a08b4d8e503270e92b3fdad22e094f617
  • Pointer size: 131 Bytes
  • Size of remote file: 121 kB
examples/demo_styles/00069352.png ADDED

Git LFS Details

  • SHA256: 0b2bad6fb8b6de3a49e428922ced7f419b3c0c412e7abdef4bbbb829ed6c05b2
  • Pointer size: 131 Bytes
  • Size of remote file: 110 kB
examples/demo_styles/00091988.png ADDED

Git LFS Details

  • SHA256: 79f119985dce1d1ef7bc4d778b0ab4712b81de8bb58e7ea8c9af671301fc5eeb
  • Pointer size: 130 Bytes
  • Size of remote file: 98.4 kB
examples/demo_styles/1098.png ADDED

Git LFS Details

  • SHA256: 1198d903907f3276a6df12226fbc03b9c931daaabe6df40931ee150348c9f85e
  • Pointer size: 131 Bytes
  • Size of remote file: 165 kB
examples/demo_styles/1414.png ADDED

Git LFS Details

  • SHA256: 020aa921fe5e72df4a8aa0ca98de86b8ef8699dc58a10dd2f332a7892cbf8475
  • Pointer size: 130 Bytes
  • Size of remote file: 74 kB
examples/demo_styles/1842.png ADDED

Git LFS Details

  • SHA256: 78193a86a7e6acad22f2e8f482fdd1395e094f6a764fcc76290e9c3b8e147b1f
  • Pointer size: 131 Bytes
  • Size of remote file: 256 kB
examples/demo_styles/201.png ADDED

Git LFS Details

  • SHA256: c0f2502c470dd398aab6368a6c829d55b63b82266c7f20d063208618eeb65760
  • Pointer size: 131 Bytes
  • Size of remote file: 147 kB
examples/demo_styles/2190.png ADDED

Git LFS Details

  • SHA256: 85c93031078cf0c90f073b9a8d90d8192d7f2491111a5980f362b1f7e8eab6c9
  • Pointer size: 131 Bytes
  • Size of remote file: 178 kB
examples/demo_styles/23.jpeg ADDED

Git LFS Details

  • SHA256: 1b28969f1364083063523603b2764b0a434e07a2cb6dfea7b073b733ce202c17
  • Pointer size: 130 Bytes
  • Size of remote file: 28.9 kB
examples/demo_styles/24.jpeg ADDED

Git LFS Details

  • SHA256: cc912a234a7b2b2447afbd3a8a4807dfd610ed94abf8f8c70acebd7df3aa58df
  • Pointer size: 130 Bytes
  • Size of remote file: 24.7 kB
examples/demo_styles/5.jpeg ADDED

Git LFS Details

  • SHA256: 09b0a91e961e61342a3e9adcf0ddb0a1c8c662e4be70c63ae2714dfce002eaa8
  • Pointer size: 130 Bytes
  • Size of remote file: 17.1 kB
examples/demo_styles/977.png ADDED

Git LFS Details

  • SHA256: 0034399cd1422020945fb69cecfb1c87218b23b11e0fecb99fb0566e25c77f5d
  • Pointer size: 131 Bytes
  • Size of remote file: 115 kB
examples/video/bungeenerf_colosseum.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:416b6af945547b5d19476823672de552944c7b5a147d29e9e8243e91a16aee3e
3
+ size 329073
examples/video/dtu_scan_106.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:16d7a06325cd368b134908e600a6c0741c7d0d188f1db690532b8ac85d65fef5
3
+ size 352188
examples/video/fillerbuster_hand_hand.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7b4ca982672bc92342b3e722c171d9d2e4d67a5a8116cd9f346956fbe01e253f
3
+ size 319404
examples/video/fillerbuster_ramen.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d60346a64a0a0d6805131d0d57edeeb0dae24f24c3f10560e95df65531221229
3
+ size 660736
examples/video/fox.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d3fa2ccff78e5d8085bb58f3def2d482e8df285ced5ef1b56abfe3766f0d90e0
3
+ size 2361921
examples/video/horizongs_hillside_summer.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e5dff78d9c00b3776bfca3a370061698bddead2ae940fe5a42d082ccf2ca80d1
3
+ size 1606537
examples/video/kitti360.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c6b13929b2c2aae8b95921d8626f5be06f6afffe05ea4e47940ffeb9906f9fc
3
+ size 1843629
examples/video/llff_fortress.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:90ea046a0ec78651975529ebe6b9c72b60c19561fe61b15b15b9df0e44d9fe9a
3
+ size 196243
examples/video/llff_horns.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3bc4c443c2a3f889f0c1283e98bd6a7026c36858fb37808bb2e8699ad1a2c1d8
3
+ size 372570
examples/video/matrixcity_street.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aa415f27177398b4e06f580beb3778701ca55784afade2fd6a058212213febc8
3
+ size 3163684
examples/video/meganerf_rubble.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3410c759eb73ca2403ab8fe35d5ebabdbc25e3a0e67d8670a89fe17686246ed0
3
+ size 450116
examples/video/re10k_1eca36ec55b88fe4.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b3516eea797fe8035a7ff6d80098dfddd53a8d087dc3c00419d4192d73960d00
3
+ size 35089
examples/video/vrnerf_apartment.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4fdd5f165a4293cd95e3dd88d84b1f370decdd86308aa67a9d3832e01f4d6906
3
+ size 2076392
examples/video/vrnerf_kitchen.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3db5d766ec86a7abdfe1f033b252337e6d934ea15035fafb4d0fc0c0e9e9740a
3
+ size 775715
examples/video/vrnerf_riverview.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b8187936cc49910ef330a37b1bbdab0076096d6c01f33b097c11937184de168
3
+ size 768290
examples/video/vrnerf_workshop.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c0f1334acc74bd70086a9be94d0c36838ebd7499af27f942c315e1ba282e285b
3
+ size 1718918
requirements.txt ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ trimesh
2
+ numpy==1.25.0
3
+ wheel
4
+ tqdm
5
+ lightning
6
+ black
7
+ ruff
8
+ hydra-core
9
+ jaxtyping
10
+ beartype
11
+ wandb
12
+ einops
13
+ colorama
14
+ scikit-image
15
+ colorspacious
16
+ matplotlib
17
+ moviepy
18
+ imageio
19
+ timm
20
+ dacite
21
+ lpips
22
+ e3nn
23
+ plyfile
24
+ tabulate
25
+ svg.py
26
+ scikit-video
27
+ opencv-python
28
+ Pillow
29
+ #xformers==0.0.24
30
+ #huggingface-hub<0.14
31
+ xformers
32
+ moviepy==1.0.3
33
+ pydantic
34
+ open3d
35
+ einops
36
+ safetensors
37
+ torch_scatter @ https://data.pyg.org/whl/torch-2.8.0%2Bcu128/torch_scatter-2.1.2%2Bpt28cu128-cp310-cp310-linux_x86_64.whl
38
+ gsplat @ https://github.com/nerfstudio-project/gsplat/releases/download/v1.5.3/gsplat-1.5.3+pt22cu121-cp310-cp310-linux_x86_64.whl
src/dataset/shims/normalize_shim.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from einops import einsum, reduce, repeat
3
+ from jaxtyping import Float
4
+ from torch import Tensor
5
+
6
+ from ..types import BatchedExample
7
+
8
+
9
def inverse_normalize_image(tensor, mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)):
    """Undo channelwise normalization: x * std + mean, broadcast over (C, 1, 1)."""
    col_shape = (-1, 1, 1)
    m = torch.as_tensor(mean, dtype=tensor.dtype, device=tensor.device).view(col_shape)
    s = torch.as_tensor(std, dtype=tensor.dtype, device=tensor.device).view(col_shape)
    return tensor * s + m
13
+
14
+
15
def normalize_image(tensor, mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)):
    """Channelwise-normalize an image tensor: (x - mean) / std."""
    def as_channel_col(values):
        # Shape (C, 1, 1) so the stats broadcast over height and width.
        return torch.as_tensor(values, dtype=tensor.dtype, device=tensor.device).view(-1, 1, 1)

    return (tensor - as_channel_col(mean)) / as_channel_col(std)
19
+
20
+
21
def apply_normalize_shim(
    batch: BatchedExample,
    mean: tuple[float, float, float] = (0.5, 0.5, 0.5),
    std: tuple[float, float, float] = (0.5, 0.5, 0.5),
) -> BatchedExample:
    """Normalize the context image (and style image, when present) in place."""
    ctx = batch["context"]
    ctx["image"] = normalize_image(ctx["image"], mean, std)
    if "style_image" in ctx:
        ctx["style_image"] = normalize_image(ctx["style_image"], mean, std)
    return batch
src/dataset/types.py ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Callable, Literal, TypedDict
2
+
3
+ from jaxtyping import Float, Int64
4
+ from torch import Tensor
5
+
6
# Which phase of the pipeline a dataset/loader is serving.
Stage = Literal["train", "val", "test"]


# The following types mainly exist to make type-hinted keys show up in VS Code. Some
# dimensions are annotated as "_" because either:
# 1. They're expected to change as part of a function call (e.g., resizing the dataset).
# 2. They're expected to vary within the same function call (e.g., the number of views,
#    which differs between context and target BatchedViews).
14
+
15
+
16
class BatchedViews(TypedDict, total=False):
    """Per-view camera/image tensors for a batch; every key is optional."""

    extrinsics: Float[Tensor, "batch _ 4 4"]  # batch view 4 4
    intrinsics: Float[Tensor, "batch _ 3 3"]  # batch view 3 3
    image: Float[Tensor, "batch _ _ _ _"]  # batch view channel height width
    near: Float[Tensor, "batch _"]  # batch view
    far: Float[Tensor, "batch _"]  # batch view
    index: Int64[Tensor, "batch _"]  # batch view
    overlap: Float[Tensor, "batch _"]  # batch view
24
+
25
+
26
class BatchedExample(TypedDict, total=False):
    """One batched example: target and context view groups plus scene names."""

    target: BatchedViews
    context: BatchedViews
    scene: list[str]
30
+
31
+
32
class UnbatchedViews(TypedDict, total=False):
    """Per-view tensors for a single (unbatched) example."""

    extrinsics: Float[Tensor, "_ 4 4"]
    intrinsics: Float[Tensor, "_ 3 3"]
    image: Float[Tensor, "_ 3 height width"]
    near: Float[Tensor, " _"]
    far: Float[Tensor, " _"]
    index: Int64[Tensor, " _"]
39
+
40
+
41
class UnbatchedExample(TypedDict, total=False):
    """A single (unbatched) example: target and context views plus a scene name."""

    target: UnbatchedViews
    context: UnbatchedViews
    scene: str
45
+
46
+
47
# A data shim modifies the example after it's been returned from the data loader.
DataShim = Callable[[BatchedExample], BatchedExample]

# Unions for code that accepts either batched or unbatched data.
AnyExample = BatchedExample | UnbatchedExample
AnyViews = BatchedViews | UnbatchedViews
src/geometry/camera_emb.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from einops import rearrange
2
+
3
+ from .projection import sample_image_grid, get_local_rays
4
+ from ..misc.sht import rsh_cart_2, rsh_cart_4, rsh_cart_6, rsh_cart_8
5
+
6
+
7
def get_intrinsic_embedding(context, degree=0, downsample=1, merge_hw=False):
    """Build a per-pixel camera-ray embedding from the context intrinsics.

    Samples a (possibly downsampled) pixel grid, converts it to unit ray
    directions via the intrinsics, and optionally lifts the directions with
    real spherical harmonics of the given degree.

    Args:
        context: Mapping with "image" (b, v, c, h, w) and "intrinsics"
            (b, v, 3, 3) tensors — shapes inferred from usage below.
        degree: SH degree; 0 returns the raw xyz directions. NOTE(review):
            rsh_cart_6 is imported at module level but 6 is not accepted here.
        downsample: Integer factor dividing the image resolution.
        merge_hw: If True, flatten the spatial grid to (b, v, h*w, d);
            otherwise return channels-first (b, v, d, h, w).

    Returns:
        Direction-embedding tensor, laid out per *merge_hw*.
    """
    assert degree in [0, 2, 4, 8]

    b, v, _, h, w = context["image"].shape
    device = context["image"].device
    tgt_h, tgt_w = h // downsample, w // downsample
    xy_ray, _ = sample_image_grid((tgt_h, tgt_w), device)
    xy_ray = xy_ray[None, None, ...].expand(b, v, -1, -1, -1)  # [b, v, h, w, 2]
    directions = get_local_rays(xy_ray, rearrange(context["intrinsics"], "b v i j -> b v () () i j"),)

    if degree == 2:
        directions = rsh_cart_2(directions)
    elif degree == 4:
        directions = rsh_cart_4(directions)
    elif degree == 8:
        directions = rsh_cart_8(directions)

    if merge_hw:
        directions = rearrange(directions, "b v h w d -> b v (h w) d")
    else:
        directions = rearrange(directions, "b v h w d -> b v d h w")

    return directions
src/geometry/projection.py ADDED
@@ -0,0 +1,261 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from math import prod
2
+
3
+ import torch
4
+ from einops import einsum, rearrange, reduce, repeat
5
+ from jaxtyping import Bool, Float, Int64
6
+ from torch import Tensor
7
+
8
+
9
def homogenize_points(
    points: Float[Tensor, "*batch dim"],
) -> Float[Tensor, "*batch dim+1"]:
    """Append a homogeneous 1-coordinate to batched points (xyz -> xyz1)."""
    ones = torch.ones_like(points[..., :1])
    return torch.cat((points, ones), dim=-1)
14
+
15
+
16
def homogenize_vectors(
    vectors: Float[Tensor, "*batch dim"],
) -> Float[Tensor, "*batch dim+1"]:
    """Append a homogeneous 0-coordinate to batched vectors (xyz -> xyz0)."""
    zeros = torch.zeros_like(vectors[..., :1])
    return torch.cat((vectors, zeros), dim=-1)
21
+
22
+
23
def transform_rigid(
    homogeneous_coordinates: Float[Tensor, "*#batch dim"],
    transformation: Float[Tensor, "*#batch dim dim"],
) -> Float[Tensor, "*batch dim"]:
    """Apply a rigid-body transformation to points or vectors."""
    # Batched matrix-vector product: out_i = sum_j T_ij * x_j.
    return einsum(transformation, homogeneous_coordinates, "... i j, ... j -> ... i")
29
+
30
+
31
def transform_cam2world(
    homogeneous_coordinates: Float[Tensor, "*#batch dim"],
    extrinsics: Float[Tensor, "*#batch dim dim"],
) -> Float[Tensor, "*batch dim"]:
    """Transform points from 3D camera coordinates to 3D world coordinates."""
    # Extrinsics follow the camera-to-world convention here, so they apply directly.
    return transform_rigid(homogeneous_coordinates, extrinsics)
37
+
38
+
39
def transform_world2cam(
    homogeneous_coordinates: Float[Tensor, "*#batch dim"],
    extrinsics: Float[Tensor, "*#batch dim dim"],
) -> Float[Tensor, "*batch dim"]:
    """Transform points from 3D world coordinates to 3D camera coordinates."""
    # Inverts the cam-to-world extrinsics. NOTE(review): .inverse() is
    # recomputed on every call — fine for small batches.
    return transform_rigid(homogeneous_coordinates, extrinsics.inverse())
45
+
46
+
47
def project_camera_space(
    points: Float[Tensor, "*#batch dim"],
    intrinsics: Float[Tensor, "*#batch dim dim"],
    epsilon: float = torch.finfo(torch.float32).eps,
    infinity: float = 1e8,
) -> Float[Tensor, "*batch dim-1"]:
    """Perspective-divide camera-space points and map them through the intrinsics.

    *epsilon* guards the divide against zero depth; infinities produced by the
    divide are clamped to +/-*infinity* before the intrinsic matrix is applied.
    """
    normalized = points / (points[..., -1:] + epsilon)
    normalized = normalized.nan_to_num(posinf=infinity, neginf=-infinity)
    pixel = einsum(intrinsics, normalized, "... i j, ... j -> ... i")
    return pixel[..., :-1]
57
+
58
+
59
def project(
    points: Float[Tensor, "*#batch dim"],
    extrinsics: Float[Tensor, "*#batch dim+1 dim+1"],
    intrinsics: Float[Tensor, "*#batch dim dim"],
    epsilon: float = torch.finfo(torch.float32).eps,
) -> tuple[
    Float[Tensor, "*batch dim-1"],  # xy coordinates
    Bool[Tensor, " *batch"],  # whether points are in front of the camera
]:
    """Project world-space points to image space, flagging those in front of the camera."""
    # World -> camera, dropping the homogeneous coordinate afterwards.
    cam_points = transform_world2cam(homogenize_points(points), extrinsics)[..., :-1]
    in_front_of_camera = cam_points[..., -1] >= 0
    xy = project_camera_space(cam_points, intrinsics, epsilon=epsilon)
    return xy, in_front_of_camera
72
+
73
+
74
def unproject(
    coordinates: Float[Tensor, "*#batch dim"],
    z: Float[Tensor, "*#batch"],
    intrinsics: Float[Tensor, "*#batch dim+1 dim+1"],
) -> Float[Tensor, "*batch dim+1"]:
    """Lift 2D camera coordinates to 3D camera-space points at the given depths."""
    # Back-project through the inverse intrinsics...
    homogeneous = homogenize_points(coordinates)
    rays = einsum(intrinsics.inverse(), homogeneous, "... i j, ... j -> ... i")
    # ...then scale each ray by its depth value.
    return rays * z[..., None]
89
+
90
+
91
def get_world_rays(
    coordinates: Float[Tensor, "*#batch dim"],
    extrinsics: Float[Tensor, "*#batch dim+2 dim+2"],
    intrinsics: Float[Tensor, "*#batch dim+1 dim+1"],
) -> tuple[
    Float[Tensor, "*batch dim+1"],  # origins
    Float[Tensor, "*batch dim+1"],  # directions
]:
    """Turn pixel coordinates into world-space ray origins and unit directions."""
    # Unit-length ray directions in the camera frame (unproject at depth 1).
    cam_dirs = unproject(coordinates, torch.ones_like(coordinates[..., 0]), intrinsics)
    cam_dirs = cam_dirs / cam_dirs.norm(dim=-1, keepdim=True)

    # Rotate into the world frame; directions transform with w=0.
    world_dirs = transform_cam2world(homogenize_vectors(cam_dirs), extrinsics)[..., :-1]

    # The camera center (translation column of the extrinsics) is the shared
    # origin of every ray; broadcast it to match the directions.
    origins = extrinsics[..., :-1, -1].broadcast_to(world_dirs.shape)

    return origins, world_dirs
115
+
116
+
117
def get_local_rays(
    coordinates: Float[Tensor, "*#batch dim"],
    intrinsics: Float[Tensor, "*#batch dim+1 dim+1"],
) -> Float[Tensor, "*batch dim+1"]:
    """Return unit-length camera-space ray directions for the given pixels."""
    # Unproject at unit depth, then normalize to unit length.
    rays = unproject(
        coordinates,
        torch.ones_like(coordinates[..., 0]),
        intrinsics,
    )
    return rays / rays.norm(dim=-1, keepdim=True)
129
+
130
+
131
def sample_image_grid(
    shape: tuple[int, ...],
    device: torch.device = torch.device("cpu"),
) -> tuple[
    Float[Tensor, "*shape dim"],  # float coordinates (xy indexing)
    Int64[Tensor, "*shape dim"],  # integer indices (ij indexing)
]:
    """Get normalized (range 0 to 1) coordinates and integer indices for an image."""
    # Integer pixel indices per axis, stacked so that in 2D each entry is a
    # (row, col) pair.
    axes = [torch.arange(n, device=device) for n in shape]
    stacked_indices = torch.stack(torch.meshgrid(*axes, indexing="ij"), dim=-1)

    # Pixel-center coordinates in (0, 1). The per-axis lists are reversed
    # before the "xy" meshgrid so that in 2D each entry is (x, y) rather than
    # (row, col).
    centers = [(axis + 0.5) / n for axis, n in zip(axes, shape)]
    coordinates = torch.stack(
        torch.meshgrid(*reversed(centers), indexing="xy"), dim=-1
    )

    return coordinates, stacked_indices
152
+
153
+
154
def sample_training_rays(
    image: Float[Tensor, "batch view channel ..."],
    intrinsics: Float[Tensor, "batch view dim dim"],
    extrinsics: Float[Tensor, "batch view dim+1 dim+1"],
    num_rays: int,
) -> tuple[
    Float[Tensor, "batch ray dim"],  # origins
    Float[Tensor, "batch ray dim"],  # directions
    Float[Tensor, "batch ray 3"],  # sampled color
]:
    """Randomly sample world-space training rays, with their pixel colors,
    drawn uniformly across all views of each batch element.
    """
    device = extrinsics.device
    b, v, _, *grid_shape = image.shape

    # Generate all possible target rays.
    xy, _ = sample_image_grid(tuple(grid_shape), device)
    origins, directions = get_world_rays(
        rearrange(xy, "... d -> ... () () d"),
        extrinsics,
        intrinsics,
    )
    # Flatten the view and spatial axes so each ray is addressed by one index.
    origins = rearrange(origins, "... b v xy -> b (v ...) xy", b=b, v=v)
    directions = rearrange(directions, "... b v xy -> b (v ...) xy", b=b, v=v)
    pixels = rearrange(image, "b v c ... -> b (v ...) c")

    # Sample random rays. Sampling is with replacement: randint may repeat ids.
    num_possible_rays = v * prod(grid_shape)
    ray_indices = torch.randint(num_possible_rays, (b, num_rays), device=device)
    batch_indices = repeat(torch.arange(b, device=device), "b -> b n", n=num_rays)

    return (
        origins[batch_indices, ray_indices],
        directions[batch_indices, ray_indices],
        pixels[batch_indices, ray_indices],
    )
188
+
189
+
190
def intersect_rays(
    origins_x: Float[Tensor, "*#batch 3"],
    directions_x: Float[Tensor, "*#batch 3"],
    origins_y: Float[Tensor, "*#batch 3"],
    directions_y: Float[Tensor, "*#batch 3"],
    eps: float = 1e-5,
    inf: float = 1e10,
) -> Float[Tensor, "*batch 3"]:
    """Compute the least-squares intersection of rays. Uses the math from here:
    https://math.stackexchange.com/a/1762491/286022

    Args:
        origins_x, directions_x: first set of rays.
        origins_y, directions_y: second set of rays.
            NOTE(review): the parallelism test below compares a raw dot product
            against 1 - eps, which presumes unit-length directions -- confirm
            at call sites.
        eps: dot-product tolerance above which a ray pair counts as parallel.
        inf: placeholder coordinate assigned to parallel pairs' "intersections".
    """

    # Broadcast the rays so their shapes match.
    shape = torch.broadcast_shapes(
        origins_x.shape,
        directions_x.shape,
        origins_y.shape,
        directions_y.shape,
    )
    origins_x = origins_x.broadcast_to(shape)
    directions_x = directions_x.broadcast_to(shape)
    origins_y = origins_y.broadcast_to(shape)
    directions_y = directions_y.broadcast_to(shape)

    # Detect and remove batch elements where the directions are parallel.
    parallel = einsum(directions_x, directions_y, "... xyz, ... xyz -> ...") > 1 - eps
    origins_x = origins_x[~parallel]
    directions_x = directions_x[~parallel]
    origins_y = origins_y[~parallel]
    directions_y = directions_y[~parallel]

    # Stack the rays into (2, *shape).
    origins = torch.stack([origins_x, origins_y], dim=0)
    directions = torch.stack([directions_x, directions_y], dim=0)
    dtype = origins.dtype
    device = origins.device

    # Compute n_i * n_i^T - eye(3) from the equation.
    n = einsum(directions, directions, "r b i, r b j -> r b i j")
    n = n - torch.eye(3, dtype=dtype, device=device).broadcast_to((2, 1, 3, 3))

    # Compute the left-hand side of the equation.
    lhs = reduce(n, "r b i j -> b i j", "sum")

    # Compute the right-hand side of the equation.
    rhs = einsum(n, origins, "r b i j, r b j -> r b i")
    rhs = reduce(rhs, "r b i -> b i", "sum")

    # Solve lhs @ p = rhs in the least-squares sense for the point p that is
    # closest to both rays.
    result = torch.linalg.lstsq(lhs, rhs).solution

    # Handle the case of parallel lines by setting depth to infinity.
    result_all = torch.ones(shape, dtype=dtype, device=device) * inf
    result_all[~parallel] = result
    return result_all
245
+
246
+
247
def get_fov(intrinsics: Float[Tensor, "batch 3 3"]) -> Float[Tensor, "batch 2"]:
    """Compute horizontal and vertical fields of view (radians) from
    normalized intrinsics."""
    inv_intrinsics = intrinsics.inverse()

    def unit_ray(uv):
        # Back-project a normalized image point into a unit camera-space ray.
        point = torch.tensor(uv, dtype=torch.float32, device=intrinsics.device)
        ray = einsum(inv_intrinsics, point, "b i j, j -> b i")
        return ray / ray.norm(dim=-1, keepdim=True)

    # Rays through the midpoints of the four image edges.
    left = unit_ray([0, 0.5, 1])
    right = unit_ray([1, 0.5, 1])
    top = unit_ray([0.5, 0, 1])
    bottom = unit_ray([0.5, 1, 1])

    # The FOV along each axis is the angle between the opposite edge rays.
    fov_x = (left * right).sum(dim=-1).acos()
    fov_y = (top * bottom).sum(dim=-1).acos()
    return torch.stack((fov_x, fov_y), dim=-1)
src/misc/image_io.py ADDED
@@ -0,0 +1,248 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import io
2
+ import os
3
+ from pathlib import Path
4
+ from typing import Union
5
+
6
+ import cv2
7
+ import imageio
8
+ import numpy as np
9
+ import skvideo
10
+ import torch
11
+ import torchvision.transforms as tf
12
+ from einops import rearrange, repeat
13
+ from jaxtyping import Float, UInt8
14
+
15
+ from matplotlib import pyplot as plt
16
+ from matplotlib.figure import Figure
17
+ from PIL import Image
18
+ from torch import Tensor
19
+
20
+ FloatImage = Union[
21
+ Float[Tensor, "height width"],
22
+ Float[Tensor, "channel height width"],
23
+ Float[Tensor, "batch channel height width"],
24
+ ]
25
+
26
+
27
def fig_to_image(
    fig: Figure,
    dpi: int = 100,
    device: torch.device = torch.device("cpu"),
) -> Float[Tensor, "3 height width"]:
    """Rasterize a matplotlib figure into a float RGB tensor in [0, 1]."""
    # Render the figure into an in-memory raw (RGBA) byte buffer.
    buffer = io.BytesIO()
    fig.savefig(buffer, format="raw", dpi=dpi)
    buffer.seek(0)
    raw = np.frombuffer(buffer.getvalue(), dtype=np.uint8)
    # The raw format is row-major RGBA at the figure's pixel dimensions.
    height = int(fig.bbox.bounds[3])
    width = int(fig.bbox.bounds[2])
    pixels = rearrange(raw, "(h w c) -> c h w", h=height, w=width, c=4)
    buffer.close()
    # Normalize to [0, 1] and drop the alpha channel.
    return (torch.tensor(pixels, device=device, dtype=torch.float32) / 255)[:3]
41
+
42
+
43
def prep_image(image: FloatImage) -> UInt8[np.ndarray, "height width channel"]:
    """Convert a float image tensor (range [0, 1]) to a uint8 HWC numpy array."""
    # Batched input: lay the batch out side by side along the width.
    if image.ndim == 4:
        image = rearrange(image, "b c h w -> c h (b w)")

    # Grayscale input without a channel axis: add one.
    if image.ndim == 2:
        image = rearrange(image, "h w -> () h w")

    # Promote single-channel images to RGB; beyond that only RGB/RGBA is valid.
    channel, _, _ = image.shape
    if channel == 1:
        image = repeat(image, "() h w -> c h w", c=3)
    assert image.shape[0] in (3, 4)

    # Quantize to uint8 after clamping into the valid range.
    quantized = (image.detach().clip(min=0, max=1) * 255).type(torch.uint8)
    return quantized.permute(1, 2, 0).cpu().numpy()
60
+
61
+
62
def save_image(
    image: FloatImage,
    path: Union[Path, str],
) -> None:
    """Save an image. Assumed to be in range 0-1."""
    target = Path(path)

    # Make sure the destination directory exists before writing.
    target.parent.mkdir(exist_ok=True, parents=True)

    # Quantize to uint8 and write via PIL.
    Image.fromarray(prep_image(image)).save(target)
74
+
75
+
76
def load_image(
    path: Union[Path, str],
) -> Float[Tensor, "3 height width"]:
    """Load an image from disk as a float tensor in [0, 1], keeping only RGB."""
    pil_image = Image.open(path)
    tensor = tf.ToTensor()(pil_image)
    # Drop any alpha channel.
    return tensor[:3]
80
+
81
+
82
def save_video(tensor, save_path, fps=10):
    """
    Save a tensor of shape (N, C, H, W) as a video file using imageio.
    Args:
        tensor: Tensor of shape (N, C, H, W) in range [0, 1]
        save_path: Path to save the video file
        fps: Frames per second for the video
    """
    # Convert tensor to numpy array and adjust dimensions: (N, C, H, W) -> (N, H, W, C).
    video = tensor.cpu().detach().numpy()
    video = np.transpose(video, (0, 2, 3, 1))

    # Clip into [0, 1] before scaling so out-of-range values cannot wrap
    # around during the uint8 cast, then scale to [0, 255].
    video = (np.clip(video, 0.0, 1.0) * 255).astype(np.uint8)

    # Ensure the directory exists. dirname() is empty when save_path is a bare
    # filename, and os.makedirs("") would raise, so guard it. (os is imported
    # at module level; no need to re-import here.)
    directory = os.path.dirname(save_path)
    if directory:
        os.makedirs(directory, exist_ok=True)

    # Use imageio to write video (handles codec compatibility automatically);
    # close the writer even if a frame write fails so the file handle is freed.
    writer = imageio.get_writer(save_path, fps=fps)
    try:
        for frame in video:
            writer.append_data(frame)
    finally:
        writer.close()
111
+
112
def save_images(tensor, save_path):
    """
    Save a tensor of shape (N, C, H, W) as a series of images using imageio.
    Args:
        tensor: Tensor of shape (N, C, H, W) in range [0, 1]
        save_path: Path to save the video file
    """
    # (N, C, H, W) float tensor -> (N, H, W, C) numpy array.
    frames = tensor.cpu().detach().numpy()
    frames = np.transpose(frames, (0, 2, 3, 1))

    # Scale to [0, 255] and convert to uint8.
    frames = (frames * 255).astype(np.uint8)

    os.makedirs(save_path, exist_ok=True)
    # Write one zero-padded PNG per frame: 000.png, 001.png, ...
    for index, frame in enumerate(frames):
        imageio.imwrite(os.path.join(save_path, f"{index:03d}.png"), frame)
130
+
131
def save_interpolated_video(
    pred_extrinsics, pred_intrinsics, b, h, w, gaussians, save_path, decoder_func, t=10,
    save_rgb_video=True, save_depth_video=True, save_rgb=False, save_depth=False,
    save_name=""
):
    """Render and save RGB/depth videos along a camera path built by
    interpolating between consecutive predicted camera poses.

    Args:
        pred_extrinsics: (b, v, 4, 4) camera poses.
        pred_intrinsics: (b, v, 3, 3) camera intrinsics.
        b, h, w: batch size and render height/width.
        gaussians: scene representation handed to the decoder unchanged.
        save_path: output directory for videos/frames.
        decoder_func: renderer whose .forward(gaussians, extrinsics, intrinsics,
            near, far, (h, w)) returns an object with .color and .depth.
        t: number of interpolated views inserted between each keyframe pair.
        save_rgb_video, save_depth_video: whether to write the mp4 files.
        save_rgb, save_depth: whether to also dump per-frame PNG folders.
        save_name: filename prefix for all outputs.

    Returns:
        (rgb_path, depth_path): the mp4 paths (returned regardless of whether
        the corresponding video was actually written).
    """
    interpolated_extrinsics = []
    interpolated_intrinsics = []

    if pred_extrinsics.shape[1] == 1:
        # Single frame: duplicate it so a (static) video can still be written.
        for _ in range(t):
            interpolated_extrinsics.append(pred_extrinsics[:, 0].unsqueeze(1))
            interpolated_intrinsics.append(pred_intrinsics[:, 0].unsqueeze(1))
    else:
        # For each pair of neighboring frames...
        for i in range(pred_extrinsics.shape[1] - 1):
            # ...add keyframe i itself...
            interpolated_extrinsics.append(pred_extrinsics[:, i : i + 1])
            interpolated_intrinsics.append(pred_intrinsics[:, i : i + 1])

            # ...then t in-between views between keyframes i and i + 1.
            for j in range(1, t + 1):
                alpha = j / (t + 1)

                start_extrinsic = pred_extrinsics[:, i]
                end_extrinsic = pred_extrinsics[:, i + 1]

                # Separate rotation and translation.
                start_rot = start_extrinsic[:, :3, :3]
                end_rot = end_extrinsic[:, :3, :3]
                start_trans = start_extrinsic[:, :3, 3]
                end_trans = end_extrinsic[:, :3, 3]

                # Translation: plain linear interpolation.
                interp_trans = (1 - alpha) * start_trans + alpha * end_trans

                # Rotation: linear blend of the matrix entries, then
                # re-orthogonalized via SVD so the result is a valid rotation.
                interp_rot_flat = (
                    (1 - alpha) * start_rot.reshape(b, 9)
                    + alpha * end_rot.reshape(b, 9)
                )
                interp_rot = interp_rot_flat.reshape(b, 3, 3)
                u, _, v_mat = torch.svd(interp_rot)
                interp_rot = torch.bmm(u, v_mat.transpose(1, 2))

                # Combine interpolated rotation and translation into a 4x4 pose.
                interp_extrinsic = (
                    torch.eye(4, device=pred_extrinsics.device)
                    .unsqueeze(0)
                    .repeat(b, 1, 1)
                )
                interp_extrinsic[:, :3, :3] = interp_rot
                interp_extrinsic[:, :3, 3] = interp_trans

                # Intrinsics: linear interpolation.
                interp_intrinsic = (
                    (1 - alpha) * pred_intrinsics[:, i]
                    + alpha * pred_intrinsics[:, i + 1]
                )

                interpolated_extrinsics.append(interp_extrinsic.unsqueeze(1))
                interpolated_intrinsics.append(interp_intrinsic.unsqueeze(1))

        # Include the final keyframe. (Previously this append happened after
        # the concatenation below, so the last view was silently dropped from
        # the rendered video.)
        interpolated_extrinsics.append(pred_extrinsics[:, -1:])
        interpolated_intrinsics.append(pred_intrinsics[:, -1:])

    # Concatenate all frames along the view axis.
    pred_all_extrinsic = torch.cat(interpolated_extrinsics, dim=1)
    pred_all_intrinsic = torch.cat(interpolated_intrinsics, dim=1)

    num_frames = pred_all_extrinsic.shape[1]

    # Render the interpolated trajectory (near/far planes fixed at 0.1 / 100).
    interpolated_output = decoder_func.forward(
        gaussians,
        pred_all_extrinsic,
        pred_all_intrinsic.float(),
        torch.ones(1, num_frames, device=pred_all_extrinsic.device) * 0.1,
        torch.ones(1, num_frames, device=pred_all_extrinsic.device) * 100,
        (h, w),
    )

    # Convert to video format.
    video = interpolated_output.color[0].clip(min=0, max=1)
    depth = interpolated_output.depth[0]

    # Normalize depth for visualization with robust (1%/99%) percentiles.
    # Only every num_views-th frame is used for the quantiles, to avoid
    # `quantile() input tensor is too large`.
    num_views = pred_extrinsics.shape[1]
    depth_lo = depth[::num_views].quantile(0.01)
    depth_hi = depth[::num_views].quantile(0.99)
    depth_norm = (depth - depth_lo) / (depth_hi - depth_lo)
    depth_norm = plt.cm.turbo(depth_norm.cpu().numpy())
    depth_colored = (
        torch.from_numpy(depth_norm[..., :3]).permute(0, 3, 1, 2).to(depth.device)
    )
    depth_colored = depth_colored.clip(min=0, max=1)

    # Save videos.
    if save_depth_video:
        save_video(depth_colored, os.path.join(save_path, f"{save_name}depth.mp4"))
    if save_rgb_video:
        save_video(video, os.path.join(save_path, f"{save_name}rgb.mp4"))

    # Save per-frame images.
    if save_rgb:
        save_images(video, os.path.join(save_path, f"{save_name}rgb_frames"))
    if save_depth:
        save_images(depth_colored, os.path.join(save_path, f"{save_name}depth_frames"))

    return os.path.join(save_path, f"{save_name}rgb.mp4"), os.path.join(save_path, f"{save_name}depth.mp4")
src/misc/sh_rotation.py ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from math import isqrt
2
+
3
+ import torch
4
+ from e3nn.o3 import matrix_to_angles, wigner_D
5
+ from einops import einsum
6
+ from jaxtyping import Float
7
+ from torch import Tensor
8
+
9
+
10
def rotate_sh(
    sh_coefficients: Float[Tensor, "*#batch n"],
    rotations: Float[Tensor, "*#batch 3 3"],
) -> Float[Tensor, "*batch n"]:
    """Rotate real spherical-harmonics coefficients by 3x3 rotation matrices.

    Args:
        sh_coefficients: coefficients laid out degree-by-degree; n is assumed
            to be a perfect square (degrees 0 .. isqrt(n) - 1).
        rotations: rotation matrices, broadcastable against the coefficients.

    Returns:
        Coefficients of the rotated functions, same layout as the input.
    """
    device = sh_coefficients.device
    dtype = sh_coefficients.dtype

    # change the basis from YZX -> XYZ to fit the convention of e3nn
    P = torch.tensor([[0, 0, 1], [1, 0, 0], [0, 1, 0]],
                     dtype=sh_coefficients.dtype, device=sh_coefficients.device)
    inversed_P = torch.tensor([[0, 1, 0], [0, 0, 1], [1, 0, 0], ],
                              dtype=sh_coefficients.dtype, device=sh_coefficients.device)
    permuted_rotation_matrix = inversed_P @ rotations @ P

    *_, n = sh_coefficients.shape
    # Decompose each rotation into Euler angles for e3nn's Wigner-D matrices.
    alpha, beta, gamma = matrix_to_angles(permuted_rotation_matrix)
    result = []
    for degree in range(isqrt(n)):
        with torch.device(device):
            # NOTE(review): beta is negated here -- presumably compensating for
            # the basis permutation above; confirm against e3nn's conventions.
            sh_rotations = wigner_D(degree, alpha, -beta, gamma).type(dtype)
        # Apply the degree-d rotation to that degree's coefficient slice.
        sh_rotated = einsum(
            sh_rotations,
            sh_coefficients[..., degree**2 : (degree + 1) ** 2],
            "... i j, ... j -> ... i",
        )
        result.append(sh_rotated)

    return torch.cat(result, dim=-1)
38
+
39
+
40
+ # def rotate_sh(
41
+ # sh_coefficients: Float[Tensor, "*#batch n"],
42
+ # rotations: Float[Tensor, "*#batch 3 3"],
43
+ # ) -> Float[Tensor, "*batch n"]:
44
+ # device = sh_coefficients.device
45
+ # dtype = sh_coefficients.dtype
46
+ #
47
+ # *_, n = sh_coefficients.shape
48
+ # alpha, beta, gamma = matrix_to_angles(rotations)
49
+ # result = []
50
+ # for degree in range(isqrt(n)):
51
+ # with torch.device(device):
52
+ # sh_rotations = wigner_D(degree, alpha, beta, gamma).type(dtype)
53
+ # sh_rotated = einsum(
54
+ # sh_rotations,
55
+ # sh_coefficients[..., degree**2 : (degree + 1) ** 2],
56
+ # "... i j, ... j -> ... i",
57
+ # )
58
+ # result.append(sh_rotated)
59
+ #
60
+ # return torch.cat(result, dim=-1)
61
+
62
+
63
if __name__ == "__main__":
    # Visual sanity check: rotate random SH coefficients about the x-axis and
    # render each rotated function on the unit sphere as an image sequence.
    from pathlib import Path

    import matplotlib.pyplot as plt
    from e3nn.o3 import spherical_harmonics
    from matplotlib import cm
    from scipy.spatial.transform.rotation import Rotation as R

    device = torch.device("cuda")

    # Generate random spherical harmonics coefficients.
    degree = 4
    coefficients = torch.rand((degree + 1) ** 2, dtype=torch.float32, device=device)

    def plot_sh(sh_coefficients, path: Path) -> None:
        # Sample the unit sphere on a 100x100 (phi, theta) grid.
        phi = torch.linspace(0, torch.pi, 100, device=device)
        theta = torch.linspace(0, 2 * torch.pi, 100, device=device)
        phi, theta = torch.meshgrid(phi, theta, indexing="xy")
        x = torch.sin(phi) * torch.cos(theta)
        y = torch.sin(phi) * torch.sin(theta)
        z = torch.cos(phi)
        xyz = torch.stack([x, y, z], dim=-1)
        # Evaluate the SH expansion at every grid point, then rescale to
        # [0, 1] for colormapping.
        sh = spherical_harmonics(list(range(degree + 1)), xyz, True)
        result = einsum(sh, sh_coefficients, "... n, n -> ...")
        result = (result - result.min()) / (result.max() - result.min())

        # Set the aspect ratio to 1 so our sphere looks spherical
        fig = plt.figure(figsize=plt.figaspect(1.0))
        ax = fig.add_subplot(111, projection="3d")
        ax.plot_surface(
            x.cpu().numpy(),
            y.cpu().numpy(),
            z.cpu().numpy(),
            rstride=1,
            cstride=1,
            facecolors=cm.seismic(result.cpu().numpy()),
        )
        # Turn off the axis planes
        ax.set_axis_off()
        path.parent.mkdir(exist_ok=True, parents=True)
        plt.savefig(path)

    # 30 frames covering one full revolution about the x-axis.
    for i, angle in enumerate(torch.linspace(0, 2 * torch.pi, 30)):
        rotation = torch.tensor(
            R.from_euler("x", angle.item()).as_matrix(), device=device
        )
        plot_sh(rotate_sh(coefficients, rotation), Path(f"sh_rotation/{i:0>3}.png"))

    print("Done!")
src/misc/sht.py ADDED
@@ -0,0 +1,1637 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Real spherical harmonics in Cartesian form for PyTorch.
2
+
3
+ This is an autogenerated file. See
4
+ https://github.com/cheind/torch-spherical-harmonics
5
+ for more information.
6
+ """
7
+
8
+ import torch
9
+
10
+
11
+ def rsh_cart_0(xyz: torch.Tensor):
12
+ """Computes all real spherical harmonics up to degree 0.
13
+
14
+ This is an autogenerated method. See
15
+ https://github.com/cheind/torch-spherical-harmonics
16
+ for more information.
17
+
18
+ Params:
19
+ xyz: (N,...,3) tensor of points on the unit sphere
20
+
21
+ Returns:
22
+ rsh: (N,...,1) real spherical harmonics
23
+ projections of input. Ynm is found at index
24
+ `n*(n+1) + m`, with `0 <= n <= degree` and
25
+ `-n <= m <= n`.
26
+ """
27
+
28
+ return torch.stack(
29
+ [
30
+ xyz.new_tensor(0.282094791773878).expand(xyz.shape[:-1]),
31
+ ],
32
+ -1,
33
+ )
34
+
35
+
36
+ def rsh_cart_1(xyz: torch.Tensor):
37
+ """Computes all real spherical harmonics up to degree 1.
38
+
39
+ This is an autogenerated method. See
40
+ https://github.com/cheind/torch-spherical-harmonics
41
+ for more information.
42
+
43
+ Params:
44
+ xyz: (N,...,3) tensor of points on the unit sphere
45
+
46
+ Returns:
47
+ rsh: (N,...,4) real spherical harmonics
48
+ projections of input. Ynm is found at index
49
+ `n*(n+1) + m`, with `0 <= n <= degree` and
50
+ `-n <= m <= n`.
51
+ """
52
+ x = xyz[..., 0]
53
+ y = xyz[..., 1]
54
+ z = xyz[..., 2]
55
+
56
+ return torch.stack(
57
+ [
58
+ xyz.new_tensor(0.282094791773878).expand(xyz.shape[:-1]),
59
+ -0.48860251190292 * y,
60
+ 0.48860251190292 * z,
61
+ -0.48860251190292 * x,
62
+ ],
63
+ -1,
64
+ )
65
+
66
+
67
def rsh_cart_2(xyz: torch.Tensor):
    """Computes all real spherical harmonics up to degree 2.

    This is an autogenerated method. See
    https://github.com/cheind/torch-spherical-harmonics
    for more information.

    Params:
        xyz: (N,...,3) tensor of points on the unit sphere

    Returns:
        rsh: (N,...,9) real spherical harmonics
            projections of input. Ynm is found at index
            `n*(n+1) + m`, with `0 <= n <= degree` and
            `-n <= m <= n`.
    """
    x = xyz[..., 0]
    y = xyz[..., 1]
    z = xyz[..., 2]

    # Monomials shared by several of the stacked terms below.
    x2 = x**2
    y2 = y**2
    z2 = z**2
    xy = x * y
    xz = x * z
    yz = y * z

    # Harmonics in (n, m) order: (0,0), (1,-1), (1,0), (1,1), (2,-2) ... (2,2).
    return torch.stack(
        [
            xyz.new_tensor(0.282094791773878).expand(xyz.shape[:-1]),
            -0.48860251190292 * y,
            0.48860251190292 * z,
            -0.48860251190292 * x,
            1.09254843059208 * xy,
            -1.09254843059208 * yz,
            0.94617469575756 * z2 - 0.31539156525252,
            -1.09254843059208 * xz,
            0.54627421529604 * x2 - 0.54627421529604 * y2,
        ],
        -1,
    )
108
+
109
+
110
def rsh_cart_3(xyz: torch.Tensor):
    """Computes all real spherical harmonics up to degree 3.

    This is an autogenerated method. See
    https://github.com/cheind/torch-spherical-harmonics
    for more information.

    Params:
        xyz: (N,...,3) tensor of points on the unit sphere

    Returns:
        rsh: (N,...,16) real spherical harmonics
            projections of input. Ynm is found at index
            `n*(n+1) + m`, with `0 <= n <= degree` and
            `-n <= m <= n`.
    """
    x = xyz[..., 0]
    y = xyz[..., 1]
    z = xyz[..., 2]

    # Monomials shared by several of the stacked terms below.
    x2 = x**2
    y2 = y**2
    z2 = z**2
    xy = x * y
    xz = x * z
    yz = y * z

    # Harmonics in (n, m) order, degrees 0 through 3.
    return torch.stack(
        [
            xyz.new_tensor(0.282094791773878).expand(xyz.shape[:-1]),
            -0.48860251190292 * y,
            0.48860251190292 * z,
            -0.48860251190292 * x,
            1.09254843059208 * xy,
            -1.09254843059208 * yz,
            0.94617469575756 * z2 - 0.31539156525252,
            -1.09254843059208 * xz,
            0.54627421529604 * x2 - 0.54627421529604 * y2,
            -0.590043589926644 * y * (3.0 * x2 - y2),
            2.89061144264055 * xy * z,
            0.304697199642977 * y * (1.5 - 7.5 * z2),
            1.24392110863372 * z * (1.5 * z2 - 0.5) - 0.497568443453487 * z,
            0.304697199642977 * x * (1.5 - 7.5 * z2),
            1.44530572132028 * z * (x2 - y2),
            -0.590043589926644 * x * (x2 - 3.0 * y2),
        ],
        -1,
    )
158
+
159
+
160
+ def rsh_cart_4(xyz: torch.Tensor):
161
+ """Computes all real spherical harmonics up to degree 4.
162
+
163
+ This is an autogenerated method. See
164
+ https://github.com/cheind/torch-spherical-harmonics
165
+ for more information.
166
+
167
+ Params:
168
+ xyz: (N,...,3) tensor of points on the unit sphere
169
+
170
+ Returns:
171
+ rsh: (N,...,25) real spherical harmonics
172
+ projections of input. Ynm is found at index
173
+ `n*(n+1) + m`, with `0 <= n <= degree` and
174
+ `-n <= m <= n`.
175
+ """
176
+ x = xyz[..., 0]
177
+ y = xyz[..., 1]
178
+ z = xyz[..., 2]
179
+
180
+ x2 = x**2
181
+ y2 = y**2
182
+ z2 = z**2
183
+ xy = x * y
184
+ xz = x * z
185
+ yz = y * z
186
+ x4 = x2**2
187
+ y4 = y2**2
188
+ z4 = z2**2
189
+
190
+ return torch.stack(
191
+ [
192
+ xyz.new_tensor(0.282094791773878).expand(xyz.shape[:-1]),
193
+ -0.48860251190292 * y,
194
+ 0.48860251190292 * z,
195
+ -0.48860251190292 * x,
196
+ 1.09254843059208 * xy,
197
+ -1.09254843059208 * yz,
198
+ 0.94617469575756 * z2 - 0.31539156525252,
199
+ -1.09254843059208 * xz,
200
+ 0.54627421529604 * x2 - 0.54627421529604 * y2,
201
+ -0.590043589926644 * y * (3.0 * x2 - y2),
202
+ 2.89061144264055 * xy * z,
203
+ 0.304697199642977 * y * (1.5 - 7.5 * z2),
204
+ 1.24392110863372 * z * (1.5 * z2 - 0.5) - 0.497568443453487 * z,
205
+ 0.304697199642977 * x * (1.5 - 7.5 * z2),
206
+ 1.44530572132028 * z * (x2 - y2),
207
+ -0.590043589926644 * x * (x2 - 3.0 * y2),
208
+ 2.5033429417967 * xy * (x2 - y2),
209
+ -1.77013076977993 * yz * (3.0 * x2 - y2),
210
+ 0.126156626101008 * xy * (52.5 * z2 - 7.5),
211
+ 0.267618617422916 * y * (2.33333333333333 * z * (1.5 - 7.5 * z2) + 4.0 * z),
212
+ 1.48099765681286
213
+ * z
214
+ * (1.66666666666667 * z * (1.5 * z2 - 0.5) - 0.666666666666667 * z)
215
+ - 0.952069922236839 * z2
216
+ + 0.317356640745613,
217
+ 0.267618617422916 * x * (2.33333333333333 * z * (1.5 - 7.5 * z2) + 4.0 * z),
218
+ 0.063078313050504 * (x2 - y2) * (52.5 * z2 - 7.5),
219
+ -1.77013076977993 * xz * (x2 - 3.0 * y2),
220
+ -3.75501441269506 * x2 * y2
221
+ + 0.625835735449176 * x4
222
+ + 0.625835735449176 * y4,
223
+ ],
224
+ -1,
225
+ )
226
+
227
+
228
+ def rsh_cart_5(xyz: torch.Tensor):
229
+ """Computes all real spherical harmonics up to degree 5.
230
+
231
+ This is an autogenerated method. See
232
+ https://github.com/cheind/torch-spherical-harmonics
233
+ for more information.
234
+
235
+ Params:
236
+ xyz: (N,...,3) tensor of points on the unit sphere
237
+
238
+ Returns:
239
+ rsh: (N,...,36) real spherical harmonics
240
+ projections of input. Ynm is found at index
241
+ `n*(n+1) + m`, with `0 <= n <= degree` and
242
+ `-n <= m <= n`.
243
+ """
244
+ x = xyz[..., 0]
245
+ y = xyz[..., 1]
246
+ z = xyz[..., 2]
247
+
248
+ x2 = x**2
249
+ y2 = y**2
250
+ z2 = z**2
251
+ xy = x * y
252
+ xz = x * z
253
+ yz = y * z
254
+ x4 = x2**2
255
+ y4 = y2**2
256
+ z4 = z2**2
257
+
258
+ return torch.stack(
259
+ [
260
+ xyz.new_tensor(0.282094791773878).expand(xyz.shape[:-1]),
261
+ -0.48860251190292 * y,
262
+ 0.48860251190292 * z,
263
+ -0.48860251190292 * x,
264
+ 1.09254843059208 * xy,
265
+ -1.09254843059208 * yz,
266
+ 0.94617469575756 * z2 - 0.31539156525252,
267
+ -1.09254843059208 * xz,
268
+ 0.54627421529604 * x2 - 0.54627421529604 * y2,
269
+ -0.590043589926644 * y * (3.0 * x2 - y2),
270
+ 2.89061144264055 * xy * z,
271
+ 0.304697199642977 * y * (1.5 - 7.5 * z2),
272
+ 1.24392110863372 * z * (1.5 * z2 - 0.5) - 0.497568443453487 * z,
273
+ 0.304697199642977 * x * (1.5 - 7.5 * z2),
274
+ 1.44530572132028 * z * (x2 - y2),
275
+ -0.590043589926644 * x * (x2 - 3.0 * y2),
276
+ 2.5033429417967 * xy * (x2 - y2),
277
+ -1.77013076977993 * yz * (3.0 * x2 - y2),
278
+ 0.126156626101008 * xy * (52.5 * z2 - 7.5),
279
+ 0.267618617422916 * y * (2.33333333333333 * z * (1.5 - 7.5 * z2) + 4.0 * z),
280
+ 1.48099765681286
281
+ * z
282
+ * (1.66666666666667 * z * (1.5 * z2 - 0.5) - 0.666666666666667 * z)
283
+ - 0.952069922236839 * z2
284
+ + 0.317356640745613,
285
+ 0.267618617422916 * x * (2.33333333333333 * z * (1.5 - 7.5 * z2) + 4.0 * z),
286
+ 0.063078313050504 * (x2 - y2) * (52.5 * z2 - 7.5),
287
+ -1.77013076977993 * xz * (x2 - 3.0 * y2),
288
+ -3.75501441269506 * x2 * y2
289
+ + 0.625835735449176 * x4
290
+ + 0.625835735449176 * y4,
291
+ -0.65638205684017 * y * (-10.0 * x2 * y2 + 5.0 * x4 + y4),
292
+ 8.30264925952416 * xy * z * (x2 - y2),
293
+ 0.00931882475114763 * y * (52.5 - 472.5 * z2) * (3.0 * x2 - y2),
294
+ 0.0913054625709205 * xy * (3.0 * z * (52.5 * z2 - 7.5) - 30.0 * z),
295
+ 0.241571547304372
296
+ * y
297
+ * (
298
+ 2.25 * z * (2.33333333333333 * z * (1.5 - 7.5 * z2) + 4.0 * z)
299
+ + 9.375 * z2
300
+ - 1.875
301
+ ),
302
+ -1.24747010616985 * z * (1.5 * z2 - 0.5)
303
+ + 1.6840846433293
304
+ * z
305
+ * (
306
+ 1.75
307
+ * z
308
+ * (1.66666666666667 * z * (1.5 * z2 - 0.5) - 0.666666666666667 * z)
309
+ - 1.125 * z2
310
+ + 0.375
311
+ )
312
+ + 0.498988042467941 * z,
313
+ 0.241571547304372
314
+ * x
315
+ * (
316
+ 2.25 * z * (2.33333333333333 * z * (1.5 - 7.5 * z2) + 4.0 * z)
317
+ + 9.375 * z2
318
+ - 1.875
319
+ ),
320
+ 0.0456527312854602 * (x2 - y2) * (3.0 * z * (52.5 * z2 - 7.5) - 30.0 * z),
321
+ 0.00931882475114763 * x * (52.5 - 472.5 * z2) * (x2 - 3.0 * y2),
322
+ 2.07566231488104 * z * (-6.0 * x2 * y2 + x4 + y4),
323
+ -0.65638205684017 * x * (-10.0 * x2 * y2 + x4 + 5.0 * y4),
324
+ ],
325
+ -1,
326
+ )
327
+
328
+
329
def rsh_cart_6(xyz: torch.Tensor):
    """Computes all real spherical harmonics up to degree 6.

    This is an autogenerated method. See
    https://github.com/cheind/torch-spherical-harmonics
    for more information.

    Params:
        xyz: (N,...,3) tensor of points on the unit sphere

    Returns:
        rsh: (N,...,49) real spherical harmonics
            projections of input. Ynm is found at index
            `n*(n+1) + m`, with `0 <= n <= degree` and
            `-n <= m <= n`.
    """
    x = xyz[..., 0]
    y = xyz[..., 1]
    z = xyz[..., 2]

    # Shared monomials, computed once and reused across many basis terms.
    x2 = x**2
    y2 = y**2
    z2 = z**2
    xy = x * y
    xz = x * z
    yz = y * z
    x4 = x2**2
    y4 = y2**2
    # NOTE: z2**2 is never referenced by the degree-6 expansion (higher
    # powers of z only appear through the nested terms below), so it is
    # deliberately not precomputed.

    return torch.stack(
        [
            # l = 0 (constant term; new_tensor keeps dtype/device of the input)
            xyz.new_tensor(0.282094791773878).expand(xyz.shape[:-1]),
            # l = 1
            -0.48860251190292 * y,
            0.48860251190292 * z,
            -0.48860251190292 * x,
            # l = 2
            1.09254843059208 * xy,
            -1.09254843059208 * yz,
            0.94617469575756 * z2 - 0.31539156525252,
            -1.09254843059208 * xz,
            0.54627421529604 * x2 - 0.54627421529604 * y2,
            # l = 3
            -0.590043589926644 * y * (3.0 * x2 - y2),
            2.89061144264055 * xy * z,
            0.304697199642977 * y * (1.5 - 7.5 * z2),
            1.24392110863372 * z * (1.5 * z2 - 0.5) - 0.497568443453487 * z,
            0.304697199642977 * x * (1.5 - 7.5 * z2),
            1.44530572132028 * z * (x2 - y2),
            -0.590043589926644 * x * (x2 - 3.0 * y2),
            # l = 4
            2.5033429417967 * xy * (x2 - y2),
            -1.77013076977993 * yz * (3.0 * x2 - y2),
            0.126156626101008 * xy * (52.5 * z2 - 7.5),
            0.267618617422916 * y * (2.33333333333333 * z * (1.5 - 7.5 * z2) + 4.0 * z),
            1.48099765681286 * z * (1.66666666666667 * z * (1.5 * z2 - 0.5) - 0.666666666666667 * z)
            - 0.952069922236839 * z2
            + 0.317356640745613,
            0.267618617422916 * x * (2.33333333333333 * z * (1.5 - 7.5 * z2) + 4.0 * z),
            0.063078313050504 * (x2 - y2) * (52.5 * z2 - 7.5),
            -1.77013076977993 * xz * (x2 - 3.0 * y2),
            -3.75501441269506 * x2 * y2 + 0.625835735449176 * x4 + 0.625835735449176 * y4,
            # l = 5
            -0.65638205684017 * y * (-10.0 * x2 * y2 + 5.0 * x4 + y4),
            8.30264925952416 * xy * z * (x2 - y2),
            0.00931882475114763 * y * (52.5 - 472.5 * z2) * (3.0 * x2 - y2),
            0.0913054625709205 * xy * (3.0 * z * (52.5 * z2 - 7.5) - 30.0 * z),
            0.241571547304372 * y * (2.25 * z * (2.33333333333333 * z * (1.5 - 7.5 * z2) + 4.0 * z) + 9.375 * z2 - 1.875),
            -1.24747010616985 * z * (1.5 * z2 - 0.5)
            + 1.6840846433293 * z * (1.75 * z * (1.66666666666667 * z * (1.5 * z2 - 0.5) - 0.666666666666667 * z) - 1.125 * z2 + 0.375)
            + 0.498988042467941 * z,
            0.241571547304372 * x * (2.25 * z * (2.33333333333333 * z * (1.5 - 7.5 * z2) + 4.0 * z) + 9.375 * z2 - 1.875),
            0.0456527312854602 * (x2 - y2) * (3.0 * z * (52.5 * z2 - 7.5) - 30.0 * z),
            0.00931882475114763 * x * (52.5 - 472.5 * z2) * (x2 - 3.0 * y2),
            2.07566231488104 * z * (-6.0 * x2 * y2 + x4 + y4),
            -0.65638205684017 * x * (-10.0 * x2 * y2 + x4 + 5.0 * y4),
            # l = 6
            4.09910463115149 * x**4 * xy - 13.6636821038383 * xy**3 + 4.09910463115149 * xy * y**4,
            -2.36661916223175 * yz * (-10.0 * x2 * y2 + 5.0 * x4 + y4),
            0.00427144889505798 * xy * (x2 - y2) * (5197.5 * z2 - 472.5),
            0.00584892228263444 * y * (3.0 * x2 - y2) * (3.66666666666667 * z * (52.5 - 472.5 * z2) + 280.0 * z),
            0.0701870673916132 * xy * (2.75 * z * (3.0 * z * (52.5 * z2 - 7.5) - 30.0 * z) - 91.875 * z2 + 13.125),
            0.221950995245231 * y * (
                -2.8 * z * (1.5 - 7.5 * z2)
                + 2.2 * z * (2.25 * z * (2.33333333333333 * z * (1.5 - 7.5 * z2) + 4.0 * z) + 9.375 * z2 - 1.875)
                - 4.8 * z
            ),
            -1.48328138624466 * z * (1.66666666666667 * z * (1.5 * z2 - 0.5) - 0.666666666666667 * z)
            + 1.86469659985043 * z * (
                -1.33333333333333 * z * (1.5 * z2 - 0.5)
                + 1.8 * z * (1.75 * z * (1.66666666666667 * z * (1.5 * z2 - 0.5) - 0.666666666666667 * z) - 1.125 * z2 + 0.375)
                + 0.533333333333333 * z
            )
            + 0.953538034014426 * z2
            - 0.317846011338142,
            0.221950995245231 * x * (
                -2.8 * z * (1.5 - 7.5 * z2)
                + 2.2 * z * (2.25 * z * (2.33333333333333 * z * (1.5 - 7.5 * z2) + 4.0 * z) + 9.375 * z2 - 1.875)
                - 4.8 * z
            ),
            0.0350935336958066 * (x2 - y2) * (2.75 * z * (3.0 * z * (52.5 * z2 - 7.5) - 30.0 * z) - 91.875 * z2 + 13.125),
            0.00584892228263444 * x * (x2 - 3.0 * y2) * (3.66666666666667 * z * (52.5 - 472.5 * z2) + 280.0 * z),
            0.0010678622237645 * (5197.5 * z2 - 472.5) * (-6.0 * x2 * y2 + x4 + y4),
            -2.36661916223175 * xz * (-10.0 * x2 * y2 + x4 + 5.0 * y4),
            0.683184105191914 * x2**3
            + 10.2477615778787 * x2 * y4
            - 10.2477615778787 * x4 * y2
            - 0.683184105191914 * y2**3,
        ],
        -1,
    )
507
+
508
+
509
def rsh_cart_7(xyz: torch.Tensor):
    """Computes all real spherical harmonics up to degree 7.

    This is an autogenerated method. See
    https://github.com/cheind/torch-spherical-harmonics
    for more information.

    Params:
        xyz: (N,...,3) tensor of points on the unit sphere

    Returns:
        rsh: (N,...,64) real spherical harmonics
            projections of input. Ynm is found at index
            `n*(n+1) + m`, with `0 <= n <= degree` and
            `-n <= m <= n`.
    """
    x = xyz[..., 0]
    y = xyz[..., 1]
    z = xyz[..., 2]

    # Shared monomials, computed once and reused across many basis terms.
    x2 = x**2
    y2 = y**2
    z2 = z**2
    xy = x * y
    xz = x * z
    yz = y * z
    x4 = x2**2
    y4 = y2**2
    # NOTE: z2**2 is never referenced by the degree-7 expansion (higher
    # powers of z only appear through the nested terms below), so it is
    # deliberately not precomputed.

    return torch.stack(
        [
            # l = 0 (constant term; new_tensor keeps dtype/device of the input)
            xyz.new_tensor(0.282094791773878).expand(xyz.shape[:-1]),
            # l = 1
            -0.48860251190292 * y,
            0.48860251190292 * z,
            -0.48860251190292 * x,
            # l = 2
            1.09254843059208 * xy,
            -1.09254843059208 * yz,
            0.94617469575756 * z2 - 0.31539156525252,
            -1.09254843059208 * xz,
            0.54627421529604 * x2 - 0.54627421529604 * y2,
            # l = 3
            -0.590043589926644 * y * (3.0 * x2 - y2),
            2.89061144264055 * xy * z,
            0.304697199642977 * y * (1.5 - 7.5 * z2),
            1.24392110863372 * z * (1.5 * z2 - 0.5) - 0.497568443453487 * z,
            0.304697199642977 * x * (1.5 - 7.5 * z2),
            1.44530572132028 * z * (x2 - y2),
            -0.590043589926644 * x * (x2 - 3.0 * y2),
            # l = 4
            2.5033429417967 * xy * (x2 - y2),
            -1.77013076977993 * yz * (3.0 * x2 - y2),
            0.126156626101008 * xy * (52.5 * z2 - 7.5),
            0.267618617422916 * y * (2.33333333333333 * z * (1.5 - 7.5 * z2) + 4.0 * z),
            1.48099765681286 * z * (1.66666666666667 * z * (1.5 * z2 - 0.5) - 0.666666666666667 * z)
            - 0.952069922236839 * z2
            + 0.317356640745613,
            0.267618617422916 * x * (2.33333333333333 * z * (1.5 - 7.5 * z2) + 4.0 * z),
            0.063078313050504 * (x2 - y2) * (52.5 * z2 - 7.5),
            -1.77013076977993 * xz * (x2 - 3.0 * y2),
            -3.75501441269506 * x2 * y2 + 0.625835735449176 * x4 + 0.625835735449176 * y4,
            # l = 5
            -0.65638205684017 * y * (-10.0 * x2 * y2 + 5.0 * x4 + y4),
            8.30264925952416 * xy * z * (x2 - y2),
            0.00931882475114763 * y * (52.5 - 472.5 * z2) * (3.0 * x2 - y2),
            0.0913054625709205 * xy * (3.0 * z * (52.5 * z2 - 7.5) - 30.0 * z),
            0.241571547304372 * y * (2.25 * z * (2.33333333333333 * z * (1.5 - 7.5 * z2) + 4.0 * z) + 9.375 * z2 - 1.875),
            -1.24747010616985 * z * (1.5 * z2 - 0.5)
            + 1.6840846433293 * z * (1.75 * z * (1.66666666666667 * z * (1.5 * z2 - 0.5) - 0.666666666666667 * z) - 1.125 * z2 + 0.375)
            + 0.498988042467941 * z,
            0.241571547304372 * x * (2.25 * z * (2.33333333333333 * z * (1.5 - 7.5 * z2) + 4.0 * z) + 9.375 * z2 - 1.875),
            0.0456527312854602 * (x2 - y2) * (3.0 * z * (52.5 * z2 - 7.5) - 30.0 * z),
            0.00931882475114763 * x * (52.5 - 472.5 * z2) * (x2 - 3.0 * y2),
            2.07566231488104 * z * (-6.0 * x2 * y2 + x4 + y4),
            -0.65638205684017 * x * (-10.0 * x2 * y2 + x4 + 5.0 * y4),
            # l = 6
            4.09910463115149 * x**4 * xy - 13.6636821038383 * xy**3 + 4.09910463115149 * xy * y**4,
            -2.36661916223175 * yz * (-10.0 * x2 * y2 + 5.0 * x4 + y4),
            0.00427144889505798 * xy * (x2 - y2) * (5197.5 * z2 - 472.5),
            0.00584892228263444 * y * (3.0 * x2 - y2) * (3.66666666666667 * z * (52.5 - 472.5 * z2) + 280.0 * z),
            0.0701870673916132 * xy * (2.75 * z * (3.0 * z * (52.5 * z2 - 7.5) - 30.0 * z) - 91.875 * z2 + 13.125),
            0.221950995245231 * y * (
                -2.8 * z * (1.5 - 7.5 * z2)
                + 2.2 * z * (2.25 * z * (2.33333333333333 * z * (1.5 - 7.5 * z2) + 4.0 * z) + 9.375 * z2 - 1.875)
                - 4.8 * z
            ),
            -1.48328138624466 * z * (1.66666666666667 * z * (1.5 * z2 - 0.5) - 0.666666666666667 * z)
            + 1.86469659985043 * z * (
                -1.33333333333333 * z * (1.5 * z2 - 0.5)
                + 1.8 * z * (1.75 * z * (1.66666666666667 * z * (1.5 * z2 - 0.5) - 0.666666666666667 * z) - 1.125 * z2 + 0.375)
                + 0.533333333333333 * z
            )
            + 0.953538034014426 * z2
            - 0.317846011338142,
            0.221950995245231 * x * (
                -2.8 * z * (1.5 - 7.5 * z2)
                + 2.2 * z * (2.25 * z * (2.33333333333333 * z * (1.5 - 7.5 * z2) + 4.0 * z) + 9.375 * z2 - 1.875)
                - 4.8 * z
            ),
            0.0350935336958066 * (x2 - y2) * (2.75 * z * (3.0 * z * (52.5 * z2 - 7.5) - 30.0 * z) - 91.875 * z2 + 13.125),
            0.00584892228263444 * x * (x2 - 3.0 * y2) * (3.66666666666667 * z * (52.5 - 472.5 * z2) + 280.0 * z),
            0.0010678622237645 * (5197.5 * z2 - 472.5) * (-6.0 * x2 * y2 + x4 + y4),
            -2.36661916223175 * xz * (-10.0 * x2 * y2 + x4 + 5.0 * y4),
            0.683184105191914 * x2**3
            + 10.2477615778787 * x2 * y4
            - 10.2477615778787 * x4 * y2
            - 0.683184105191914 * y2**3,
            # l = 7
            -0.707162732524596 * y * (7.0 * x2**3 + 21.0 * x2 * y4 - 35.0 * x4 * y2 - y2**3),
            2.6459606618019 * z * (6.0 * x**4 * xy - 20.0 * xy**3 + 6.0 * xy * y**4),
            9.98394571852353e-5 * y * (5197.5 - 67567.5 * z2) * (-10.0 * x2 * y2 + 5.0 * x4 + y4),
            0.00239614697244565 * xy * (x2 - y2) * (4.33333333333333 * z * (5197.5 * z2 - 472.5) - 3150.0 * z),
            0.00397356022507413 * y * (3.0 * x2 - y2) * (
                3.25 * z * (3.66666666666667 * z * (52.5 - 472.5 * z2) + 280.0 * z) + 1063.125 * z2 - 118.125
            ),
            0.0561946276120613 * xy * (
                -4.8 * z * (52.5 * z2 - 7.5)
                + 2.6 * z * (2.75 * z * (3.0 * z * (52.5 * z2 - 7.5) - 30.0 * z) - 91.875 * z2 + 13.125)
                + 48.0 * z
            ),
            0.206472245902897 * y * (
                -2.625 * z * (2.33333333333333 * z * (1.5 - 7.5 * z2) + 4.0 * z)
                + 2.16666666666667 * z * (
                    -2.8 * z * (1.5 - 7.5 * z2)
                    + 2.2 * z * (2.25 * z * (2.33333333333333 * z * (1.5 - 7.5 * z2) + 4.0 * z) + 9.375 * z2 - 1.875)
                    - 4.8 * z
                )
                - 10.9375 * z2
                + 2.1875
            ),
            1.24862677781952 * z * (1.5 * z2 - 0.5)
            - 1.68564615005635 * z * (1.75 * z * (1.66666666666667 * z * (1.5 * z2 - 0.5) - 0.666666666666667 * z) - 1.125 * z2 + 0.375)
            + 2.02901851395672 * z * (
                -1.45833333333333 * z * (1.66666666666667 * z * (1.5 * z2 - 0.5) - 0.666666666666667 * z)
                + 1.83333333333333 * z * (
                    -1.33333333333333 * z * (1.5 * z2 - 0.5)
                    + 1.8 * z * (1.75 * z * (1.66666666666667 * z * (1.5 * z2 - 0.5) - 0.666666666666667 * z) - 1.125 * z2 + 0.375)
                    + 0.533333333333333 * z
                )
                + 0.9375 * z2
                - 0.3125
            )
            - 0.499450711127808 * z,
            0.206472245902897 * x * (
                -2.625 * z * (2.33333333333333 * z * (1.5 - 7.5 * z2) + 4.0 * z)
                + 2.16666666666667 * z * (
                    -2.8 * z * (1.5 - 7.5 * z2)
                    + 2.2 * z * (2.25 * z * (2.33333333333333 * z * (1.5 - 7.5 * z2) + 4.0 * z) + 9.375 * z2 - 1.875)
                    - 4.8 * z
                )
                - 10.9375 * z2
                + 2.1875
            ),
            0.0280973138060306 * (x2 - y2) * (
                -4.8 * z * (52.5 * z2 - 7.5)
                + 2.6 * z * (2.75 * z * (3.0 * z * (52.5 * z2 - 7.5) - 30.0 * z) - 91.875 * z2 + 13.125)
                + 48.0 * z
            ),
            0.00397356022507413 * x * (x2 - 3.0 * y2) * (
                3.25 * z * (3.66666666666667 * z * (52.5 - 472.5 * z2) + 280.0 * z) + 1063.125 * z2 - 118.125
            ),
            0.000599036743111412 * (4.33333333333333 * z * (5197.5 * z2 - 472.5) - 3150.0 * z) * (-6.0 * x2 * y2 + x4 + y4),
            9.98394571852353e-5 * x * (5197.5 - 67567.5 * z2) * (-10.0 * x2 * y2 + x4 + 5.0 * y4),
            2.6459606618019 * z * (x2**3 + 15.0 * x2 * y4 - 15.0 * x4 * y2 - y2**3),
            -0.707162732524596 * x * (x2**3 + 35.0 * x2 * y4 - 21.0 * x4 * y2 - 7.0 * y2**3),
        ],
        -1,
    )
830
+
831
+
832
# @torch.jit.script
def rsh_cart_8(xyz: torch.Tensor):
    """Computes all real spherical harmonics up to degree 8.

    This is an autogenerated method. See
    https://github.com/cheind/torch-spherical-harmonics
    for more information.

    Params:
        xyz: (N,...,3) tensor of points on the unit sphere

    Returns:
        rsh: (N,...,81) real spherical harmonics
            projections of input. Ynm is found at index
            `n*(n+1) + m`, with `0 <= n <= degree` and
            `-n <= m <= n`.
    """
    x = xyz[..., 0]
    y = xyz[..., 1]
    z = xyz[..., 2]

    # Shared monomials, computed once and reused across many basis terms.
    x2 = x**2
    y2 = y**2
    z2 = z**2
    xy = x * y
    xz = x * z
    yz = y * z
    x4 = x2**2
    y4 = y2**2
    # NOTE: z2**2 is never referenced by the degree-8 expansion (higher
    # powers of z only appear through the nested terms below), so it is
    # deliberately not precomputed.

    return torch.stack(
        [
            # l = 0. Use new_tensor so the constant term inherits xyz's dtype
            # AND device: the previous torch.ones(..., device=xyz.device) was
            # always float32, which made torch.stack raise on float64/float16
            # inputs and was inconsistent with the other rsh_cart_* functions.
            xyz.new_tensor(0.282094791773878).expand(xyz.shape[:-1]),
            # l = 1
            -0.48860251190292 * y,
            0.48860251190292 * z,
            -0.48860251190292 * x,
            # l = 2
            1.09254843059208 * xy,
            -1.09254843059208 * yz,
            0.94617469575756 * z2 - 0.31539156525252,
            -1.09254843059208 * xz,
            0.54627421529604 * x2 - 0.54627421529604 * y2,
            # l = 3
            -0.590043589926644 * y * (3.0 * x2 - y2),
            2.89061144264055 * xy * z,
            0.304697199642977 * y * (1.5 - 7.5 * z2),
            1.24392110863372 * z * (1.5 * z2 - 0.5) - 0.497568443453487 * z,
            0.304697199642977 * x * (1.5 - 7.5 * z2),
            1.44530572132028 * z * (x2 - y2),
            -0.590043589926644 * x * (x2 - 3.0 * y2),
            # l = 4
            2.5033429417967 * xy * (x2 - y2),
            -1.77013076977993 * yz * (3.0 * x2 - y2),
            0.126156626101008 * xy * (52.5 * z2 - 7.5),
            0.267618617422916 * y * (2.33333333333333 * z * (1.5 - 7.5 * z2) + 4.0 * z),
            1.48099765681286 * z * (1.66666666666667 * z * (1.5 * z2 - 0.5) - 0.666666666666667 * z)
            - 0.952069922236839 * z2
            + 0.317356640745613,
            0.267618617422916 * x * (2.33333333333333 * z * (1.5 - 7.5 * z2) + 4.0 * z),
            0.063078313050504 * (x2 - y2) * (52.5 * z2 - 7.5),
            -1.77013076977993 * xz * (x2 - 3.0 * y2),
            -3.75501441269506 * x2 * y2 + 0.625835735449176 * x4 + 0.625835735449176 * y4,
            # l = 5
            -0.65638205684017 * y * (-10.0 * x2 * y2 + 5.0 * x4 + y4),
            8.30264925952416 * xy * z * (x2 - y2),
            0.00931882475114763 * y * (52.5 - 472.5 * z2) * (3.0 * x2 - y2),
            0.0913054625709205 * xy * (3.0 * z * (52.5 * z2 - 7.5) - 30.0 * z),
            0.241571547304372 * y * (2.25 * z * (2.33333333333333 * z * (1.5 - 7.5 * z2) + 4.0 * z) + 9.375 * z2 - 1.875),
            -1.24747010616985 * z * (1.5 * z2 - 0.5)
            + 1.6840846433293 * z * (1.75 * z * (1.66666666666667 * z * (1.5 * z2 - 0.5) - 0.666666666666667 * z) - 1.125 * z2 + 0.375)
            + 0.498988042467941 * z,
            0.241571547304372 * x * (2.25 * z * (2.33333333333333 * z * (1.5 - 7.5 * z2) + 4.0 * z) + 9.375 * z2 - 1.875),
            0.0456527312854602 * (x2 - y2) * (3.0 * z * (52.5 * z2 - 7.5) - 30.0 * z),
            0.00931882475114763 * x * (52.5 - 472.5 * z2) * (x2 - 3.0 * y2),
            2.07566231488104 * z * (-6.0 * x2 * y2 + x4 + y4),
            -0.65638205684017 * x * (-10.0 * x2 * y2 + x4 + 5.0 * y4),
            # l = 6
            4.09910463115149 * x**4 * xy - 13.6636821038383 * xy**3 + 4.09910463115149 * xy * y**4,
            -2.36661916223175 * yz * (-10.0 * x2 * y2 + 5.0 * x4 + y4),
            0.00427144889505798 * xy * (x2 - y2) * (5197.5 * z2 - 472.5),
            0.00584892228263444 * y * (3.0 * x2 - y2) * (3.66666666666667 * z * (52.5 - 472.5 * z2) + 280.0 * z),
            0.0701870673916132 * xy * (2.75 * z * (3.0 * z * (52.5 * z2 - 7.5) - 30.0 * z) - 91.875 * z2 + 13.125),
            0.221950995245231 * y * (
                -2.8 * z * (1.5 - 7.5 * z2)
                + 2.2 * z * (2.25 * z * (2.33333333333333 * z * (1.5 - 7.5 * z2) + 4.0 * z) + 9.375 * z2 - 1.875)
                - 4.8 * z
            ),
            -1.48328138624466 * z * (1.66666666666667 * z * (1.5 * z2 - 0.5) - 0.666666666666667 * z)
            + 1.86469659985043 * z * (
                -1.33333333333333 * z * (1.5 * z2 - 0.5)
                + 1.8 * z * (1.75 * z * (1.66666666666667 * z * (1.5 * z2 - 0.5) - 0.666666666666667 * z) - 1.125 * z2 + 0.375)
                + 0.533333333333333 * z
            )
            + 0.953538034014426 * z2
            - 0.317846011338142,
            0.221950995245231 * x * (
                -2.8 * z * (1.5 - 7.5 * z2)
                + 2.2 * z * (2.25 * z * (2.33333333333333 * z * (1.5 - 7.5 * z2) + 4.0 * z) + 9.375 * z2 - 1.875)
                - 4.8 * z
            ),
            0.0350935336958066 * (x2 - y2) * (2.75 * z * (3.0 * z * (52.5 * z2 - 7.5) - 30.0 * z) - 91.875 * z2 + 13.125),
            0.00584892228263444 * x * (x2 - 3.0 * y2) * (3.66666666666667 * z * (52.5 - 472.5 * z2) + 280.0 * z),
            0.0010678622237645 * (5197.5 * z2 - 472.5) * (-6.0 * x2 * y2 + x4 + y4),
            -2.36661916223175 * xz * (-10.0 * x2 * y2 + x4 + 5.0 * y4),
            0.683184105191914 * x2**3
            + 10.2477615778787 * x2 * y4
            - 10.2477615778787 * x4 * y2
            - 0.683184105191914 * y2**3,
            # l = 7
            -0.707162732524596 * y * (7.0 * x2**3 + 21.0 * x2 * y4 - 35.0 * x4 * y2 - y2**3),
            2.6459606618019 * z * (6.0 * x**4 * xy - 20.0 * xy**3 + 6.0 * xy * y**4),
            9.98394571852353e-5 * y * (5197.5 - 67567.5 * z2) * (-10.0 * x2 * y2 + 5.0 * x4 + y4),
            0.00239614697244565 * xy * (x2 - y2) * (4.33333333333333 * z * (5197.5 * z2 - 472.5) - 3150.0 * z),
            0.00397356022507413 * y * (3.0 * x2 - y2) * (
                3.25 * z * (3.66666666666667 * z * (52.5 - 472.5 * z2) + 280.0 * z) + 1063.125 * z2 - 118.125
            ),
            0.0561946276120613 * xy * (
                -4.8 * z * (52.5 * z2 - 7.5)
                + 2.6 * z * (2.75 * z * (3.0 * z * (52.5 * z2 - 7.5) - 30.0 * z) - 91.875 * z2 + 13.125)
                + 48.0 * z
            ),
            0.206472245902897 * y * (
                -2.625 * z * (2.33333333333333 * z * (1.5 - 7.5 * z2) + 4.0 * z)
                + 2.16666666666667 * z * (
                    -2.8 * z * (1.5 - 7.5 * z2)
                    + 2.2 * z * (2.25 * z * (2.33333333333333 * z * (1.5 - 7.5 * z2) + 4.0 * z) + 9.375 * z2 - 1.875)
                    - 4.8 * z
                )
                - 10.9375 * z2
                + 2.1875
            ),
            1.24862677781952 * z * (1.5 * z2 - 0.5)
            - 1.68564615005635 * z * (1.75 * z * (1.66666666666667 * z * (1.5 * z2 - 0.5) - 0.666666666666667 * z) - 1.125 * z2 + 0.375)
            + 2.02901851395672 * z * (
                -1.45833333333333 * z * (1.66666666666667 * z * (1.5 * z2 - 0.5) - 0.666666666666667 * z)
                + 1.83333333333333 * z * (
                    -1.33333333333333 * z * (1.5 * z2 - 0.5)
                    + 1.8 * z * (1.75 * z * (1.66666666666667 * z * (1.5 * z2 - 0.5) - 0.666666666666667 * z) - 1.125 * z2 + 0.375)
                    + 0.533333333333333 * z
                )
                + 0.9375 * z2
                - 0.3125
            )
            - 0.499450711127808 * z,
            0.206472245902897 * x * (
                -2.625 * z * (2.33333333333333 * z * (1.5 - 7.5 * z2) + 4.0 * z)
                + 2.16666666666667 * z * (
                    -2.8 * z * (1.5 - 7.5 * z2)
                    + 2.2 * z * (2.25 * z * (2.33333333333333 * z * (1.5 - 7.5 * z2) + 4.0 * z) + 9.375 * z2 - 1.875)
                    - 4.8 * z
                )
                - 10.9375 * z2
                + 2.1875
            ),
            0.0280973138060306 * (x2 - y2) * (
                -4.8 * z * (52.5 * z2 - 7.5)
                + 2.6 * z * (2.75 * z * (3.0 * z * (52.5 * z2 - 7.5) - 30.0 * z) - 91.875 * z2 + 13.125)
                + 48.0 * z
            ),
            0.00397356022507413 * x * (x2 - 3.0 * y2) * (
                3.25 * z * (3.66666666666667 * z * (52.5 - 472.5 * z2) + 280.0 * z) + 1063.125 * z2 - 118.125
            ),
            0.000599036743111412 * (4.33333333333333 * z * (5197.5 * z2 - 472.5) - 3150.0 * z) * (-6.0 * x2 * y2 + x4 + y4),
            9.98394571852353e-5 * x * (5197.5 - 67567.5 * z2) * (-10.0 * x2 * y2 + x4 + 5.0 * y4),
            2.6459606618019 * z * (x2**3 + 15.0 * x2 * y4 - 15.0 * x4 * y2 - y2**3),
            -0.707162732524596 * x * (x2**3 + 35.0 * x2 * y4 - 21.0 * x4 * y2 - 7.0 * y2**3),
            # l = 8
            5.83141328139864 * xy * (x2**3 + 7.0 * x2 * y4 - 7.0 * x4 * y2 - y2**3),
            -2.91570664069932 * yz * (7.0 * x2**3 + 21.0 * x2 * y4 - 35.0 * x4 * y2 - y2**3),
            7.87853281621404e-6 * (1013512.5 * z2 - 67567.5) * (6.0 * x**4 * xy - 20.0 * xy**3 + 6.0 * xy * y**4),
            5.10587282657803e-5 * y * (5.0 * z * (5197.5 - 67567.5 * z2) + 41580.0 * z) * (-10.0 * x2 * y2 + 5.0 * x4 + y4),
            0.00147275890257803 * xy * (x2 - y2) * (
                3.75 * z * (4.33333333333333 * z * (5197.5 * z2 - 472.5) - 3150.0 * z) - 14293.125 * z2 + 1299.375
            ),
            0.0028519853513317 * y * (3.0 * x2 - y2) * (
                -7.33333333333333 * z * (52.5 - 472.5 * z2)
                + 3.0 * z * (3.25 * z * (3.66666666666667 * z * (52.5 - 472.5 * z2) + 280.0 * z) + 1063.125 * z2 - 118.125)
                - 560.0 * z
            ),
            0.0463392770473559 * xy * (
                -4.125 * z * (3.0 * z * (52.5 * z2 - 7.5) - 30.0 * z)
                + 2.5 * z * (
                    -4.8 * z * (52.5 * z2 - 7.5)
                    + 2.6 * z * (2.75 * z * (3.0 * z * (52.5 * z2 - 7.5) - 30.0 * z) - 91.875 * z2 + 13.125)
                    + 48.0 * z
                )
                + 137.8125 * z2
                - 19.6875
            ),
            0.193851103820053 * y * (
                3.2 * z * (1.5 - 7.5 * z2)
                - 2.51428571428571 * z * (2.25 * z * (2.33333333333333 * z * (1.5 - 7.5 * z2) + 4.0 * z) + 9.375 * z2 - 1.875)
                + 2.14285714285714 * z * (
                    -2.625 * z * (2.33333333333333 * z * (1.5 - 7.5 * z2) + 4.0 * z)
                    + 2.16666666666667 * z * (
                        -2.8 * z * (1.5 - 7.5 * z2)
                        + 2.2 * z * (2.25 * z * (2.33333333333333 * z * (1.5 - 7.5 * z2) + 4.0 * z) + 9.375 * z2 - 1.875)
                        - 4.8 * z
                    )
                    - 10.9375 * z2
                    + 2.1875
                )
                + 5.48571428571429 * z
            ),
            1.48417251362228 * z * (1.66666666666667 * z * (1.5 * z2 - 0.5) - 0.666666666666667 * z)
            - 1.86581687426801 * z * (
                -1.33333333333333 * z * (1.5 * z2 - 0.5)
                + 1.8 * z * (1.75 * z * (1.66666666666667 * z * (1.5 * z2 - 0.5) - 0.666666666666667 * z) - 1.125 * z2 + 0.375)
                + 0.533333333333333 * z
            )
            + 2.1808249179756 * z * (
                1.14285714285714 * z * (1.5 * z2 - 0.5)
                - 1.54285714285714 * z * (1.75 * z * (1.66666666666667 * z * (1.5 * z2 - 0.5) - 0.666666666666667 * z) - 1.125 * z2 + 0.375)
                + 1.85714285714286 * z * (
                    -1.45833333333333 * z * (1.66666666666667 * z * (1.5 * z2 - 0.5) - 0.666666666666667 * z)
                    + 1.83333333333333 * z * (
                        -1.33333333333333 * z * (1.5 * z2 - 0.5)
                        + 1.8 * z * (1.75 * z * (1.66666666666667 * z * (1.5 * z2 - 0.5) - 0.666666666666667 * z) - 1.125 * z2 + 0.375)
                        + 0.533333333333333 * z
                    )
                    + 0.9375 * z2
                    - 0.3125
                )
                - 0.457142857142857 * z
            )
            - 0.954110901614325 * z2
            + 0.318036967204775,
            0.193851103820053 * x * (
                3.2 * z * (1.5 - 7.5 * z2)
                - 2.51428571428571 * z * (2.25 * z * (2.33333333333333 * z * (1.5 - 7.5 * z2) + 4.0 * z) + 9.375 * z2 - 1.875)
                + 2.14285714285714 * z * (
                    -2.625 * z * (2.33333333333333 * z * (1.5 - 7.5 * z2) + 4.0 * z)
                    + 2.16666666666667 * z * (
                        -2.8 * z * (1.5 - 7.5 * z2)
                        + 2.2 * z * (2.25 * z * (2.33333333333333 * z * (1.5 - 7.5 * z2) + 4.0 * z) + 9.375 * z2 - 1.875)
                        - 4.8 * z
                    )
                    - 10.9375 * z2
                    + 2.1875
                )
                + 5.48571428571429 * z
            ),
            0.0231696385236779 * (x2 - y2) * (
                -4.125 * z * (3.0 * z * (52.5 * z2 - 7.5) - 30.0 * z)
                + 2.5 * z * (
                    -4.8 * z * (52.5 * z2 - 7.5)
                    + 2.6 * z * (2.75 * z * (3.0 * z * (52.5 * z2 - 7.5) - 30.0 * z) - 91.875 * z2 + 13.125)
                    + 48.0 * z
                )
                + 137.8125 * z2
                - 19.6875
            ),
            0.0028519853513317 * x * (x2 - 3.0 * y2) * (
                -7.33333333333333 * z * (52.5 - 472.5 * z2)
                + 3.0 * z * (3.25 * z * (3.66666666666667 * z * (52.5 - 472.5 * z2) + 280.0 * z) + 1063.125 * z2 - 118.125)
                - 560.0 * z
            ),
            0.000368189725644507 * (-6.0 * x2 * y2 + x4 + y4) * (
                3.75 * z * (4.33333333333333 * z * (5197.5 * z2 - 472.5) - 3150.0 * z) - 14293.125 * z2 + 1299.375
            ),
            5.10587282657803e-5 * x * (5.0 * z * (5197.5 - 67567.5 * z2) + 41580.0 * z) * (-10.0 * x2 * y2 + x4 + 5.0 * y4),
            7.87853281621404e-6 * (1013512.5 * z2 - 67567.5) * (x2**3 + 15.0 * x2 * y4 - 15.0 * x4 * y2 - y2**3),
            -2.91570664069932 * xz * (x2**3 + 35.0 * x2 * y4 - 21.0 * x4 * y2 - 7.0 * y2**3),
            -20.4099464848952 * x2**3 * y2
            - 20.4099464848952 * x2 * y2**3
            + 0.72892666017483 * x4**2
            + 51.0248662122381 * x4 * y4
            + 0.72892666017483 * y4**2,
        ],
        -1,
    )
1394
+
1395
+
1396
# Public API of this module: the autogenerated Cartesian real-spherical-
# harmonics evaluators, one per maximum degree (0 through 8).
__all__ = [
    "rsh_cart_0",
    "rsh_cart_1",
    "rsh_cart_2",
    "rsh_cart_3",
    "rsh_cart_4",
    "rsh_cart_5",
    "rsh_cart_6",
    "rsh_cart_7",
    "rsh_cart_8",
]
1407
+
1408
+
1409
+ from typing import Optional
1410
+ import torch
1411
+
1412
+
1413
+ class SphHarm(torch.nn.Module):
1414
+ def __init__(self, m, n, dtype=torch.float32) -> None:
1415
+ super().__init__()
1416
+ self.dtype = dtype
1417
+ m = torch.tensor(list(range(-m + 1, m)))
1418
+ n = torch.tensor(list(range(n)))
1419
+ self.is_normalized = False
1420
+ vals = torch.cartesian_prod(m, n).T
1421
+ vals = vals[:, vals[0] <= vals[1]]
1422
+ m, n = vals.unbind(0)
1423
+
1424
+ self.register_buffer("m", tensor=m)
1425
+ self.register_buffer("n", tensor=n)
1426
+ self.register_buffer("l_max", tensor=torch.max(self.n))
1427
+
1428
+ f_a, f_b, initial_value, d0_mask_3d, d1_mask_3d = self._init_legendre()
1429
+ self.register_buffer("f_a", tensor=f_a)
1430
+ self.register_buffer("f_b", tensor=f_b)
1431
+ self.register_buffer("d0_mask_3d", tensor=d0_mask_3d)
1432
+ self.register_buffer("d1_mask_3d", tensor=d1_mask_3d)
1433
+ self.register_buffer("initial_value", tensor=initial_value)
1434
+
1435
+ @property
1436
+ def device(self):
1437
+ return next(self.buffers()).device
1438
+
1439
+ def forward(self, points: torch.Tensor) -> torch.Tensor:
1440
+ """Computes the spherical harmonics."""
1441
+ # Y_l^m = (-1) ^ m c_l^m P_l^m(cos(theta)) exp(i m phi)
1442
+ B, N, D = points.shape
1443
+ dtype = points.dtype
1444
+ theta, phi = points.view(-1, D).to(self.dtype).unbind(-1)
1445
+ cos_colatitude = torch.cos(phi)
1446
+ legendre = self._gen_associated_legendre(cos_colatitude)
1447
+ vals = torch.stack([self.m.abs(), self.n], dim=0)
1448
+ vals = torch.cat(
1449
+ [
1450
+ vals.repeat(1, theta.shape[0]),
1451
+ torch.arange(theta.shape[0], device=theta.device)
1452
+ .unsqueeze(0)
1453
+ .repeat_interleave(vals.shape[1], dim=1),
1454
+ ],
1455
+ dim=0,
1456
+ )
1457
+ legendre_vals = legendre[vals[0], vals[1], vals[2]]
1458
+ legendre_vals = legendre_vals.reshape(-1, theta.shape[0])
1459
+ angle = torch.outer(self.m.abs(), theta)
1460
+ vandermonde = torch.complex(torch.cos(angle), torch.sin(angle))
1461
+ harmonics = torch.complex(
1462
+ legendre_vals * torch.real(vandermonde),
1463
+ legendre_vals * torch.imag(vandermonde),
1464
+ )
1465
+
1466
+ # Negative order.
1467
+ m = self.m.unsqueeze(-1)
1468
+ harmonics = torch.where(
1469
+ m < 0, (-1.0) ** m.abs() * torch.conj(harmonics), harmonics
1470
+ )
1471
+ harmonics = harmonics.permute(1, 0).reshape(B, N, -1).to(dtype)
1472
+ return harmonics
1473
+
1474
+ def _gen_recurrence_mask(self) -> tuple[torch.Tensor, torch.Tensor]:
1475
+ """Generates mask for recurrence relation on the remaining entries.
1476
+
1477
+ The remaining entries are with respect to the diagonal and offdiagonal
1478
+ entries.
1479
+
1480
+ Args:
1481
+ l_max: see `gen_normalized_legendre`.
1482
+ Returns:
1483
+ torch.Tensors representing the mask used by the recurrence relations.
1484
+ """
1485
+
1486
+ # Computes all coefficients.
1487
+ m_mat, l_mat = torch.meshgrid(
1488
+ torch.arange(0, self.l_max + 1, device=self.device, dtype=self.dtype),
1489
+ torch.arange(0, self.l_max + 1, device=self.device, dtype=self.dtype),
1490
+ indexing="ij",
1491
+ )
1492
+ if self.is_normalized:
1493
+ c0 = l_mat * l_mat
1494
+ c1 = m_mat * m_mat
1495
+ c2 = 2.0 * l_mat
1496
+ c3 = (l_mat - 1.0) * (l_mat - 1.0)
1497
+ d0 = torch.sqrt((4.0 * c0 - 1.0) / (c0 - c1))
1498
+ d1 = torch.sqrt(((c2 + 1.0) * (c3 - c1)) / ((c2 - 3.0) * (c0 - c1)))
1499
+ else:
1500
+ d0 = (2.0 * l_mat - 1.0) / (l_mat - m_mat)
1501
+ d1 = (l_mat + m_mat - 1.0) / (l_mat - m_mat)
1502
+
1503
+ d0_mask_indices = torch.triu_indices(self.l_max + 1, 1)
1504
+ d1_mask_indices = torch.triu_indices(self.l_max + 1, 2)
1505
+
1506
+ d_zeros = torch.zeros(
1507
+ (self.l_max + 1, self.l_max + 1), dtype=self.dtype, device=self.device
1508
+ )
1509
+ d_zeros[d0_mask_indices] = d0[d0_mask_indices]
1510
+ d0_mask = d_zeros
1511
+
1512
+ d_zeros = torch.zeros(
1513
+ (self.l_max + 1, self.l_max + 1), dtype=self.dtype, device=self.device
1514
+ )
1515
+ d_zeros[d1_mask_indices] = d1[d1_mask_indices]
1516
+ d1_mask = d_zeros
1517
+
1518
+ # Creates a 3D mask that contains 1s on the diagonal plane and 0s elsewhere.
1519
+ i = torch.arange(self.l_max + 1, device=self.device)[:, None, None]
1520
+ j = torch.arange(self.l_max + 1, device=self.device)[None, :, None]
1521
+ k = torch.arange(self.l_max + 1, device=self.device)[None, None, :]
1522
+ mask = (i + j - k == 0).to(self.dtype)
1523
+ d0_mask_3d = torch.einsum("jk,ijk->ijk", d0_mask, mask)
1524
+ d1_mask_3d = torch.einsum("jk,ijk->ijk", d1_mask, mask)
1525
+ return (d0_mask_3d, d1_mask_3d)
1526
+
1527
+ def _recursive(self, i: int, p_val: torch.Tensor, x: torch.Tensor) -> torch.Tensor:
1528
+ coeff_0 = self.d0_mask_3d[i]
1529
+ coeff_1 = self.d1_mask_3d[i]
1530
+ h = torch.einsum(
1531
+ "ij,ijk->ijk",
1532
+ coeff_0,
1533
+ torch.einsum("ijk,k->ijk", torch.roll(p_val, shifts=1, dims=1), x),
1534
+ ) - torch.einsum("ij,ijk->ijk", coeff_1, torch.roll(p_val, shifts=2, dims=1))
1535
+ p_val = p_val + h
1536
+ return p_val
1537
+
1538
+ def _init_legendre(self):
1539
+ a_idx = torch.arange(1, self.l_max + 1, dtype=self.dtype, device=self.device)
1540
+ b_idx = torch.arange(self.l_max, dtype=self.dtype, device=self.device)
1541
+ if self.is_normalized:
1542
+ # The initial value p(0,0).
1543
+ initial_value: torch.Tensor = torch.tensor(
1544
+ 0.5 / (torch.pi**0.5), device=self.device
1545
+ )
1546
+ f_a = torch.cumprod(-1 * torch.sqrt(1.0 + 0.5 / a_idx), dim=0)
1547
+ f_b = torch.sqrt(2.0 * b_idx + 3.0)
1548
+ else:
1549
+ # The initial value p(0,0).
1550
+ initial_value = torch.tensor(1.0, device=self.device)
1551
+ f_a = torch.cumprod(1.0 - 2.0 * a_idx, dim=0)
1552
+ f_b = 2.0 * b_idx + 1.0
1553
+
1554
+ d0_mask_3d, d1_mask_3d = self._gen_recurrence_mask()
1555
+ return f_a, f_b, initial_value, d0_mask_3d, d1_mask_3d
1556
+
1557
    def _gen_associated_legendre(self, x: torch.Tensor) -> torch.Tensor:
        r"""Computes associated Legendre functions (ALFs) of the first kind.

        The ALFs of the first kind are used in spherical harmonics. The spherical
        harmonic of degree `l` and order `m` can be written as
        `Y_l^m(θ, φ) = N_l^m * P_l^m(cos(θ)) * exp(i m φ)`, where `N_l^m` is the
        normalization factor and θ and φ are the colatitude and longitude,
        respectively. `N_l^m` is chosen in the way that the spherical harmonics form
        a set of orthonormal basis function of L^2(S^2). For the computational
        efficiency of spherical harmonics transform, the normalization factor is
        used in the computation of the ALFs. In addition, normalizing `P_l^m`
        avoids overflow/underflow and achieves better numerical stability. Three
        recurrence relations are used in the computation.

        The maximum degree is taken from `self.l_max` (both the degrees and
        orders are `[0, 1, 2, ..., l_max]`), and `self.is_normalized` selects
        whether the orthonormal normalization `N_l^m` is folded in.

        Args:
            x: A vector of type `float32`, `float64` containing the sampled points in
              spherical coordinates, at which the ALFs are computed; `x` is essentially
              `cos(θ)`. For the numerical integration used by the spherical harmonics
              transforms, `x` contains the quadrature points in the interval of
              `[-1, 1]`. There are several approaches to provide the quadrature points:
              Gauss-Legendre method (`scipy.special.roots_legendre`), Gauss-Chebyshev
              method (`scipy.special.roots_chebyu`), and Driscoll & Healy
              method (Driscoll, James R., and Dennis M. Healy. "Computing Fourier
              transforms and convolutions on the 2-sphere." Advances in applied
              mathematics 15, no. 2 (1994): 202-250.). The Gauss-Legendre quadrature
              points are nearly equal-spaced along θ and provide exact discrete
              orthogonality, (P^m)^T W P_m = I, where `T` represents the transpose
              operation, `W` is a diagonal matrix containing the quadrature weights,
              and `I` is the identity matrix. The Gauss-Chebyshev points are equally
              spaced, which only provide approximate discrete orthogonality. The
              Driscoll & Healy quadrature points are equally spaced and provide the
              exact discrete orthogonality. The number of sampling points is required to
              be twice as the number of frequency points (modes) in the Driscoll & Healy
              approach, which enables FFT and achieves a fast spherical harmonics
              transform.

        Returns:
            The 3D array of shape `(l_max + 1, l_max + 1, len(x))` containing the values
            of the ALFs at `x`; the dimensions in the sequence of order, degree, and
            evaluation points.
        """
        p = torch.zeros(
            (self.l_max + 1, self.l_max + 1, x.shape[0]), dtype=x.dtype, device=x.device
        )
        p[0, 0] = self.initial_value

        # Compute the diagonal entries p(l,l) with recurrence.
        # y[l - 1] = (1 - x^2)^(l / 2) via a cumulative product over rows.
        y = torch.cumprod(
            torch.broadcast_to(torch.sqrt(1.0 - x * x), (self.l_max, x.shape[0])), dim=0
        )
        p_diag = self.initial_value * torch.einsum("i,ij->ij", self.f_a, y)
        # torch.diag_indices(l_max + 1)
        diag_indices = torch.stack(
            [torch.arange(0, self.l_max + 1, device=x.device)] * 2, dim=0
        )
        # Fill p(1,1) ... p(l_max, l_max); p(0,0) was seeded above.
        p[(diag_indices[0][1:], diag_indices[1][1:])] = p_diag

        diag_indices = torch.stack(
            [torch.arange(0, self.l_max, device=x.device)] * 2, dim=0
        )

        # Compute the off-diagonal entries with recurrence:
        # p(m, m + 1) = f_b[m] * x * p(m, m).
        p_offdiag = torch.einsum(
            "ij,ij->ij",
            torch.einsum("i,j->ij", self.f_b, x),
            p[(diag_indices[0], diag_indices[1])],
        )  # p[torch.diag_indices(l_max)])
        p[(diag_indices[0][: self.l_max], diag_indices[1][: self.l_max] + 1)] = (
            p_offdiag
        )

        # Compute the remaining entries with recurrence, one degree at a time.
        if self.l_max > 1:
            for i in range(2, self.l_max + 1):
                p = self._recursive(i, p, x)
        return p
src/misc/utils.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+
3
+ from src.visualization.color_map import apply_color_map_to_image
4
+ import torch.distributed as dist
5
+
6
def inverse_normalize(tensor, mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)):
    """Undo a channelwise normalization: returns ``tensor * std + mean``.

    ``mean`` and ``std`` are reshaped to (C, 1, 1) so they broadcast over the
    trailing spatial dimensions of a channel-first image tensor.
    """
    param_shape = (-1, 1, 1)
    std_t = torch.as_tensor(std, dtype=tensor.dtype, device=tensor.device).reshape(param_shape)
    mean_t = torch.as_tensor(mean, dtype=tensor.dtype, device=tensor.device).reshape(param_shape)
    return tensor * std_t + mean_t
10
+
11
+
12
+ # Color-map the result.
13
def vis_depth_map(result, near=None, far=None):
    """Color-map a depth tensor with the "turbo" palette in log-depth space.

    When ``near``/``far`` are not supplied, they are estimated from robust
    quantiles (1% of positive depths / 99% of all depths). Depths are mapped so
    near values are high (bright) and far values low.

    Args:
        result: depth tensor (any shape accepted by apply_color_map_to_image).
        near, far: optional scalar tensors; if given, both must be given.
    """
    if near is None and far is None:
        # Cap the element count so the quantile stays cheap on huge maps.
        far = result.view(-1)[:16_000_000].quantile(0.99).log()
        try:
            near = result[result > 0][:16_000_000].quantile(0.01).log()
        # Was a bare `except:`; narrow it so Ctrl-C / SystemExit still propagate.
        # torch raises when quantile sees an empty selection (no positive depths).
        except Exception:
            print("No valid depth values found.")
            near = torch.zeros_like(far)
    else:
        near = near.log()
        far = far.log()

    result = result.log()
    # Normalize log-depth to [0, 1] with near -> 1 and far -> 0.
    result = 1 - (result - near) / (far - near)
    return apply_color_map_to_image(result, "turbo")
28
+
29
+
30
def confidence_map(result):
    """Color-map a confidence tensor with the "magma" palette.

    Values are divided by the global maximum so the input to the color map lies
    in [0, 1] (assuming non-negative confidences — TODO confirm with callers).
    """
    # Dead commented-out log-quantile normalization removed; plain max-scaling
    # is what this function actually does.
    result = result / result.view(-1).max()
    return apply_color_map_to_image(result, "magma")
41
+
42
+
43
def get_overlap_tag(overlap):
    """Bucket an overlap ratio into "small" / "medium" / "large" / "ignore".

    Boundaries (inclusive upper): small (0.05, 0.3], medium (0.3, 0.55],
    large (0.55, 0.8]; anything outside (0.05, 0.8] is "ignore".

    BUGFIX: previously an overlap below 0.05 failed the `0.05 <= overlap`
    guard and fell through to the `overlap <= 0.55` branch, returning
    "medium" instead of "ignore".
    """
    if overlap < 0.05 or overlap > 0.8:
        return "ignore"
    if overlap <= 0.3:
        return "small"
    if overlap <= 0.55:
        return "medium"
    return "large"
54
+
55
+
56
def is_dist_avail_and_initialized():
    """Return True when torch.distributed is both available and initialized."""
    return dist.is_available() and dist.is_initialized()
62
+
63
+
64
def get_world_size():
    """World size of the current process group, or 1 when not distributed."""
    return dist.get_world_size() if is_dist_avail_and_initialized() else 1
68
+
69
+
70
def get_rank():
    """Rank of the current process, or 0 when not distributed."""
    return dist.get_rank() if is_dist_avail_and_initialized() else 0
src/model/decoder/__init__.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from .decoder import Decoder
2
+ from .decoder_splatting_cuda import DecoderSplattingCUDA, DecoderSplattingCUDACfg
3
+
4
# Registry mapping decoder config names to their implementations.
DECODERS = {
    "splatting_cuda": DecoderSplattingCUDA,
}

# Only one decoder variant exists, so the config "union" is a plain alias.
DecoderCfg = DecoderSplattingCUDACfg
9
+
10
+
11
def get_decoder(decoder_cfg: DecoderCfg) -> Decoder:
    """Instantiate the decoder class registered under ``decoder_cfg.name``."""
    decoder_cls = DECODERS[decoder_cfg.name]
    return decoder_cls(decoder_cfg)
src/model/decoder/cuda_splatting.py ADDED
@@ -0,0 +1,244 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from math import isqrt
2
+ from typing import Literal
3
+
4
+ import torch
5
+ from diff_gaussian_rasterization import (
6
+ GaussianRasterizationSettings,
7
+ GaussianRasterizer,
8
+ )
9
+ from einops import einsum, rearrange, repeat
10
+ from jaxtyping import Float, Bool
11
+ from torch import Tensor
12
+
13
+ from ...geometry.projection import get_fov, homogenize_points
14
+
15
+
16
def get_projection_matrix(
    near: Float[Tensor, " batch"],
    far: Float[Tensor, " batch"],
    fov_x: Float[Tensor, " batch"],
    fov_y: Float[Tensor, " batch"],
) -> Float[Tensor, "batch 4 4"]:
    """Maps points in the viewing frustum to (-1, 1) on the X/Y axes and (0, 1) on the Z
    axis. Differs from the OpenGL version in that Z doesn't have range (-1, 1) after
    transformation and that Z is flipped.
    """
    half_tan_x = (0.5 * fov_x).tan()
    half_tan_y = (0.5 * fov_y).tan()

    # Symmetric frustum extents measured on the near plane.
    top = half_tan_y * near
    right = half_tan_x * near
    bottom = -top
    left = -right

    (batch,) = near.shape
    proj = torch.zeros((batch, 4, 4), dtype=torch.float32, device=near.device)
    proj[:, 0, 0] = 2 * near / (right - left)
    proj[:, 1, 1] = 2 * near / (top - bottom)
    proj[:, 0, 2] = (right + left) / (right - left)
    proj[:, 1, 2] = (top + bottom) / (top - bottom)
    proj[:, 2, 2] = far / (far - near)
    proj[:, 2, 3] = -(far * near) / (far - near)
    proj[:, 3, 2] = 1
    return proj
+
45
+
46
def render_cuda(
    extrinsics: Float[Tensor, "batch 4 4"],
    intrinsics: Float[Tensor, "batch 3 3"],
    near: Float[Tensor, " batch"],
    far: Float[Tensor, " batch"],
    image_shape: tuple[int, int],
    background_color: Float[Tensor, "batch 3"],
    gaussian_means: Float[Tensor, "batch gaussian 3"],
    gaussian_covariances: Float[Tensor, "batch gaussian 3 3"],
    gaussian_sh_coefficients: Float[Tensor, "batch gaussian 3 d_sh"],
    gaussian_opacities: Float[Tensor, "batch gaussian"],
    scale_invariant: bool = True,
    use_sh: bool = True,
    cam_rot_delta: Float[Tensor, "batch 3"] | None = None,
    cam_trans_delta: Float[Tensor, "batch 3"] | None = None,
    voxel_masks: Bool[Tensor, "batch gaussian"] | None = None,
) -> tuple[Float[Tensor, "batch 3 height width"], Float[Tensor, "batch height width"]]:
    """Rasterize 3D Gaussians with the CUDA splatting backend, one batch
    element at a time.

    Extrinsics are camera-to-world transforms (they are inverted below to get
    the view matrix, and their translation is used as the camera position).
    ``voxel_masks`` optionally restricts which Gaussians are rendered per
    batch element. Returns (rendered colors, rendered depths); note that the
    per-element ``radii`` are collected but not returned.
    """
    assert use_sh or gaussian_sh_coefficients.shape[-1] == 1

    # Make sure everything is in a range where numerical issues don't appear.
    # Rescaling the whole scene by 1/near keeps geometry and covariances consistent.
    if scale_invariant:
        scale = 1 / near
        extrinsics = extrinsics.clone()
        extrinsics[..., :3, 3] = extrinsics[..., :3, 3] * scale[:, None]
        gaussian_covariances = gaussian_covariances * (scale[:, None, None, None] ** 2)
        gaussian_means = gaussian_means * scale[:, None, None]
        near = near * scale
        far = far * scale

    # SH degree is inferred from the number of coefficients: n = (degree + 1)^2.
    _, _, _, n = gaussian_sh_coefficients.shape
    degree = isqrt(n) - 1
    shs = rearrange(gaussian_sh_coefficients, "b g xyz n -> b g n xyz").contiguous()

    b, _, _ = extrinsics.shape
    h, w = image_shape

    fov_x, fov_y = get_fov(intrinsics).unbind(dim=-1)
    tan_fov_x = (0.5 * fov_x).tan()
    tan_fov_y = (0.5 * fov_y).tan()

    # The rasterizer expects row-major (transposed) matrices.
    projection_matrix = get_projection_matrix(near, far, fov_x, fov_y)
    projection_matrix = rearrange(projection_matrix, "b i j -> b j i")
    view_matrix = rearrange(extrinsics.inverse(), "b i j -> b j i")
    full_projection = view_matrix @ projection_matrix

    all_images = []
    all_radii = []
    all_depths = []
    for i in range(b):
        # Set up a tensor for the gradients of the screen-space means.
        mean_gradients = torch.zeros_like(gaussian_means[i], requires_grad=True)
        try:
            mean_gradients.retain_grad()
        except Exception:
            pass

        settings = GaussianRasterizationSettings(
            image_height=h,
            image_width=w,
            tanfovx=tan_fov_x[i].item(),
            tanfovy=tan_fov_y[i].item(),
            bg=background_color[i],
            scale_modifier=1.0,
            viewmatrix=view_matrix[i],
            projmatrix=full_projection[i],
            projmatrix_raw=projection_matrix[i],
            sh_degree=degree,
            campos=extrinsics[i, :3, 3],
            prefiltered=False,  # This matches the original usage.
            debug=False,
        )
        rasterizer = GaussianRasterizer(settings)

        # The rasterizer takes the covariance's upper triangle (6 values).
        row, col = torch.triu_indices(3, 3)

        if voxel_masks is not None:
            # Render only the Gaussians selected by this element's mask.
            voxel_mask = voxel_masks[i]
            image, radii, depth, opacity, n_touched = rasterizer(
                means3D=gaussian_means[i][voxel_mask],
                means2D=mean_gradients[voxel_mask],
                shs=shs[i][voxel_mask] if use_sh else None,
                colors_precomp=None if use_sh else shs[i, :, 0, :][voxel_mask],
                opacities=gaussian_opacities[i][voxel_mask, ..., None],
                cov3D_precomp=gaussian_covariances[i, :, row, col][voxel_mask],
                theta=cam_rot_delta[i] if cam_rot_delta is not None else None,
                rho=cam_trans_delta[i] if cam_trans_delta is not None else None,
            )
        else:
            image, radii, depth, opacity, n_touched = rasterizer(
                means3D=gaussian_means[i],
                means2D=mean_gradients,
                shs=shs[i] if use_sh else None,
                colors_precomp=None if use_sh else shs[i, :, 0, :],
                opacities=gaussian_opacities[i, ..., None],
                cov3D_precomp=gaussian_covariances[i, :, row, col],
                theta=cam_rot_delta[i] if cam_rot_delta is not None else None,
                rho=cam_trans_delta[i] if cam_trans_delta is not None else None,
            )
        all_images.append(image)
        all_radii.append(radii)
        all_depths.append(depth.squeeze(0))
    return torch.stack(all_images), torch.stack(all_depths)
148
+
149
+
150
def render_cuda_orthographic(
    extrinsics: Float[Tensor, "batch 4 4"],
    width: Float[Tensor, " batch"],
    height: Float[Tensor, " batch"],
    near: Float[Tensor, " batch"],
    far: Float[Tensor, " batch"],
    image_shape: tuple[int, int],
    background_color: Float[Tensor, "batch 3"],
    gaussian_means: Float[Tensor, "batch gaussian 3"],
    gaussian_covariances: Float[Tensor, "batch gaussian 3 3"],
    gaussian_sh_coefficients: Float[Tensor, "batch gaussian 3 d_sh"],
    gaussian_opacities: Float[Tensor, "batch gaussian"],
    fov_degrees: float = 0.1,
    use_sh: bool = True,
    dump: dict | None = None,
) -> Float[Tensor, "batch 3 height width"]:
    """Approximate an orthographic render by pushing the camera far away and
    using a very narrow perspective frustum (``fov_degrees``).

    NOTE(review): ``move_back[2, 3] = -distance_to_near`` writes a batch
    tensor into a scalar slot, which only works for batch size 1 — confirm
    callers. Unlike render_cuda, tanfovx/tanfovy are passed as tensors here
    rather than ``.item()`` scalars.
    """
    b, _, _ = extrinsics.shape
    h, w = image_shape
    assert use_sh or gaussian_sh_coefficients.shape[-1] == 1

    # SH degree is inferred from the coefficient count: n = (degree + 1)^2.
    _, _, _, n = gaussian_sh_coefficients.shape
    degree = isqrt(n) - 1
    shs = rearrange(gaussian_sh_coefficients, "b g xyz n -> b g n xyz").contiguous()

    # Create fake "orthographic" projection by moving the camera back and picking a
    # small field of view.
    fov_x = torch.tensor(fov_degrees, device=extrinsics.device).deg2rad()
    tan_fov_x = (0.5 * fov_x).tan()
    distance_to_near = (0.5 * width) / tan_fov_x
    tan_fov_y = 0.5 * height / distance_to_near
    fov_y = (2 * tan_fov_y).atan()
    near = near + distance_to_near
    far = far + distance_to_near
    move_back = torch.eye(4, dtype=torch.float32, device=extrinsics.device)
    move_back[2, 3] = -distance_to_near
    extrinsics = extrinsics @ move_back

    # Escape hatch for visualization/figures.
    if dump is not None:
        dump["extrinsics"] = extrinsics
        dump["fov_x"] = fov_x
        dump["fov_y"] = fov_y
        dump["near"] = near
        dump["far"] = far

    # The rasterizer expects row-major (transposed) matrices.
    projection_matrix = get_projection_matrix(
        near, far, repeat(fov_x, "-> b", b=b), fov_y
    )
    projection_matrix = rearrange(projection_matrix, "b i j -> b j i")
    view_matrix = rearrange(extrinsics.inverse(), "b i j -> b j i")
    full_projection = view_matrix @ projection_matrix

    all_images = []
    all_radii = []
    for i in range(b):
        # Set up a tensor for the gradients of the screen-space means.
        mean_gradients = torch.zeros_like(gaussian_means[i], requires_grad=True)
        try:
            mean_gradients.retain_grad()
        except Exception:
            pass

        settings = GaussianRasterizationSettings(
            image_height=h,
            image_width=w,
            tanfovx=tan_fov_x,
            tanfovy=tan_fov_y,
            bg=background_color[i],
            scale_modifier=1.0,
            viewmatrix=view_matrix[i],
            projmatrix=full_projection[i],
            projmatrix_raw=projection_matrix[i],
            sh_degree=degree,
            campos=extrinsics[i, :3, 3],
            prefiltered=False,  # This matches the original usage.
            debug=False,
        )
        rasterizer = GaussianRasterizer(settings)

        # The rasterizer takes the covariance's upper triangle (6 values).
        row, col = torch.triu_indices(3, 3)

        image, radii, depth, opacity, n_touched = rasterizer(
            means3D=gaussian_means[i],
            means2D=mean_gradients,
            shs=shs[i] if use_sh else None,
            colors_precomp=None if use_sh else shs[i, :, 0, :],
            opacities=gaussian_opacities[i, ..., None],
            cov3D_precomp=gaussian_covariances[i, :, row, col],
        )
        all_images.append(image)
        all_radii.append(radii)
    return torch.stack(all_images)
+
243
+
244
+ DepthRenderingMode = Literal["depth", "disparity", "relative_disparity", "log"]
src/model/decoder/decoder.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from abc import ABC, abstractmethod
2
+ from dataclasses import dataclass
3
+ from typing import Generic, Literal, TypeVar, Optional
4
+
5
+ from jaxtyping import Float
6
+ from torch import Tensor, nn
7
+
8
+ from ..types import Gaussians
9
+
10
+ DepthRenderingMode = Literal[
11
+ "depth",
12
+ "log",
13
+ "disparity",
14
+ "relative_disparity",
15
+ ]
16
+
17
@dataclass
class DecoderOutput:
    """Raster outputs a decoder produces for a batch of target views.

    Annotation style unified to ``X | None`` (the class previously mixed
    ``Optional[X]`` and ``X | None``); field names, order, and defaults are
    unchanged.
    """

    # Rendered RGB images.
    color: Float[Tensor, "batch view 3 height width"]
    # Rendered depth maps, when requested.
    depth: Float[Tensor, "batch view height width"] | None
    # Per-pixel alpha accumulation — presumably rasterizer opacity; confirm.
    alpha: Float[Tensor, "batch view height width"] | None
    # Extra level-of-detail renderings — schema defined by the decoder; confirm.
    lod_rendering: dict | None
    # Optional per-pixel 3D points.
    pts_all: Float[Tensor, "batch view height width 3"] | None = None
    # Optional per-pixel confidence.
    conf: Float[Tensor, "batch view height width"] | None = None
25
+
26
+ T = TypeVar("T")
27
+
28
+
29
class Decoder(nn.Module, ABC, Generic[T]):
    """Abstract base class for decoders that render Gaussians into images.

    ``T`` is the type of the decoder's configuration dataclass, stored on
    ``self.cfg``.
    """

    # Decoder configuration (set in __init__).
    cfg: T

    def __init__(self, cfg: T) -> None:
        super().__init__()
        self.cfg = cfg

    @abstractmethod
    def forward(
        self,
        gaussians: Gaussians,
        extrinsics: Float[Tensor, "batch view 4 4"],
        intrinsics: Float[Tensor, "batch view 3 3"],
        near: Float[Tensor, "batch view"],
        far: Float[Tensor, "batch view"],
        image_shape: tuple[int, int],
        depth_mode: DepthRenderingMode | None = None,
    ) -> DecoderOutput:
        """Render the scene Gaussians into the given target views.

        ``extrinsics`` are per-view camera matrices (the CUDA splatting path
        inverts them to obtain view matrices, i.e. camera-to-world),
        ``image_shape`` is (height, width), and ``depth_mode`` selects how —
        and whether — depth is rendered.
        """
        pass